Skip to content
Merged
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
ed9c625
Add exclusion feed support
th3raid0r Jul 4, 2023
243e8c1
Update main.js
th3raid0r Jul 4, 2023
579fc9e
Check date of feed item before posting
th3raid0r Jul 4, 2023
13ab5d2
fix dc:date reference.
th3raid0r Jul 4, 2023
ba62cd6
runatstart - true
th3raid0r Jul 4, 2023
e38ec5c
Add this so I know it's executing.
th3raid0r Jul 4, 2023
5045eda
more logs
th3raid0r Jul 4, 2023
d6a02ae
move cutoffdate eval out of item loop.
th3raid0r Jul 4, 2023
5c2512d
Get the actual itemdate
th3raid0r Jul 4, 2023
296dd6e
Trim any whitespace
th3raid0r Jul 4, 2023
5b18a20
more logging
th3raid0r Jul 4, 2023
88791c2
more verbosity
th3raid0r Jul 4, 2023
d7e6fcd
add log when fetcher is done
th3raid0r Jul 4, 2023
6520470
A bit more accurate log of what's going on
th3raid0r Jul 4, 2023
07a4fa4
more precice area for this
th3raid0r Jul 4, 2023
817ba52
More logging.
th3raid0r Jul 4, 2023
79576a2
Fix missing .url property ref
th3raid0r Jul 4, 2023
1a76458
think I have excludes fixed now
th3raid0r Jul 4, 2023
e62de09
Hopefully fix the exclude not working.
th3raid0r Jul 4, 2023
3134adb
Betterer exclusion logic
th3raid0r Jul 4, 2023
30e1e4d
Refactor and add joinfeeds
th3raid0r Jul 13, 2023
d6fd759
Use the same style of rss feed for consistency
th3raid0r Jul 13, 2023
6027141
set vars back to upstream
th3raid0r Jul 13, 2023
b2af779
More reversions for upstream, new param 4 date.
th3raid0r Jul 13, 2023
662d44a
Small fix on cron expression
th3raid0r Jul 13, 2023
b277af2
fix typo that time
th3raid0r Jul 13, 2023
38758d8
remove inconsistent white space
th3raid0r Jul 13, 2023
9765257
smarter date field evaluation.
th3raid0r Jul 13, 2023
dc658a6
Fix different combos of joins and excludes
th3raid0r Jul 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 136 additions & 28 deletions main.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,45 @@ const communities = [
},
]

// Feed data is stored in the following format:
// joinfeeds keeps only the posts from the source feed whose link also appears in at least one of the listed feeds - It is processed first.
// exclude will remove posts from the feed based on the contents of another feed - It is processed second.
// pinCategories will pin posts in the feed that match the category name and are within the specified number of days
// content selects the item field used as the post body: 'summary' uses item.summary; any other value (or leaving it unset) uses item.content
// datefield is the name of the field in the feed that contains the post date. Defaults to 'pubDate' if not specified
//
// const feeds = [
// {
// name: 'feedname',
// url: 'https://www.some-news-site.com/category/rss/news/',
// content: 'description',
// exclude: [
// 'feedname2', // the feed contains posts from feedname2, which we don't want. So we exclude feedname2 to get feedname only.
// ],
// joinfeeds: [
// 'feedname3', // only posts that appear in both feedname and feedname3 are kept. So we join feedname3 to narrow feedname down to the posts the two feeds have in common.
// ],
// pinCategories: [
// { name: 'categoryname', days: 7 }, // posts in this feed that carry the category 'categoryname' are recorded as pinned (featured) with a pin duration of 7 days.
// ]
// },
// {
// name: 'feedname2',
// url: 'https://www.some-news-site.com/category/rss/politics/',
// content: 'content'
// },
// {
// name: 'feedname3',
// url: 'https://www.some-news-site.com/category/rss/localnews/',
// content: 'content'
// }
// ]

const feeds = [
{
name: 'godot',
url: 'https://godotengine.org/rss.xml',
datefield: 'pubDate',
pinCategories: [
{ name: 'Release', days: 7 },
{ name: 'Pre-release', days: 7 },
Expand All @@ -92,13 +127,21 @@ const feeds = [
name: 'unreal',
url: 'https://www.unrealengine.com/en-US/rss',
content: 'summary',
datefield: 'published',
},
{
name: 'unity',
url: 'https://blogs.unity3d.com/feed/',
datefield: 'pubDate',
}
]

/**
 * Convert the raw RATE_LIMIT_MS setting into a usable delay in milliseconds.
 *
 * Environment variables are always strings, so the value is parsed explicitly
 * rather than handed to setTimeout as-is. An unset, blank, non-numeric,
 * non-finite or negative value yields the fallback, so a misconfigured
 * variable cannot silently remove the pause between posts.
 *
 * @param {string|undefined} raw - Raw value, e.g. process.env.RATE_LIMIT_MS.
 * @param {number} [fallback=2000] - Delay used when raw is missing or invalid.
 * @returns {number} Delay in milliseconds (always a finite number >= 0).
 */
function parseRateLimitMs(raw, fallback = 2000) {
  const text = typeof raw === 'string' ? raw.trim() : '';
  if (text === '') {
    return fallback;
  }
  const value = Number(text);
  return Number.isFinite(value) && value >= 0 ? value : fallback;
}

// Pause (in milliseconds) applied after each created post; override with RATE_LIMIT_MS.
const sleepDuration = parseRateLimitMs(process.env.RATE_LIMIT_MS);

/**
 * Pause asynchronous execution for a given amount of time.
 *
 * @param {number} ms - Delay in milliseconds before the promise settles.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}

// -----------------------------------------------------------------------------
// Main Bot Code

Expand Down Expand Up @@ -164,45 +207,110 @@ const bot = new LemmyBot.LemmyBot({
cronExpression: '0 */10 * * * *',
timezone: 'America/Toronto',
doTask: async ({getCommunityId, createPost}) => {
console.log(`${chalk.green('STARTED:')} RSS Feed Fetcher.`);
for (const feed of feeds) {
const rss = await parser.parseURL(feed.url);

const cutoffDate = new Date();
console.log(`${chalk.white('CURRENT DATE:')} ${cutoffDate}`);
cutoffDate.setMonth(cutoffDate.getMonth() - 6); // set to 6 months ago
console.log(`${chalk.white('CUTOFF DATE:')} ${cutoffDate}`);

let joinedItems = [];
// gather all items from feeds to be joined
if (feed.joinfeeds) {
console.log(`${chalk.white('FETCHING:')} joining feeds for ${feed.name}`);
for (const joinFeedName of feed.joinfeeds) {
const joinFeed = feeds.find(f => f.name === joinFeedName);

for (const item of rss.items) {
let pin_days = 0;
// if has categories then see if it's a pin
if (feed.pinCategories && item.categories) {
for (const category of item.categories) {
const found_category = feed.pinCategories.find(c => c.name === category);
if (found_category) {
pin_days = found_category.days;
}
if (joinFeed) {
const joinRss = await parser.parseURL(joinFeed.url);
joinedItems = joinedItems.concat(joinRss.items);
}
}
}


db.run(`INSERT INTO posts (link, pin_days, featured) VALUES (?, ?, ?)`, [item.link, pin_days, pin_days > 0 ? 1 : 0], async (err) => {
if (err) {
if (err.message.includes('UNIQUE constraint failed')) {
// do nothing
return;
} else {
return console.error(err.message);
let excludeItems = [];


// exclude feeds
if (feed.exclude) {
console.log(`${chalk.white('FETCHING:')} exclusion feeds for ${feed.name}`);
for (const excludeFeedName of feed.exclude) {
const excludeFeed = feeds.find(f => f.name === excludeFeedName);

if (excludeFeed) {
const excludeRss = await parser.parseURL(excludeFeed.url);
for (const excludeItem of excludeRss.items) {
excludeItems.push(excludeItem.link);
}
}
}
}

let commonItems = rss.items.filter(item => {
if (feed.joinfeeds && feed.exclude) {
return joinedItems.map(i => i.link).includes(item.link) && !excludeItems.includes(item.link);
} else if (feed.joinfeeds) {
return joinedItems.map(i => i.link).includes(item.link);
} else if (feed.exclude) {
return !excludeItems.includes(item.link);
} else {
return true;
}
});

for (const community of communities) {
if (community.feeds.includes(feed.name)) {
const communityId = await getCommunityId({ name: community.slug, instance: community.instance })
await createPost({
name: item.title,
body: ((feed.content && feed.content === 'summary') ? item.summary : item.content),
url: item.link || undefined,
community_id: communityId,
});
for (const item of commonItems) {
let pin_days = 0;
const itemDate = new Date((feed.datefield ? item[feed.datefield] : item.pubDate).trim());
console.log(`${chalk.white('ITEM DATE:')} ${itemDate}`);
//if item is newer than 6 months old, continue
if (itemDate > cutoffDate) {
console.log(`${chalk.green('RECENT:')} true`);
console.log(`${chalk.white('LINK:')} ${item.link}`);
// if has categories then see if it's a pin
if (feed.pinCategories && item.categories) {
for (const category of item.categories) {
const found_category = feed.pinCategories.find(c => c.name === category);
if (found_category) {
pin_days = found_category.days;
}
}
}
console.log(`${chalk.green('ADDED:')} ${item.link} for ${pin_days} days`);
});

db.run(`INSERT INTO posts (link, pin_days, featured) VALUES (?, ?, ?)`, [item.link, pin_days, pin_days > 0 ? 1 : 0], async (err) => {
if (err) {
if (err.message.includes('UNIQUE constraint failed')) {
// do nothing
console.log(`${chalk.yellow('PRESENT:')} ${item.link} already present`);
return;
} else {
return console.error(err.message);
}
}
console.log(`${chalk.green('INSERTED:')} ${item.link} into database.`);

for (const community of communities) {
if (community.feeds.includes(feed.name)) {
console.log(`${chalk.green('CREATING:')} post for link ${item.link} in ${community.slug }`);
const communityId = await getCommunityId({ name: community.slug, instance: community.instance });
await createPost({
name: item.title,
body: ((feed.content && feed.content === 'summary') ? item.summary : item.content),
url: item.link || undefined,
community_id: communityId,
});
await sleep(sleepDuration);

}
}
console.log(`${chalk.green('ADDED:')} ${item.link} for ${pin_days} days`);
});
}

}
console.log(`${chalk.green('COMPLETE:')} Feed ${feed.name} processed.`);
}
}
},
Expand Down Expand Up @@ -262,4 +370,4 @@ const bot = new LemmyBot.LemmyBot({
]
});

bot.start();
bot.start();