prettier
niczem committed Jun 20, 2024
1 parent 3b7c620 commit b7c33ca
Showing 1 changed file with 16 additions and 20 deletions.
36 changes: 16 additions & 20 deletions datasources/facebook_posts/index.js
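
This commit applies Prettier formatting to the crawler source; the repository's actual Prettier setup is not shown in the diff. As a rough sketch only, an options file consistent with the changes visible below (added semicolons, single quotes, (r) => arrow parentheses, trailing commas in multi-line literals, wrapped long string concatenations) could look like this; the filename and every option value are assumptions, and most of them match Prettier's defaults anyway:

// .prettierrc.js -- hypothetical config, not taken from the repository.
// The file in this commit could be reformatted with:
//   npx prettier --write datasources/facebook_posts/index.js
module.exports = {
  semi: true, // add missing statement-ending semicolons
  singleQuote: true, // "WRITING COOKIES" becomes 'WRITING COOKIES'
  arrowParens: 'always', // r => ... becomes (r) => ...
  trailingComma: 'es5', // multi-line object literals gain trailing commas
  printWidth: 80, // long console.log concatenations are wrapped across lines
};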
@@ -16,7 +16,7 @@ const fs = require('fs').promises;
const timeout = 7000;
const run_headless = process.env.run_headless;

- const cookie_file = './data/_sessiondata/cookies.json'
+ const cookie_file = './data/_sessiondata/cookies.json';
let browser;

class Utils {
@@ -32,18 +32,17 @@ class FacebookCrawler {
async getPosts(pagename, limit = 3, callback) {
try {
console.log(`limit: ${limit}`);
- if (browser == null)
-   browser = await new Utils().createBrowserInstance();
+ if (browser == null) browser = await new Utils().createBrowserInstance();
const page = await browser.newPage();
const cookiesString = await fs.readFile(cookie_file);
const cookies = JSON.parse(cookiesString);
await page.setCookie(...cookies);

await page.goto('https://m.facebook.com/' + pagename);
page.setViewport({
- width: 1000,
- height: 1500
- })
+ width: 1000,
+ height: 1500,
+ });
let last_length = 0;
let limit_count = 0;

@@ -116,24 +115,23 @@ class FacebookCrawler {
await self.autoScroll(page);
console.log(`autoscroll finished ${limit_count}/${limit}`);

- console.log('take screenshot')
+ console.log('take screenshot');
await page.screenshot({
- path: './data/screenshot_'+pagename+'_'+limit_count+'.jpg',
+ path: './data/screenshot_' + pagename + '_' + limit_count + '.jpg',
fullPage: false,
type: 'jpeg',
captureBeyondViewport: true,

});
- console.log('./data/screenshot_'+pagename+'_'+limit_count+'.jpg')
+ console.log(
+ './data/screenshot_' + pagename + '_' + limit_count + '.jpg'
+ );
// Capture screenshot
if (
- limit_count >= limit
+ limit_count >= limit
//|| not_increased >= 30 //exit if amount does not increase after 3 intervals
) {
console.log('done');



clearInterval(interval);

//add db entries
@@ -183,8 +181,7 @@ class FacebookCrawler {

async getComments(post_id, link, limit = 3, callback) {
const comment_url = link;
- if (browser == null)
-   browser = await new Utils().createBrowserInstance();
+ if (browser == null) browser = await new Utils().createBrowserInstance();
const page = await browser.newPage();
const cookiesString = await fs.readFile(cookie_file);
const cookies = JSON.parse(cookiesString);
@@ -294,8 +291,7 @@ class FacebookCrawler {

console.log(link);

- if (browser == null)
-   browser = await new Utils().createBrowserInstance();
+ if (browser == null) browser = await new Utils().createBrowserInstance();
const page = await browser.newPage();
const cookiesString = await fs.readFile(cookie_file);
const cookies = JSON.parse(cookiesString);
@@ -417,11 +413,11 @@ class FacebookCrawler {
//press enter
console.log('logged in, now waiting 20s');
//long timeout is needed because fb is slow af
- await new Promise(r => setTimeout(r, 20000));
+ await new Promise((r) => setTimeout(r, 20000));
return setTimeout(async function () {
try {
const cookies = await page.cookies();
console.log("WRITING COOKIES", cookies);
console.log('WRITING COOKIES', cookies);
await fs.writeFile(cookie_file, JSON.stringify(cookies, null, 2));
browser.close();
} catch (e) {
@@ -454,7 +450,7 @@ module.exports = class Datasource extends Worker {
function (posts) {
console.log('done crawling posts... add jobs for comments');
console.log(posts);
- console.log(sql.Post)
+ console.log(sql.Post);
sql.Post.bulkCreate(posts);
if (job.properties.continue)
for (let i in posts) {
