Skip to content

Commit

Permalink
Scraping coding.napolux.com
Browse files Browse the repository at this point in the history
  • Loading branch information
napolux committed Aug 17, 2018
1 parent 161f288 commit 9bddaa8
Show file tree
Hide file tree
Showing 3 changed files with 365 additions and 0 deletions.
41 changes: 41 additions & 0 deletions index.js
@@ -0,0 +1,41 @@
const puppeteer = require('puppeteer');
const URL = 'https://coding.napolux.com';

// Scrape the post titles and links from the page at URL and print one line per post.
// Exits with status 0 on success and 1 on any failure.
puppeteer.launch({ headless: true, args: ['--no-sandbox', '--disable-setuid-sandbox'] }).then(async browser => {
  let result;

  try {
    const page = await browser.newPage();

    // present ourselves as a small-screen iPhone
    await page.setViewport({width: 320, height: 600});
    await page.setUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 9_0_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13A404 Safari/601.1');

    await page.goto(URL, {waitUntil: 'networkidle0'});
    await page.waitForSelector('body.blog');

    // inject jQuery so the in-page callback below can use `$`
    await page.addScriptTag({url: 'https://code.jquery.com/jquery-3.2.1.min.js'});

    // This callback runs inside the page, not in Node. There is deliberately no
    // try/catch here: an exception thrown in the page rejects the promise
    // returned by page.evaluate(), so the real error reaches the handler below.
    result = await page.evaluate(() => {
      const data = [];
      $('h3.loop__post-title').each(function() {
        const link = $(this).find('a');
        data.push({
          'title' : link.attr('title'),
          'url' : link.attr('href')
        });
      });
      return data; // Return our data array
    });
  } finally {
    // let's close the browser, even when one of the steps above failed
    await browser.close();
  }

  // ok, let's log blog titles...
  for (const post of result) {
    console.log('Post: ' + post.title + ' URL: ' + post.url);
  }
  process.exit();
}).catch(function(error) {
  console.error('No way Paco!');
  // show what actually went wrong and signal failure to the caller
  console.error(error);
  process.exit(1);
});
302 changes: 302 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions package.json
@@ -0,0 +1,22 @@
{
"name": "puppy",
"version": "1.0.0",
"description": "A little puppy",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "git+https://github.com/napolux/puppy.git"
},
"author": "napolux",
"license": "MIT",
"bugs": {
"url": "https://github.com/napolux/puppy/issues"
},
"homepage": "https://github.com/napolux/puppy#readme",
"dependencies": {
"puppeteer": "^1.7.0"
}
}

0 comments on commit 9bddaa8

Please sign in to comment.