Skip to content

Commit

Permalink
Use pipeline library
Browse files — browse the repository at this point in the history
  • Loading branch information
kremio committed Dec 9, 2018
1 parent 78500f5 commit 5eb90c6
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 303 deletions.
2 changes: 1 addition & 1 deletion package.json
Expand Up @@ -30,7 +30,7 @@
"dependencies": {
"cheerio": "^1.0.0-rc.2",
"request": "^2.88.0",
"rwv-sqlite": "git+https://github.com/kremio/rwv-sqlite-js.git",
"rwv-scraper-pipeline": "git+https://github.com/kremio/rwv-scraper-pipeline.git",
"timezonecomplete": "^5.6.2"
},
"devDependencies": {
Expand Down
84 changes: 0 additions & 84 deletions pipeline.js

This file was deleted.

216 changes: 0 additions & 216 deletions scrape.js

This file was deleted.

21 changes: 19 additions & 2 deletions scraper.js
@@ -1,8 +1,25 @@
//NOTE: When morph.io executes the scraper, it sets process.env.NODE_ENV
//to 'production'
const scraperPipeline = require('./pipeline')
const path = require('path')
const scraperPipeline = require('rwv-scraper-pipeline')
const {urlOfPage, pageNumberOfURL, DEFAULT_INDEX_URL } = require('./lib/constants')

scraperPipeline()
//Configuration object consumed by scraperPipeline() below.
//NOTE(review): property semantics are inferred from the inline comments and
//the local usage here — confirm against the rwv-scraper-pipeline package docs.
const options = {
  //An async function that scrapes one page of the reports index
  scrapeIndex: require('./lib/index'),
  //An async function that scrapes a single report
  scrapeReport: require('./lib/report'),
  //Returns the URL for the given index page number
  urlOfPage,
  //Returns the page number for the given index page URL
  pageNumberOfURL,
  //The URL of the first page of reports — scraping starts here
  startAtPageURL: DEFAULT_INDEX_URL,
  //Absolute path to the database config.json, resolved relative to this file
  pathToDbConfig: path.resolve(__dirname,'./config/database.json')
}

scraperPipeline( options )
.catch( (e) => {
console.error(e)
console.log(e.stack)
Expand Down

0 comments on commit 5eb90c6

Please sign in to comment.