Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge pull request #20 from thiagof/master

Added option to expose phantomjs's page.settings
  • Loading branch information...
commit 19fd7c11cee38c588cc1d2a2b0002d5f67f58ea8 2 parents 9827e8e + fcea6c0
@nrabinowitz authored
Showing with 21 additions and 3 deletions.
  1. +21 −3 pjscrape.js
View
24 pjscrape.js
@@ -35,7 +35,8 @@ var pjs = (function(){
writer: 'stdout',
format: 'json',
logFile: 'pjscrape_log.txt',
- outFile: 'pjscrape_out.txt'
+ outFile: 'pjscrape_out.txt',
+ pageSettings: { },
};
var suites = [];
@@ -65,9 +66,20 @@ var pjs = (function(){
function extend(obj) {
Array.prototype.slice.call(arguments, 1).forEach(function(source) {
for (var prop in source) {
- if (source[prop] !== void 0) obj[prop] = source[prop];
+ try {
+ //recursively merge object properties
+ if ( source[prop].constructor==Object ) {
+ obj[prop] = extend(obj[prop], source[prop]);
+ } else {
+ if (source[prop] !== void 0) obj[prop] = source[prop];
+ }
+ } catch(e) {
+ // Property in destination object not set; create it and set its value.
+ obj[prop] = source[prop];
+ }
}
});
+
return obj;
};
@@ -645,6 +657,7 @@ var pjs = (function(){
var suite = this,
opts = suite.opts,
page = SuiteManager.getPage();
+
log.msg('Opening ' + url);
// set up callback to look for response codes
page.onResourceReceived = function(res) {
@@ -660,6 +673,10 @@ var pjs = (function(){
console.log('requested: ' + JSON.stringify(req, undefined, 4));
}
};
+
+ // set user defined pageSettings
+ page.settings = extend(page.settings, config.pageSettings);
+
// run the scrape
page.open(url, function(status) {
// check for load errors
@@ -787,6 +804,7 @@ var pjs = (function(){
log.msg('Saved ' + writer.count() + ' items');
phantom.exit();
});
+
// make all suites
suites.forEach(function(suite, i) {
SuiteManager.add(new ScraperSuite(
@@ -865,6 +883,6 @@ if (!phantom.args.length) {
}
});
}
+
// start the scrape
pjs.init();
-
Please sign in to comment.
Something went wrong with that request. Please try again.