Browse files

Added a pageSettings config property, which exposes PhantomJS page.settings

  • Loading branch information...
1 parent d373070 commit f9278f04687dd5b65ae3fcd52891eb244cd196c8 @thiagof committed Jun 17, 2012
Showing with 32 additions and 14 deletions.
  1. +32 −14 pjscrape.js
View
46 pjscrape.js
@@ -35,7 +35,18 @@ var pjs = (function(){
writer: 'stdout',
format: 'json',
logFile: 'pjscrape_log.txt',
- outFile: 'pjscrape_out.txt'
+ outFile: 'pjscrape_out.txt',
+ pageSettings: {
+ // set up console output
+ onConsoleMessage: function(msg, line, id) {
+ // kill initialization message
+ if (msg.indexOf('___') === 0) return;
+ id = id || 'injected code';
+ if (line) msg += ' (' + id + ' line ' + line + ')';
+ log.msg('CLIENT: ' + msg);
+ },
+ onAlert: function(msg) { log.alert('CLIENT: ' + msg) }
+ }
};
var suites = [];
@@ -65,9 +76,20 @@ var pjs = (function(){
/**
 * Recursively merge the enumerable properties of one or more source objects
 * into a destination object.
 *
 * Rules applied per property, for each source in argument order:
 *  - `undefined` source values are skipped, so they never erase an existing value.
 *  - Plain-object source values (constructor === Object) are merged recursively.
 *    If the destination slot cannot hold properties (missing, null, or a
 *    primitive), a fresh object is created so the source object is never
 *    aliased into the destination.
 *  - Every other value (primitives, null, arrays, functions, ...) is assigned
 *    directly, replacing whatever was there.
 *
 * @param {Object} obj  Destination object; mutated in place.
 * @param {...Object} source  Objects whose properties are merged into obj;
 *                            later sources win over earlier ones.
 * @returns {Object} The same `obj` reference that was passed in.
 */
function extend(obj) {
    // Plain object check that is safe for null/undefined (no try/catch needed).
    function isPlainObject(value) {
        return value !== null && typeof value === 'object' && value.constructor === Object;
    }

    // Anything that can carry properties may be merged into in place.
    function canHoldProperties(value) {
        return value !== null && (typeof value === 'object' || typeof value === 'function');
    }

    Array.prototype.slice.call(arguments, 1).forEach(function(source) {
        for (var prop in source) {
            var value = source[prop];
            // leave existing values alone when the source has nothing to say
            if (value === void 0) continue;
            if (isPlainObject(value)) {
                // recursively merge object properties; start from a fresh
                // object when the destination slot is unusable so that the
                // source's nested object is copied rather than shared
                var target = canHoldProperties(obj[prop]) ? obj[prop] : {};
                obj[prop] = extend(target, value);
            } else {
                obj[prop] = value;
            }
        }
    });

    return obj;
}
@@ -459,17 +481,7 @@ var pjs = (function(){
suiteq = [];
// create a single WebPage object for reuse
- var page = require('webpage').create({
- // set up console output
- onConsoleMessage: function(msg, line, id) {
- // kill initialization message
- if (msg.indexOf('___') === 0) return;
- id = id || 'injected code';
- if (line) msg += ' (' + id + ' line ' + line + ')';
- log.msg('CLIENT: ' + msg);
- },
- onAlert: function(msg) { log.alert('CLIENT: ' + msg) }
- });
+ var page = require('webpage').create(config.pageSettings);
// add waitFor method
page.waitFor = function(test, callback) {
@@ -645,6 +657,7 @@ var pjs = (function(){
var suite = this,
opts = suite.opts,
page = SuiteManager.getPage();
+
log.msg('Opening ' + url);
// set up callback to look for response codes
page.onResourceReceived = function(res) {
@@ -660,6 +673,10 @@ var pjs = (function(){
console.log('requested: ' + JSON.stringify(req, undefined, 4));
}
};
+
+ // set user defined pageSettings
+ page.settings = extend(page.settings, config.pageSettings);
+
// run the scrape
page.open(url, function(status) {
// check for load errors
@@ -787,6 +804,7 @@ var pjs = (function(){
log.msg('Saved ' + writer.count() + ' items');
phantom.exit();
});
+
// make all suites
suites.forEach(function(suite, i) {
SuiteManager.add(new ScraperSuite(
@@ -865,6 +883,6 @@ if (!phantom.args.length) {
}
});
}
+
// start the scrape
pjs.init();
-

0 comments on commit f9278f0

Please sign in to comment.