Skip to content

Commit

Permalink
Added jsdom env (#79)
Browse files Browse the repository at this point in the history
  • Loading branch information
maZahaca committed Sep 15, 2016
1 parent 9358087 commit 1f504c9
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 4 deletions.
1 change: 1 addition & 0 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module.exports = {
Environment: require('./lib/Environment'),
PhantomEnvironment: require('./lib/PhantomEnvironment'),
JsDOMEnvironment: require('./lib/JsDOMEnvironment'),

Parser: require('./lib/Parser')
};
140 changes: 140 additions & 0 deletions lib/JsDOMEnvironment.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
const Environment = require('./Environment'),
debug = require('debug')('JsDOMEnvironment'),
_ = require('lodash'),
jsdom = require('jsdom'),
path = require('path'),
vow = require('vow'),
vm = require('vm');

// User agent reported to the loaded page when the caller does not supply one.
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/600.7.12 (KHTML, like Gecko) Version/8.0.7 Safari/600.7.12';

// Fallback values merged into the options passed to the JsDOMEnvironment constructor.
const defaultOptions = {
    // Custom environment options
    // NOTE(review): `snapshot` / `snapshotDir` are not read anywhere in this file —
    // presumably kept for parity with other environments; confirm.
    snapshot: false,
    snapshotDir: 'snapshots',
    // Proxy settings object ({host, port, username?, password?}) consumed by getProxy(), or null.
    proxy: null,
    // Either a single string or an array of strings; getUserAgent() samples from an array.
    userAgent: DEFAULT_USER_AGENT,
};

/**
 * Environment that loads a page into jsdom and evaluates code against it.
 *
 * @param {object} options
 * @param {string} options.url Address of the page to load (required)
 * @param {object} [options.proxy] Proxy settings read by getProxy():
 *      `host`, `port` and optionally `username` / `password`
 * @param {string|string[]} [options.userAgent] User agent, or a list to pick one from
 * @throws {Error} If `url` is missing, including when `options` itself is omitted
 * @constructor
 */
function JsDOMEnvironment(options) {
    debug('Initializing...');
    // Work on a shallow copy so the caller's object is never mutated by the defaults.
    this._options = _.defaults(_.clone(options) || {}, defaultOptions);
    this._proxy = this._options.proxy;
    // Read the URL from the normalized options (not the raw argument) so that a
    // missing `options` reaches the descriptive error below instead of a TypeError.
    this._url = this._options.url;

    if (!this._url) {
        throw new Error('You must pass `url` to JsDOMEnvironment');
    }
    // Set by prepare() once jsdom has finished loading the page.
    this._window = null;
}

JsDOMEnvironment.prototype = _.create(Environment.prototype, /**@lends JsDOMEnvironment*/{

/**
* Prepare environment
* @returns {Promise}
*/
prepare() {
debug('Preparing...');
const deferred = vow.defer();

const params = {
url: this._url,
scripts: ['file:' + path.resolve(__dirname, '../vendor/sizzle.min.js')],
done: (error, window) => {
debug('Page is initialized in JsDom');
if (error) {
return deferred.reject(error);
}
this._window = window;
deferred.resolve();
}
};
params.userAgent = this.getUserAgent();
const proxy = this.getProxy();
if (proxy) {
params.proxy = proxy;
}

jsdom.env(params);
return deferred.promise();
},

/**
* Tear down environment
* @returns {Promise}
*/
tearDown() {
debug('Tear down...');
if (this._window) {
this._window.close();
}
return Promise.resolve();
},

/**
* EvaluateJs in the environment
* @returns {Promise}
*/
evaluateJs(...args) {
debug('.evaluateJs() has called');

let evalFunc = args.pop();
if (typeof evalFunc !== 'function') {
throw new Error('You must pass function as last argument to JsDOMEnvironment.evaluateJs');
}

const sandbox = {
window: this._window,
document: this._window.document,
Sizzle: this._window.Sizzle,
args,
result: null
};
vm.createContext(sandbox);
evalFunc = String(evalFunc);
vm.runInContext(`const fn = ${evalFunc}; result = fn(...args);`, sandbox);

return Promise.resolve(sandbox.result);
},

/**
* @return {string|null}
* @private
*/
getProxy() {
if (!this._proxy) {
return null;
}

let proxy = '';
if (this._proxy.username) {
proxy += this._proxy.username;
}
if (this._proxy.password) {
proxy += `:${this._proxy.password}`;
}
if (proxy) {
proxy += '@';
}
proxy += `${this._proxy.host}:${this._proxy.port}`;

return proxy;
},

/**
* @returns {string}
* @private
*/
getUserAgent() {
let userAgent = this._options.userAgent;
if (Array.isArray(userAgent)) {
userAgent = _.sample(this._options.userAgent);
}
return userAgent;
},
});

module.exports = JsDOMEnvironment;
8 changes: 4 additions & 4 deletions lib/Parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ const MAX_MILESTONE_ATTEMPTS = 2;
* @param {object} options.pagination
* @constructor
*/
function Parser (options) {
function Parser(options) {
this._env = options.environment;
this.clearDom = options.clearDom || false;
this._scopes = [];
Expand Down Expand Up @@ -581,7 +581,7 @@ Parser.prototype = {
.evaluateJs(this._getSelector(), /* @covignore */ function(selector) {
return Sizzle(selector).length;
})
.then(function(nodesCount) {
.then((nodesCount) => {
debug('parsing %s nodes', nodesCount);
if (!nodesCount) {
return [];
Expand All @@ -599,8 +599,8 @@ Parser.prototype = {
this._pushScope(scope.scope, scope.parentScope);
return results;
}, this);
}, this)
.then(function(results) {
})
.then((results) => {
debug('._parseGridRule() results %o', results);
return results;
});
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
},
"keywords": [
"phantomjs",
"jsdom",
"browser",
"parser",
"crawler",
Expand Down Expand Up @@ -51,6 +52,7 @@
},
"dependencies": {
"debug": "^2.2.0",
"jsdom": "^9.4.2",
"lodash": "^3.10.1",
"minimist": "^1.2.0",
"mkdir-p": "0.0.7",
Expand Down

0 comments on commit 1f504c9

Please sign in to comment.