Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added jsdom env #79

Merged
merged 3 commits into from
Sep 15, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module.exports = {
Environment: require('./lib/Environment'),
PhantomEnvironment: require('./lib/PhantomEnvironment'),
JsDOMEnvironment: require('./lib/JsDOMEnvironment'),

Parser: require('./lib/Parser')
};
140 changes: 140 additions & 0 deletions lib/JsDOMEnvironment.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
const Environment = require('./Environment'),
debug = require('debug')('JsDOMEnvironment'),
_ = require('lodash'),
jsdom = require('jsdom'),
path = require('path'),
vow = require('vow'),
vm = require('vm');

// User agent string used when the caller does not provide one.
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/600.7.12 (KHTML, like Gecko) Version/8.0.7 Safari/600.7.12';

/**
 * Custom environment options.
 * These values fill in any option the caller leaves undefined.
 */
const defaultOptions = {
    snapshot: false,
    snapshotDir: 'snapshots',
    proxy: null,
    userAgent: DEFAULT_USER_AGENT,
};

/**
 * Environment that loads a page into jsdom.
 *
 * Caller options are shallow-cloned and completed with `defaultOptions`;
 * the caller's object is not modified.
 *
 * @param {object} options
 * @param {string} options.url address of the page to load (required)
 * @param {object} [options.proxy] proxy settings; `getProxy()` reads its
 *     `host`, `port`, `username` and `password` fields
 * @param {string|string[]} [options.userAgent] user agent string, or a list
 *     from which `getUserAgent()` samples one
 * @throws {Error} when `url` is missing, including when `options` itself is omitted
 * @constructor
 */
function JsDOMEnvironment(options) {
    debug('Initializing...');
    this._options = _.defaults(_.clone(options) || {}, defaultOptions);
    this._proxy = this._options.proxy;
    // Read from the normalized options rather than the raw argument, so that
    // a missing `options` reaches the descriptive error below instead of
    // failing with a TypeError on property access.
    this._url = this._options.url;

    if (!this._url) {
        throw new Error('You must pass `url` to JsDOMEnvironment');
    }
    // Assigned by prepare() once jsdom has created the window.
    this._window = null;
}

JsDOMEnvironment.prototype = _.create(Environment.prototype, /**@lends JsDOMEnvironment*/{

/**
* Prepare environment: load the page at `this._url` into jsdom.
*
* Calls `jsdom.env` with the configured URL, the bundled
* `vendor/sizzle.min.js` script, the user agent from `getUserAgent()` and,
* when `getProxy()` returns a non-empty value, that proxy string.
* On success the window handed to `done` is stored in `this._window`.
*
* @returns {Promise} vow promise, resolved without a value once `done` is
* called with no error, rejected with the error passed to `done` otherwise
*/
prepare() {
debug('Preparing...');
const deferred = vow.defer();

const params = {
url: this._url,
scripts: ['file:' + path.resolve(__dirname, '../vendor/sizzle.min.js')],
// Arrow function: `this` inside the callback is the environment instance
done: (error, window) => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just

done(error, window) {

debug('Page is initialized in JsDom');
if (error) {
return deferred.reject(error);
}
this._window = window;
deferred.resolve();
}
};
params.userAgent = this.getUserAgent();
const proxy = this.getProxy();
if (proxy) {
params.proxy = proxy;
}

jsdom.env(params);
return deferred.promise();
},

/**
* Tear down environment
* @returns {Promise}
*/
tearDown() {
debug('Tear down...');
if (this._window) {
this._window.close();
}
return Promise.resolve();
},

/**
* EvaluateJs in the environment
*
* The last argument must be a function; the arguments before it are passed
* to that function. The function is converted to its source text with
* `String()` and re-evaluated inside a new `vm` context built from a sandbox
* exposing `window`, `document`, `Sizzle`, `args` and `result`, so it does
* not see variables from the scope in which it was originally defined.
*
* NOTE: `this._window` is dereferenced without a null check, and it is only
* assigned in `prepare()`.
*
* @param {...*} args values for the function, followed by the function itself
* @returns {Promise} native Promise resolved with the function's return value
* @throws {Error} synchronously, when the last argument is not a function
*/
evaluateJs(...args) {
debug('.evaluateJs() has called');

let evalFunc = args.pop();
if (typeof evalFunc !== 'function') {
throw new Error('You must pass function as last argument to JsDOMEnvironment.evaluateJs');
}

const sandbox = {
window: this._window,
document: this._window.document,
Sizzle: this._window.Sizzle,
args,
result: null
};
vm.createContext(sandbox);
// From here on `evalFunc` holds the function's source text, which is
// interpolated into the script below
evalFunc = String(evalFunc);
vm.runInContext(`const fn = ${evalFunc}; result = fn(...args);`, sandbox);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just ${evalFunc.toString()}, it's not cool to change type of variable

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, ok, ignore my comments, of course.. of course.. ))

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

damn it. will fix it now


return Promise.resolve(sandbox.result);
},

/**
* @return {string|null}
* @private
*/
getProxy() {
if (!this._proxy) {
return null;
}

let proxy = '';
if (this._proxy.username) {
proxy += this._proxy.username;
}
if (this._proxy.password) {
proxy += `:${this._proxy.password}`;
}
if (proxy) {
proxy += '@';
}
proxy += `${this._proxy.host}:${this._proxy.port}`;

return proxy;
},

/**
* @returns {string}
* @private
*/
getUserAgent() {
let userAgent = this._options.userAgent;
if (Array.isArray(userAgent)) {
userAgent = _.sample(this._options.userAgent);
}
return userAgent;
},
});

module.exports = JsDOMEnvironment;
8 changes: 4 additions & 4 deletions lib/Parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ const MAX_MILESTONE_ATTEMPTS = 2;
* @param {object} options.pagination
* @constructor
*/
function Parser (options) {
function Parser(options) {
this._env = options.environment;
this.clearDom = options.clearDom || false;
this._scopes = [];
Expand Down Expand Up @@ -581,7 +581,7 @@ Parser.prototype = {
.evaluateJs(this._getSelector(), /* @covignore */ function(selector) {
return Sizzle(selector).length;
})
.then(function(nodesCount) {
.then((nodesCount) => {
debug('parsing %s nodes', nodesCount);
var scope = this._popScope();
return this
Expand All @@ -596,8 +596,8 @@ Parser.prototype = {
this._pushScope(scope.scope, scope.parentScope);
return results;
}, this);
}, this)
.then(function(results) {
})
.then((results) => {
debug('._parseGridRule() results %o', results);
return results;
});
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
},
"keywords": [
"phantomjs",
"jsdom",
"browser",
"parser",
"crawler",
Expand Down Expand Up @@ -51,6 +52,7 @@
},
"dependencies": {
"debug": "^2.2.0",
"jsdom": "^9.4.2",
"lodash": "^3.10.1",
"minimist": "^1.2.0",
"mkdir-p": "0.0.7",
Expand Down