Permalink
Browse files

NEW: Support options.html argument in the method scrapinode.createScr…

…aper().

Allow scraping directly from the HTML provided in options.html instead of retrieving the content available at the given URL (options.url).
  • Loading branch information...
1 parent f9be600 commit c46b16810350477db09a6c90c8ccd40eeaa91763 @lbdremy committed Feb 28, 2013
Showing with 41 additions and 8 deletions.
  1. +8 −1 lib/browser.js
  2. +4 −7 lib/scrapinode.js
  3. +29 −0 test/scrap-from-html-test.js
View
@@ -10,7 +10,7 @@ var jsdom = require('jsdom'),
HTTPError = require('httperror');
/**
- * JQuery dependencies
+ * jQuery dependencies
*/
var jqueryExt = fs.readFileSync(__dirname + '/../deps/jquery-regex-selector.js').toString();
@@ -26,6 +26,13 @@ var jquery = fs.readFileSync(__dirname + '/../deps/jquery-1.6.2.min.js').toStrin
*/
exports.load = function(options,callback){
+
+ if(options.html){
+ return process.nextTick(function(){
+ buildDOM(options.html,options.engine,options.url,callback);
+ });
+ }
+
var headers = {
'user-agent' : 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.107 Safari/535.1',
'accept' : 'text/html, application/xhtml+xml, application/xml; q=0.9',
View
@@ -49,6 +49,9 @@ function Scrapinode(engine){
* Create a scraper for the given `url` with the given `engine`
*
* @param {String|Object} options - url of the page or a set of options in this case options.url is mandatory
+ * @param {String} options.url - url of the page
+ * @param {String} [options.engine=this.engine] - name of the engine (jsdom or cheerio)
+ * @param {String} [options.html] - HTML content to scrape instead of fetching options.url
* @param {Function} callback -
* @param {Error} callback().err -
* @param {Scraper} callback().scraper -
@@ -66,13 +69,7 @@ Scrapinode.prototype.createScraper = function (options,callback){
};
}
- /* TODO re-enable HTML feature
- if(url.match(/^https?:/)){
- options.url = url;
- }else{
- options.html = url;
- }
- */
+ if(!options.engine) options.engine = this.engine;
browser.load(options,function(err,window){
if(err) return callback(err);
@@ -0,0 +1,29 @@
+/**
+ * Modules dependencies
+ */
+
+var mocha = require('mocha'),
+ assert = require('chai').assert,
+ libPath = process.env['SCRAPINODE_COV'] ? '../lib-cov' : '../lib',
+ scrapinode = require( libPath + '/scrapinode');
+
+// Test suite
+
+describe('scrapinode#createScraper({url : "...", html : "..."},callback)',function(){
+ describe('when the property "html" is given in the "options" parameters',function(){
+ it('should use the value of the property "html" as the content of the given "url"',function(done){
+ scrapinode.use('nimportequoi.fr','title',function(window){
+ return window.$('title').text();
+ });
+ var options = {
+ url : 'http://nimportequoi.fr',
+ html : '<!DOCTYPE html><html><head><title>Raw HTML page</title></head><body></body></html>'
+ };
+ scrapinode.createScraper(options,function(err,scraper){
+ assert.isNull(err);
+ assert.equal(scraper.get('title'),'Raw HTML page');
+ done();
+ });
+ });
+ });
+});

0 comments on commit c46b168

Please sign in to comment.