Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

FIX: Remove old tests.

  • Loading branch information...
commit 2e436d700024feb3b6399b24d608c86f8b45fcee 1 parent 6d11958
Remy Loubradou authored
1  .gitignore
View
@@ -1 +1,2 @@
node_modules
+old_test
139 test/scrapinode-test.js
View
@@ -1,139 +0,0 @@
-/**
- * Modules dependencies
- */
-
-var vows = require('vows'),
- assert = require('assert'),
- scrapinode = require('./../main');
- Scraper = require('./../lib/actors/scraper')
-// Vows test suite
-var suite = vows.describe('Scrapinode');
-
-// Elements for the test suite
-var pathScrapulesDir = __dirname + '/scrapules/';
-var urlDefault = 'http://www.comet.co.uk/p/Plasma-TVs/buy-LG-42PT353-Plasma-TV/721050';
-var urlCultBeauty = 'http://www.cultbeauty.co.uk/hair-care/hair-scalp-treatments/moroccanoil-light-oil-treatment.html';
-var urlImage = 'http://ecx.images-amazon.com/images/I/51RRbTkbfBL._SL500_AA300_.jpg';
-var urlwithHTTPRefreshMeta = 'http://www.windowsphone.com/en-US/apps/82a23635-5bd9-df11-a844-00237de2db9e';
-
-suite.addBatch({
- 'Load a scrapule in scrapinode' : {
- topic : function(){
- var useScrapules = function(){ scrapinode.use(pathScrapulesDir)}
- return useScrapules
- },
- 'should happen without throwing errors' : function(useScrapules){
- assert.doesNotThrow(useScrapules,Error)
- }
- }
-}).addBatch({
- 'Create a scraper' : {
- 'to grab relevant elements in the HTML page thanks to its url' : {
- topic : function(){
- scrapinode.init();
- scrapinode.createScraper(urlDefault,this.callback);
- },
- 'should happen without error' : function(err,scraper){
- assert.isNull(err);
- },
- 'should give a Scraper object': function(err,scraper){
- assert.instanceOf(scraper,Scraper);
- },
- 'should give a Scraper object able to retrieve the title of the page' : function(err,scraper){
- assertStringNotEmpty(scraper.get('title'));
- },
- 'should give a Scraper object able to retrieve a description of the page' : function(err,scraper){
- assertArrayNotEmpty(scraper.get('description'));
- },
- 'should give a Scraper object able to retrieve images of the page' : function(err,scraper){
- assertArrayNotEmpty(scraper.get('images'));
- }
- },
- 'to grab relevant elements in the HTML page thanks to its url using a specific scrapule determined by the domain' : {
- topic : function(){
- scrapinode.use(pathScrapulesDir);
- scrapinode.createScraper(urlCultBeauty,this.callback)
- },
- 'should happen without error' : function(err,scraper){
- assert.isNull(err);
- },
- 'should give a Scraper object' : function(err,scraper){
- assert.instanceOf(scraper,Scraper);
- },
- 'should give a Scraper object able to retrieve the title of the page' : function(err,scraper){
- assertStringNotEmpty(scraper.get('title'));
- },
- 'should give a Scraper object able to retrieve a description of the page' : function(err,scraper){
- assertArrayNotEmpty(scraper.get('description'));
- },
- 'should give a Scraper object able to retrieve images of the page' : function(err,scraper){
- assertArrayNotEmpty(scraper.get('images'));
- },
- 'should give a Scraper object able to retrieve the price of the product' : function(err,scraper){
- var price = scraper.get('price');
- assert.isNotEmpty(price);
- assert.match(price,/([0-9,]{1,}(\.?[0-9]{1,}))/)
- }
- },
- 'to grab relevant elements in the HTML page where the link given point to an image.' : {
- topic : function(){
- scrapinode.createScraper(urlImage,this.callback)
- },
- 'should happen without error' : function(err,scraper){
- assert.isNull(err);
- },
- 'should give a Scraper object' : function(err,scraper){
- assert.instanceOf(scraper,Scraper);
- },
- 'should give a Scraper object able to retrieve images of the page' : function(err,scraper){
- assertArrayNotEmpty(scraper.get('images'));
- },
- },
- 'to grab relevant elements in the HTML page where the HTTP-REFRESH meta tag must be followed' : {
- topic : function(){
- scrapinode.createScraper(urlwithHTTPRefreshMeta,this.callback)
- },
- 'should give a Scraper object' : function(err,scraper){
- assert.instanceOf(scraper,Scraper);
- },
- 'should give a Scraper object able to retrieve the title of the page' : function(err,scraper){
- assertStringNotEmpty(scraper.get('title'));
- },
- 'should give a Scraper object able to retrieve images of the page' : function(err,scraper){
- assertArrayNotEmpty(scraper.get('images'));
- }
- },
- 'to grab relevant elements in the HTML page given in argument' : {
- topic : function(){
- var html = '<!DOCTYPE html><html><head><title>Blabla.com</title></head><body><img src="http://blabla.com/favicon.ico"/></body></html>';
- scrapinode.createScraper(html,this.callback);
- },
- 'should give a Scraper object' : function(err,scraper){
- assert.instanceOf(scraper,Scraper);
- },
- 'should give a Scraper object able to retrieve the title of the page' : function(err,scraper){
- var title = scraper.get('title');
- assertStringNotEmpty(title);
- assert.deepEqual(title,'Blabla.com');
- },
- 'should give a Scraper object able to retrieve images of the page' : function(err,scraper){
- var images = scraper.get('images');
- assertArrayNotEmpty(images);
- assert.deepEqual(images, ['http://blabla.com/favicon.ico'])
- }
- }
- }
-
-}).export(module)
-
-// Macros
-
-function assertStringNotEmpty(title){
- assert.isNotEmpty(title);
- assert.isString(title);
-}
-
-function assertArrayNotEmpty(description){
- assert.isNotEmpty(description);
- assert.isArray(description);
-}
20 test/scrapules/cultbeauty.js
View
@@ -1,20 +0,0 @@
-// Dependencies
-var scrap = require('./helpers/scrap');
-
-
-// Expose CultBeauty Extractors
-module.exports = exports = [
- {
- route : 'price',
- operation : scrap('price')('.price-box>.regular-price>.price')
- },
- {
- route : 'currency',
- operation : scrap('currency')('.price-box>.regular-price>.price')
- },
- {
- route : 'description',
- operation : scrap('description')([ { path : '#tab-product-why-cult>div>div' }])
- }
-];
-
132 test/scrapules/helpers/scrap.js
View
@@ -1,132 +0,0 @@
-// Modules dependencies
-
-// Expose Singleton Scrap
-
-module.exports = exports = new Scrap();
-
-function Scrap(){
- var self = this;
- return function(content){
- if(self[content]) return self[content];
- }
-}
-
-// TODO DRY description, images, title, price and currency.
-
-Scrap.prototype.description = function(paths){
- return function($,url){
- var descriptions = [];
- function add(element,format){
- var text = $(element).text();
- if(text) text = text.trim().replace(/\n|\t|\r/g,'');
- if(format && text) text = format(text);
- if(text) descriptions.push(text);
- }
- for(var i = 0; i < paths.length; i++){
- var path = paths[i].path || paths[i];
- var each = paths[i].each || false;
- var format = paths[i].format;
- if(each){
- $(path).each(function(){
- add(this,format);
- });
- }else{
- add(path,format);
- }
- }
- return descriptions
- };
-}
-
-Scrap.prototype.images = function(paths){
- return function($,url){
- var thumbs = [];
- function add(element,format){
- var src = $(element).attr('src');
- if(src) src = src.trim().replace(/\n|\t|\r/g,'');
- if(format && src) src = format(src);
- if(src) thumbs.push(src);
- }
- for(var i = 0; i < paths.length; i++){
- var path = paths[i].path || paths[i];
- var each = paths[i].each || false;
- var format = paths[i].format;
- if(each){
- $(path).each(function(){
- add(this,format);
- });
- }else{
- add(path,format);
- }
- }
- return thumbs
- };
-}
-
-Scrap.prototype.title = function(path){
- return function($,url){
- var title;
- function find(path){
- var title = $(path).text();
- if(title) title = title.trim().replace(/\n|\t|\r/g,'');
- return title
- }
- if(typeof path === 'string'){
- title = find(path);
- }else{
- // Here path is considered to be an Array Object
- for( var i=0; i < path.length; i++){
- title = find(path[i]);
- if(title) break;
- }
- }
- return title;
- };
-}
-
-Scrap.prototype.price = function(path){
- return function($,url){
- var price;
- function find(path){
- var price = $(path).text();
- if(price) price = price.trim().replace(/\n|\t|\r| /g,'').replace(/\$|£||EUR|GBP|USD|/gi,'');
- var pattern = /([0-9,]{1,}(\.?[0-9]{1,}))/;
- var rests = price.match(pattern);
- if(rests) price = rests[1];
- return price;
- }
- if(typeof path === 'string'){
- price = find(path);
- }else{
- // Here path is considered to be an Array Object
- for( var i=0; i < path.length; i++){
- price = find(path[i]);
- if(price) break;
- }
- }
- return price;
- };
-}
-
-Scrap.prototype.currency = function(path){
- return function($,url){
- var currency;
- function find(path){
- var currency = $(path).text();
- if(currency.match(/GBP|£|/gi) !== null ){currency = 'GBP';}
- else if(currency.match(/EUR|€/gi) !== null ){currency = 'EUR';}
- else if(currency.match(/USD|\$/gi) !== null ){ currency = 'USD';}
- return currency;
- }
- if(typeof path === 'string'){
- currency = find(path);
- }else{
- // Here path is considered to be an Array Object
- for( var i=0; i < path.length; i++){
- currency = find(path[i]);
- if(currency) break;
- }
- }
- return currency;
- };
-}
Please sign in to comment.
Something went wrong with that request. Please try again.