Permalink
Browse files

NEW: Test schema.org and generic tags parsing.

  • Loading branch information...
1 parent 7ed41ab commit 52a5074db2633077d64703dac589d9503c1d5bef @lbdremy committed Feb 18, 2013
Showing with 158 additions and 11 deletions.
  1. +5 −4 lib/defaults/index.js
  2. +49 −0 test/resources/page-generic-tags.html
  3. +48 −0 test/resources/page-schema-org.html
  4. +56 −7 test/use-defaults-test.js
View
@@ -129,6 +129,7 @@ function scrapImage(window){
// Grab all images
if(thumbs.length < 10){
$('img').each(function(){
+ if($(this).attr('itemprop') === 'image') return;
var alt = $(this).attr('alt');
// Leave this test alone:
// the selector 'img[alt="title"]' does not work when the title contains a double quote, e.g. LG 42PT35342" PLASMA TV (escaping issues).
@@ -185,21 +186,21 @@ function scrapTitle(window){
function scrapVideo(window){
var $ = window.$;
- var url = window.url;
+ var url = window.location.href;
var thumbs = [];
// Open Graph protocol by Facebook: <meta property="og:video" content="(*)"/>
$('meta').each(function(){
var property = $(this).attr('property');
var content = $(this).attr('content');
if(property === 'og:video' && content){
- thumbs.push(content);
+ thumbs.push(utils.toURL(content));
}
});
$('video, embed').each(function(){
- var html = $(this).clone().wrap('<div></div>').parent().html();
- thumbs.push(html);
+ var src = $(this).attr('src');
+ if(src) thumbs.push(utils.toURL(src,url));
});
return thumbs;
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
+<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
+<!--[if IE 8]> <html class="no-js lt-ie9"> <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]-->
+ <head>
+ <meta charset="utf-8">
+ <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
+ <title>generic tags page</title>
+ <meta name="description" content="description of the generics tag page">
+ <meta name="viewport" content="width=device-width">
+
+ <!-- Place favicon.ico and apple-touch-icon.png in the root directory -->
+
+ <link rel="stylesheet" href="css/normalize.css">
+ <link rel="stylesheet" href="css/main.css">
+ <script src="js/vendor/modernizr-2.6.2.min.js"></script>
+
+ </head>
+ <body>
+ <!--[if lt IE 7]>
+ <p class="chromeframe">You are using an <strong>outdated</strong> browser. Please <a href="http://browsehappy.com/">upgrade your browser</a> or <a href="http://www.google.com/chromeframe/?redirect=true">activate Google Chrome Frame</a> to improve your experience.</p>
+ <![endif]-->
+
+ <!-- Add your site or application content here -->
+ <p>Hello world! This is HTML5 Boilerplate.</p>
+ <img src="http://localhost/img1.png" />
+ <img src="http://localhost/img2.png" />
+ <video src="videofile.ogg" autoplay poster="posterimage.jpg">
+ Sorry, your browser doesn't support embedded videos,
+ but don't worry, you can <a href="videofile.ogg">download it</a>
+ and watch it with your favorite video player!
+ </video>
+ <embed type="video/quicktime" src="movie.mov" width="640" height="480">
+
+ <script src="//ajax.googleapis.com/ajax/libs/jquery/1.9.0/jquery.min.js"></script>
+ <script>window.jQuery || document.write('<script src="js/vendor/jquery-1.9.0.min.js"><\/script>')</script>
+ <script src="js/plugins.js"></script>
+ <script src="js/main.js"></script>
+
+ <!-- Google Analytics: change UA-XXXXX-X to be your site's ID. -->
+ <script>
+ var _gaq=[['_setAccount','UA-XXXXX-X'],['_trackPageview']];
+ (function(d,t){var g=d.createElement(t),s=d.getElementsByTagName(t)[0];
+ g.src=('https:'==location.protocol?'//ssl':'//www')+'.google-analytics.com/ga.js';
+ s.parentNode.insertBefore(g,s)}(document,'script'));
+ </script>
+ </body>
+</html>
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
+<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
+<!--[if IE 8]> <html class="no-js lt-ie9"> <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]-->
+ <head>
+ <meta charset="utf-8">
+ <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
+ <title>localhost</title>
+ <meta name="description" content="">
+ <meta name="viewport" content="width=device-width">
+
+ <!-- Place favicon.ico and apple-touch-icon.png in the root directory -->
+
+ <link rel="stylesheet" href="css/normalize.css">
+ <link rel="stylesheet" href="css/main.css">
+ <script src="js/vendor/modernizr-2.6.2.min.js"></script>
+
+
+ </head>
+ <body>
+ <!--[if lt IE 7]>
+ <p class="chromeframe">You are using an <strong>outdated</strong> browser. Please <a href="http://browsehappy.com/">upgrade your browser</a> or <a href="http://www.google.com/chromeframe/?redirect=true">activate Google Chrome Frame</a> to improve your experience.</p>
+ <![endif]-->
+
+ <!-- Schema.org semantic -->
+ <div itemscope itemtype ="http://schema.org/Product">
+ <p itemprop="name" >Call of Duty: Modern Warfare III</p>
+ <p itemprop="description" >First Person Shooter game available on consoles and PCs.</p>
+ <img src="http://www.journaldugamer.com/files/2011/11/mw3-header.jpg" itemprop="image"/>
+ </div>
+ <!-- Add your site or application content here -->
+ <p>Hello world! This is HTML5 Boilerplate.</p>
+
+ <script src="//ajax.googleapis.com/ajax/libs/jquery/1.9.0/jquery.min.js"></script>
+ <script>window.jQuery || document.write('<script src="js/vendor/jquery-1.9.0.min.js"><\/script>')</script>
+ <script src="js/plugins.js"></script>
+ <script src="js/main.js"></script>
+
+ <!-- Google Analytics: change UA-XXXXX-X to be your site's ID. -->
+ <script>
+ var _gaq=[['_setAccount','UA-XXXXX-X'],['_trackPageview']];
+ (function(d,t){var g=d.createElement(t),s=d.getElementsByTagName(t)[0];
+ g.src=('https:'==location.protocol?'//ssl':'//www')+'.google-analytics.com/ga.js';
+ s.parentNode.insertBefore(g,s)}(document,'script'));
+ </script>
+ </body>
+</html>
View
@@ -80,27 +80,76 @@ function runTestSuite(engine){
});
describe('and scraping by following the Schema.org specifications',function(){
describe('#get("title")',function(){
- it('should retrieve the text representating the title');
+ it('should retrieve the text representating the title',function(done){
+ scrapinode.useAll(scrapinode.defaults());
+ scrapinode.createScraper('http://localhost:1102/page-schema-org.html',function(err,scraper){
+ assert.isNull(err);
+ assert.equal(scraper.get('title'),'Call of Duty: Modern Warfare III');
+ done();
+ });
+ });
});
describe('#get("descriptions")',function(){
- it('should retrieve a list of text representating the descriptions');
+ it('should retrieve a list of text representating the descriptions',function(done){
+ scrapinode.useAll(scrapinode.defaults());
+ scrapinode.createScraper('http://localhost:1102/page-schema-org.html',function(err,scraper){
+ assert.isNull(err);
+ assert.deepEqual(scraper.get('descriptions'),['First Person Shooter game available on consoles and PCs.']);
+ done();
+ });
+ });
});
describe('#get("images")',function(){
- it('should retrieve a list of images url');
+ it('should retrieve a list of images url',function(done){
+ scrapinode.useAll(scrapinode.defaults());
+ scrapinode.createScraper('http://localhost:1102/page-schema-org.html',function(err,scraper){
+ assert.isNull(err);
+ assert.deepEqual(scraper.get('images'),['http://www.journaldugamer.com/files/2011/11/mw3-header.jpg']);
+ done();
+ });
+ });
});
});
describe('and scraping by searching the generic html tags',function(){
describe('#get("title")',function(){
- it('should retrieve the text representating the title');
+ it('should retrieve the text representating the title',function(done){
+ scrapinode.useAll(scrapinode.defaults());
+ scrapinode.createScraper('http://localhost:1102/page-generic-tags.html',function(err,scraper){
+ assert.isNull(err);
+ assert.equal(scraper.get('title'),'generic tags page');
+ done();
+ });
+ });
});
describe('#get("descriptions")',function(){
- it('should retrieve a list of text representating the descriptions');
+ it('should retrieve a list of text representating the descriptions',function(done){
+ scrapinode.useAll(scrapinode.defaults());
+ scrapinode.createScraper('http://localhost:1102/page-generic-tags.html',function(err,scraper){
+ assert.isNull(err);
+ assert.deepEqual(scraper.get('descriptions'),['description of the generics tag page']);
+ done();
+ });
+ });
});
describe('#get("images")',function(){
- it('should retrieve a list of images url');
+ it('should retrieve a list of images url',function(done){
+ scrapinode.useAll(scrapinode.defaults());
+ scrapinode.createScraper('http://localhost:1102/page-generic-tags.html',function(err,scraper){
+ assert.isNull(err);
+ assert.deepEqual(scraper.get('images'),['http://localhost/img1.png','http://localhost/img2.png']);
+ done();
+ });
+ });
});
describe('#get("videos")',function(){
- it('should retrieve a list of videos [urls or html representations of the videos]');
+ it('should retrieve a list of videos [urls or html representations of the videos]',function(done){
+ scrapinode.useAll(scrapinode.defaults());
+ scrapinode.createScraper('http://localhost:1102/page-generic-tags.html',function(err,scraper){
+ assert.isNull(err);
+ assert.deepEqual(scraper.get('videos'),['http://localhost:1102/videofile.ogg','http://localhost:1102/movie.mov']);
+ done();
+ });
+ });
});
});
});

0 comments on commit 52a5074

Please sign in to comment.