tests, readme file

ondrs · Feb 8, 2014 · b15ad9a · b15ad9a
1 parent 48b8354
commit b15ad9a
Show file tree

Hide file tree

Showing 6 changed files with 181 additions and 11 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
 node_modules
+.log
diff --git a/README.md b/README.md
@@ -0,0 +1,37 @@
+# node Krawler
+
+## How to install
+```
+npm install krawler
+```
+
+## Basic example
+
+```javascript
+var crawler = new Krawler;
+
+crawler
+    .queue('http://ondraplsek.cz')
+    .on('data', function($, url, response) {
+
+        // $ - cheerio instance
+        // url of the current webpage
+        // response object from mikeal/request
+
+    })
+    .on('err', function(err, url) {
+        // there has been an 'err' on 'url'
+    })
+    .on('end', function() {
+        // all URLs have been fetched
+    });
+```
+
+Krawler provides three types of built-in parsers:
+
+- cheerio (default)
+- xml
+- json
+
+```javascript
+var crawler = new Krawler({
+    parser: 'xml'
+});
+```
+
diff --git a/lib/krawler.js b/lib/krawler.js
@@ -40,7 +40,7 @@ util.inherits(Krawler, events.EventEmitter);
  * @type {number}
  * @const
  */
-Krawler.prototype.VERSION = 0.5;
+Krawler.prototype.VERSION = 0.1;
 
 /**
  *
@@ -53,6 +53,7 @@ Krawler.prototype._options = {};
 /**
  *
  * @param {Array|string} urls
+ * @returns {Krawler}
  */
 Krawler.prototype.queue = function(urls) {
   var self = this;
@@ -80,11 +81,11 @@ Krawler.prototype.queue = function(urls) {
       });
 
   }, function(err) {
-    // no error
+    // no error can occur here
     self.emit('end');
   });
 
-
+  return this;
 };
 
 

diff --git a/license.md b/license.md
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2013 Ondřej Plšek (http://www.ondraplsek.cz)
+Copyright (c) 2014 Ondřej Plšek (http://www.ondraplsek.cz)
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "krawler",
-  "version": "0.3.0",
+  "version": "0.1.0",
   "description": "Crawler is a web spider written with Nodejs. It gives you the full power of jQuery on the server to parse a big number of pages as they are downloaded, asynchronously.",
   "keywords": [
     "dom",
@@ -28,10 +28,9 @@
   {
     "type": "git",
     "url": "https://github.com/ondrs/node-krawler.git"
-  }
-  ,
+  },
   "dependencies": {
-    "underscore": "~1.5.2"
+    "underscore": "~1.5.2",
     "async": "0.2.x",
     "xml2js": "0.2.x",
     "q": "~1.0.0",
@@ -41,13 +40,13 @@
     "request": "~2.33.0"
   },
   "devDependencies": {
-    "mocha": "~1.17.1"
+    "chai": "~1.9.0"
   },
   "scripts": {
-    "test": "node test/testrunner.js"
+    "test": "mocha test/test.js"
   },
   "engines": [
-    "node >=0.10.x"
+    "node >=0.8.x"
   ],
   "directories": {
     "lib": "lib"

diff --git a/test/test.js b/test/test.js
@@ -0,0 +1,132 @@
+var Krawler = require(__dirname + '/../lib/krawler.js');
+var expect = require('chai').expect;
+
+describe('Krawler tests', function() {
+
+  this.timeout(10000);
+
+  it('should fetch and parse HTML page', function(done) {
+
+    var crawler = new Krawler;
+
+    crawler
+      .fetchUrl('http://www.google.com')
+      .done(function(result) {
+        /** @type {cheerio} */
+        var $ = result.data;
+        expect($('title').text()).to.be.equal('Google');
+        done();
+      });
+  });
+
+  it('should fetch and parse XML page', function(done) {
+
+    var crawler = new Krawler({
+      parser: 'xml'
+    });
+
+    crawler
+      .fetchUrl('http://www.w3schools.com/xml/note.xml')
+      .done(function(result) {
+        /** @type {Object} */
+        var xml = result.data;
+        expect(xml).to.be.instanceOf(Object);
+        done();
+      });
+  });
+
+  it('should fetch and parse JSON page', function(done) {
+
+    var crawler = new Krawler({
+      parser: 'json'
+    });
+
+    crawler
+      .fetchUrl('https://graph.facebook.com/facebook')
+      .done(function(result) {
+        /** @type {Object} */
+        var json = result.data;
+        expect(json).to.be.instanceOf(Object);
+        done();
+      });
+  });
+
+  it('should fetch raw page', function(done) {
+
+    var crawler = new Krawler({
+      parser: false
+    });
+
+    crawler
+      .fetchUrl('http://www.google.com')
+      .done(function(result) {
+        var str = result.data;
+        expect(str).to.be.equal(str.toString());
+        done();
+      });
+  });
+
+
+  it('should fetch several HTML pages in queue', function(done) {
+
+    var urls = [],
+      fetched = [],
+      crawler = new Krawler;
+
+    for(var i = 0; i < 3; ++i) {
+      urls.push('https://www.google.cz/?q=' + i);
+    }
+
+    crawler
+      .queue(urls)
+      .on('data', function(data, url, response) {
+        fetched.push(urls);
+      })
+      .on('error', function(err, url) {
+        done(err);
+      })
+      .on('end', function() {
+        expect(urls.length).to.be.equal(fetched.length);
+        done();
+      })
+
+  });
+
+  it('should fetch sing HTML page in queue', function(done) {
+
+    var crawler = new Krawler;
+
+    crawler
+      .queue('https://www.google.cz')
+      .on('data', function(data, url, response) {
+        expect(url).to.be.equal('https://www.google.cz');
+      })
+      .on('error', function(err, url) {
+        done(err);
+      })
+      .on('end', function() {
+        done();
+      })
+
+  });
+
+
+  it('forceutf8 - from latin-1', function(done) {
+
+    var crawler = new Krawler({
+      forceUTF8: true
+    });
+
+    crawler
+      .fetchUrl('http://czyborra.com/charsets/iso8859.html')
+      .done(function(result) {
+        /** @type {cheerio} */
+        var $ = result.data;
+        expect($.html()).to.have.string('Jörg');
+        done();
+      });
+
+  });
+
+});
+