Permalink
Browse files

bugfixes

  • Loading branch information...
1 parent 3a55f1f commit 12be8ee80532bf29c289ad4ecd52072a4f9a62a3 @claudehohl claudehohl committed Mar 4, 2011
Showing with 10 additions and 8 deletions.
  1. BIN libxmljs.node
  2. +1 −1 node-couch.js
  3. +1 −2 settings.js
  4. +8 −5 spider.js
View
Binary file not shown.
View
@@ -90,7 +90,7 @@ function _interact(verb, path, successStatus, options, host) {
request.addListener('response', function(response) {
var responseBody = ""
- response.setBodyEncoding("utf8");
+ response.setEncoding("utf8");
response.addListener("data", function(chunk) {
responseBody += chunk
View
@@ -1,6 +1,5 @@
-exports.couchhost = '192.168.175.128:5984';
+exports.couchhost = '127.0.0.1:5984';
exports.couchbase = 'spider';
exports.targethost = 'finalfantasy.wikia.com';
exports.max_streams = 3;
-exports.max_streams = 3;
exports.crawl_timeout = 500;
View
@@ -23,7 +23,7 @@ function get_content_type(headers) {
return headers['content-type'].split(';')[0];
}
-var libxml = require("./libxmljs"),
+var libxml = require("libxmljs"),
http = require("http"),
url = require("url"),
settings = require("./settings"),
@@ -76,7 +76,7 @@ var getPage = function(URL, connection, callback) {
var request = connection.request("GET", URL, {"host": settings.targethost});
request.addListener('response', function (response) {
- response.setBodyEncoding("utf8");
+ response.setEncoding("utf8");
var text = '';
@@ -121,8 +121,11 @@ var cleanPage = function(parsed_html) {
var pageTitle = function(parsed_html) {
var title = parsed_html.get('//head/title');
+ if(title){
+ title = title.text();
+ }
- return title.text();
+ return title;
}
var known_pages = [];
@@ -173,7 +176,7 @@ var crawl_page = function (URL, connection, stream_id) {
sys.puts('Bad parsed page: ' + URL);
}
} else {
- sys.puts('Strange content type: ' + content-type);
+ sys.puts('Strange content type: ' + content_type);
}
} else if (code == 301 || code == 303) {
@@ -216,4 +219,4 @@ var save_page = function (URL, title, text) {
crawl_page('/', target_site, 1);
-num_of_streams = 1;
+num_of_streams = 1;

0 comments on commit 12be8ee

Please sign in to comment.