Skip to content
This repository has been archived by the owner on Aug 12, 2021. It is now read-only.

Commit

Permalink
gzip support
Browse files Browse the repository at this point in the history
  • Loading branch information
ondrs committed Jun 23, 2014
1 parent ad4844e commit 362a4b6
Showing 1 changed file with 90 additions and 44 deletions.
134 changes: 90 additions & 44 deletions lib/index.js
Expand Up @@ -7,6 +7,7 @@ var async = require('async'),
events = require('events'),
Q = require('q'),
jschardet = require('jschardet'),
zlib = require('zlib'),
encoding = require('encoding');

/**
Expand Down Expand Up @@ -40,7 +41,7 @@ util.inherits(Krawler, events.EventEmitter);
* @type {string}
* @const
*/
Krawler.prototype.VERSION = '0.3.2';
Krawler.prototype.VERSION = '0.3.3';

/**
*
Expand Down Expand Up @@ -183,68 +184,113 @@ Krawler.prototype.fetchUrl = function(url) {
return;
}

try {

var data = body;
if(response.headers['content-encoding'] == 'gzip') {

if(self.options_.forceUTF8) {
data = self.convertToUTF8(data);
}
self.unzip(body)
.then(function(data) {
return self.formatData_(data)
})
.then(function(data) {

switch (self.options_.parser) {
case 'cheerio':
try {
deferred.resolve({
data: cheerio.load(data),
response: response
});
} catch (e) {
deferred.reject(e);
}
break;
deferred.resolve({
data: data,
response: response
});
})
.catch(deferred.reject);

case 'json':
try {
deferred.resolve({
data: JSON.parse(data),
response: response
});
} catch (e) {
deferred.reject(e);
}
break;
} else {

case 'xml':
parseString(data, function (err, xml) {
if(err) {
deferred.reject(err);
} else {
deferred.resolve({
data: xml,
response: response
});
}
});
break;
self.formatData_(body)
.then(function(data) {

default :
deferred.resolve({
data: data,
response: response
});
break;
}
})
.catch(deferred.reject);
}

} catch (e) {
deferred.reject(e);

});

return deferred.promise;
};


/**
 * Gunzips a compressed payload and decodes the result into a string.
 *
 * @param {Buffer|string} data Gzip-compressed payload; passed straight to
 *     zlib.gunzip.
 * @returns {Q.promise} Resolved with the decompressed content as a string
 *     (decoded with Buffer#toString's default encoding); rejected with the
 *     zlib error when decompression fails.
 */
Krawler.prototype.unzip = function(data) {
    var deferred = Q.defer();

    zlib.gunzip(data, function(err, inflated) {
        if (err) {
            deferred.reject(err);
            return;
        }

        // zlib already hands back a Buffer, so decode it directly; wrapping it
        // in the deprecated `new Buffer(...)` constructor only made a copy.
        // NOTE(review): decoding happens before formatData_ runs, so any
        // charset conversion there operates on an already-decoded string --
        // confirm that is what convertToUTF8 expects.
        deferred.resolve(inflated.toString());
    });

    return deferred.promise;
};


/**
 * Prepares a fetched payload for the consumer: optionally converts it via
 * convertToUTF8 (when options_.forceUTF8 is set) and then runs it through
 * the parser selected by options_.parser.
 *
 * Parser handling:
 *   'cheerio' - resolves with cheerio.load(content)
 *   'json'    - resolves with JSON.parse(content)
 *   'xml'     - resolves with the result passed to the parseString callback
 *   other     - resolves with the (possibly converted) content unchanged
 *
 * @param {string} data Payload to convert and parse.
 * @returns {Q.promise} Resolved with the parsed payload; rejected with the
 *     error raised by the conversion or by the selected parser.
 * @private
 */
Krawler.prototype.formatData_ = function(data) {
    var self = this,
        deferred = Q.defer(),
        content = data;

    // A promise-returning function should never throw synchronously: callers
    // only attach a rejection handler to the returned promise. One guard
    // around the whole body turns any synchronous failure (conversion,
    // cheerio.load, JSON.parse, or the parseString call itself) into a
    // rejection.
    try {
        if (self.options_.forceUTF8) {
            content = self.convertToUTF8(content);
        }

        switch (self.options_.parser) {
            case 'cheerio':
                deferred.resolve(cheerio.load(content));
                break;

            case 'json':
                deferred.resolve(JSON.parse(content));
                break;

            case 'xml':
                parseString(content, function(err, xml) {
                    if (err) {
                        deferred.reject(err);
                    } else {
                        deferred.resolve(xml);
                    }
                });
                break;

            default:
                deferred.resolve(content);
                break;
        }
    } catch (e) {
        deferred.reject(e);
    }

    return deferred.promise;
};


/**
*
* @param {string} string
Expand Down

0 comments on commit 362a4b6

Please sign in to comment.