Join GitHub today
GitHub is home to over 50 million developers working together to host and review code, manage projects, and build software together.
Sign up
GitHub is where the world builds software
Millions of developers and companies build, ship, and maintain their software on GitHub — the largest and most advanced development platform in the world.
// Core modules.
var util = require('util');
var fs = require('fs');

// Project-local helpers.
var spawn = require('./spawn');
var proxy = require('./proxy');
var errors = require('./errors');

// Working directory captured once, when the module is loaded.
var cwd = process.cwd();
/**
 * Friendly option aliases.
 *
 * Each property name is the alias accepted in request options; its value is
 * the curl long-option name the alias is renamed to.
 */
var curl_map = {
    timeout: 'max-time',
    redirects: 'max-redirs',
    method: 'request',
    useragent: 'user-agent'
};
/**
 * Default user-agents.
 *
 * Read synchronously from useragents.txt (next to this module) when the
 * module is loaded, one user-agent per line. Blank lines are ignored, so the
 * list is correct whether or not the file ends with a newline, and both LF
 * and CRLF line endings are accepted.
 */
var ua_file = __dirname + '/useragents.txt';
var user_agents = fs.readFileSync(ua_file).toString().split(/\r?\n/).filter(function (line) {
    return line.trim().length > 0;
});
var user_agents_len = user_agents.length;
/**
 * Headers sent with every request. Entries in a request's own `headers`
 * option are written over these when the names match.
 */
var default_headers = {};
default_headers['Accept'] = '*/*';
default_headers['Accept-Charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.3';
default_headers['Accept-Language'] = 'en-US,en;q=0.8';
| /** | |
| * Make a request with cURL. | |
| * | |
| * @param {Object|String} options (optional) - see `man curl` | |
| * @param {Function} callback (optional) | |
| * @api public | |
| */ | |
| exports.request = function (options, callback) { | |
| if (arguments.length === 1) { | |
| var defaults = options; | |
| return function (options, callback) { | |
| if (typeof options === 'function') { | |
| callback = options; | |
| options = {}; | |
| } else if (typeof options === 'string') { | |
| options = { url: options }; | |
| } | |
| for (var key in defaults) { | |
| if (typeof options[key] === 'undefined') { | |
| options[key] = defaults[key]; | |
| } | |
| } | |
| exports.request.call(this, options, callback); | |
| }; | |
| } | |
| if (options.retries) { | |
| var remaining = options.retries; | |
| delete options.retries; | |
| return (function curl() { | |
| exports.request(options, function (err) { | |
| if (!err || !--remaining) { | |
| return callback.apply(this, arguments); | |
| } | |
| process.nextTick(curl); | |
| }); | |
| })(); | |
| } | |
| if (typeof options === 'string') { | |
| options = { url: options }; | |
| } else { | |
| options = exports.copy(options); | |
| } | |
| for (var key in curl_map) { | |
| if (typeof options[key] !== 'undefined') { | |
| options[curl_map[key]] = options[key]; | |
| delete options[key]; | |
| } | |
| } | |
| if (options.proxies) { | |
| if (!proxy.transform) { | |
| proxy.transform = proxy.unpack(options.key).transform; | |
| } | |
| options = proxy.transform(options); | |
| delete options.key; | |
| } | |
| var curl | |
| , curl_path = 'curl' | |
| , args = ['--silent', '--show-error', '--no-buffer'] | |
| , start = new Date | |
| , err | |
| , stdoutlen | |
| , stdout = new Buffer(stdoutlen = 0) | |
| , encoding | |
| , complete | |
| , cleanup | |
| , postprocess | |
| , require_str | |
| , require_not_str | |
| , scope = {} | |
| , cmd = 'curl' | |
| , timeout; | |
| function finish() { | |
| if (err in errors) { | |
| err = errors[err]; | |
| } | |
| callback.call(scope, err, stdout, { | |
| cmd: cmd | |
| , args: args | |
| , time: (new Date().getTime() - start.getTime()) | |
| }); | |
| complete = true; | |
| } | |
| //Allow for a custom curl path | |
| if (options.curl_path) { | |
| curl_path = options.curl_path; | |
| delete options.curl_path; | |
| } | |
| //Follow location by default | |
| if ('max-redirs' in options) { | |
| options.location = !!options['max-redirs']; | |
| } else { | |
| options.location = true; | |
| options['max-redirs'] = 3; | |
| } | |
| //Add an additional setTimeout for max-time | |
| if (options['max-time']) { | |
| timeout = setTimeout(function () { | |
| if (complete) return; | |
| stderr = 'timeout', stdout = null; | |
| finish(); | |
| if (curl && curl.kill) curl.kill('SIGKILL'); | |
| }, 1000 * options['max-time']); | |
| } | |
| //Default encoding is utf8. Set encoding = null to get a buffer | |
| if (!options.encoding && options.encoding !== null) { | |
| options.encoding = 'utf8'; | |
| } | |
| if (options.encoding) { | |
| encoding = options.encoding; | |
| if (encoding === 'ascii') { | |
| options['use-ascii'] = true; | |
| } | |
| delete options.encoding; | |
| } | |
| //Parse POST data | |
| if (options.data && typeof options.data === 'object') { | |
| var data = []; | |
| for (var key in options.data) { | |
| data.push(encodeURIComponent(key) + '=' + encodeURIComponent(options.data[key])); | |
| } | |
| options.data = data.join('&'); | |
| } | |
| //Check for the occurrence of a string and fail if not found | |
| if (options.require) { | |
| require_str = options.require; | |
| if (!Array.isArray(require_str)) { | |
| require_str = [require_str]; | |
| } | |
| delete options.require; | |
| } | |
| //Check for the occurrence of a string and fail if found | |
| if (options.require_not) { | |
| require_not_str = options.require_not; | |
| if (!Array.isArray(require_not_str)) { | |
| require_not_str = [require_not_str]; | |
| } | |
| delete options.require_not; | |
| } | |
| //Call the callback in a custom scope | |
| if (options.scope) { | |
| scope = options.scope; | |
| delete options.scope; | |
| } | |
| //Apply a post-processing function? | |
| if (options.process) { | |
| postprocess = options.process; | |
| delete options.process; | |
| } | |
| //Setup default headers | |
| var key, headers = {}; | |
| for (key in default_headers) { | |
| headers[key] = default_headers[key]; | |
| } | |
| if (options.headers) { | |
| for (key in options.headers) { | |
| key = key.replace(/[_-]/g, ' ').split(' ').map(function (str) { | |
| if (str.length) { | |
| str = str[0].toUpperCase() + str.substr(1); | |
| } | |
| return str; | |
| }).join('-'); | |
| headers[key] = options.headers[key]; | |
| } | |
| delete options.headers; | |
| } | |
| options.header = options.header || []; | |
| for (key in headers) { | |
| options.header.push(key + ': ' + headers[key]); | |
| } | |
| //Select a random user agent if one wasn't provided | |
| if (!headers['User-Agent'] && !options['user-agent']) { | |
| options['user-agent'] = user_agents[Math.random() * user_agents_len | 0]; | |
| } | |
| //Prepare curl args | |
| var key, values; | |
| for (key in options) { | |
| values = Array.isArray(options[key]) ? options[key] : [options[key]]; | |
| values.forEach(function (value) { | |
| args.push('--' + key); | |
| if (true !== value) { | |
| args.push(value); | |
| } | |
| }); | |
| } | |
| if (options.file) { | |
| cmd = 'cat'; | |
| args = [options.file]; | |
| } | |
| //Simulate the spawn? | |
| if (options.pretend) { | |
| return finish(); | |
| } | |
| //Spawn the process | |
| var child = spawn(cmd, args, { cwd: options.cwd || cwd }, function (curl) { | |
| //Collect stdout | |
| curl.stdout.on('data', function (data) { | |
| if (complete) return; | |
| var len = data.length, prev = stdout; | |
| stdout = new Buffer(len + stdoutlen); | |
| prev.copy(stdout, 0, 0, stdoutlen); | |
| data.copy(stdout, stdoutlen, 0, len); | |
| stdoutlen += len; | |
| }); | |
| //Pipe stderr to the current process? | |
| if (options.stderr) { | |
| if (options.stderr === true) { | |
| curl.stderr.pipe(process.stderr); | |
| delete options.stderr | |
| } | |
| } | |
| //Handle curl exit | |
| curl.on('exit', function (code) { | |
| try { | |
| err = code; | |
| if (complete) return; | |
| if (encoding) { | |
| stdout = stdout.toString(encoding); | |
| } | |
| if (postprocess && stdout) { | |
| stdout = postprocess(stdout); | |
| } | |
| if (require_str) { | |
| var valid = false; | |
| if (!encoding) { | |
| stdout = stdout.toString(); | |
| } | |
| for (var i = 0, l = require_str.length; i < l; i++) { | |
| if ((util.isRegExp(require_str[i]) && require_str[i].test(stdout)) | |
| || stdout.indexOf(require_str[i]) !== -1) { | |
| valid = true; | |
| break; | |
| } | |
| } | |
| if (!valid) { | |
| err = 'response does not contain required string(s)'; | |
| stdout = null | |
| } else if (!encoding) { | |
| stdout = new Buffer(stdout); | |
| } | |
| } | |
| if (require_not_str) { | |
| var valid = true; | |
| if (!encoding) { | |
| stdout = stdout.toString(); | |
| } | |
| for (var i = 0, l = require_not_str.length; i < l; i++) { | |
| if ((util.isRegExp(require_not_str[i]) && require_not_str[i].test(stdout)) | |
| || stdout.indexOf(require_not_str[i]) !== -1) { | |
| valid = false; | |
| break; | |
| } | |
| } | |
| if (!valid) { | |
| err = 'response contains bad string(s)'; | |
| stdout = null | |
| } else if (!encoding) { | |
| stdout = new Buffer(stdout); | |
| } | |
| } | |
| } catch (e) { | |
| err = typeof e === 'object' ? e.message || '' : e; | |
| } | |
| finish(); | |
| if (timeout) clearTimeout(timeout); | |
| }); | |
| }); | |
| }; | |
| /** | |
| * Expose a helper for scraping urls from a page. | |
| */ | |
| var urls = /(?:href|src|HREF|SRC)=["']?([^"' >]+)/g; | |
| exports.urls = function (data, regex) { | |
| var match, matches = []; | |
| while (match = urls.exec(data)) { | |
| if (regex && !regex.test(match[1])) { | |
| continue; | |
| } | |
| matches.push(match[1].replace(/[\r\n\t\s]/g, '')); | |
| } | |
| return matches; | |
| }; | |
| /** | |
| * A helper for handling async concurrency. | |
| */ | |
| exports.concurrent = function (input, concurrency, fn) { | |
| if (arguments.length === 3) { | |
| var len = input.length, pos = 0, remaining = concurrency; | |
| for (var i = 0; i < concurrency; i++) { | |
| (function exec() { | |
| if (pos >= len) { | |
| if (!--remaining) { | |
| fn(null, function () {}); | |
| } | |
| } else { | |
| fn(input[pos++], function () { | |
| process.nextTick(exec); | |
| }); | |
| } | |
| })(); | |
| } | |
| } else { | |
| fn = concurrency; | |
| concurrency = input; | |
| for (var i = 0; i < concurrency; i++) { | |
| (function exec() { | |
| fn(function () { | |
| process.nextTick(exec); | |
| }); | |
| })(); | |
| } | |
| } | |
| }; | |
| /** | |
| * A helper for copying an object. | |
| */ | |
| exports.copy = function (obj) { | |
| var copy = {}; | |
| for (var i in obj) { | |
| if (typeof obj[i] === 'object') { | |
| copy[i] = exports.copy(obj[i]); | |
| } else { | |
| copy[i] = obj[i]; | |
| } | |
| } | |
| return copy; | |
| }; |