-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawl.js
44 lines (38 loc) · 1.17 KB
/
crawl.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
//依赖模块
var fs = require('fs');
var request = require("request");
var cheerio = require("cheerio");
var mkdirp = require('mkdirp');
//Page to crawl
const url = 'https://cnodejs.org/';
//Local directory the images are saved to
const dir = './images';
//Proxy (prefix the host with the proxy credentials as xxxx:xxxx@ when authentication is needed)
const proxy = 'http://eproxy.sz.intech:3128';
//Create the storage directory; a failure is only logged, the script keeps running
mkdirp(dir, function(mkdirError) {
    if(!mkdirError){
        return;
    }
    console.log(mkdirError);
});
//Fetch the target page through the proxy and start a download for every <img> it contains
request(url, {'proxy': proxy}, function(error, response, body) {
    //Report a failed page request instead of ending silently
    if(error){
        console.error('请求失败 ' + url + ': ' + error.message);
        return;
    }
    if(response.statusCode !== 200){
        console.error('请求失败 ' + url + ': HTTP ' + response.statusCode);
        return;
    }
    var $ = cheerio.load(body);
    $('img').each(function() {
        var src = $(this).attr('src');
        //An <img> without a src attribute yields undefined; there is nothing to download
        if(!src){
            return;
        }
        var resolved;
        try {
            //Resolve relative ("/a.png", "a.png") and protocol-relative ("//host/a.png")
            //references against the page URL, the way a browser would
            resolved = new URL(src, url);
        } catch(e) {
            console.error('跳过无效的图片地址 ' + src);
            return;
        }
        //Skip data:, blob: and other schemes that cannot be fetched over HTTP
        if(resolved.protocol !== 'http:' && resolved.protocol !== 'https:'){
            return;
        }
        console.log('正在下载' + resolved.href);
        //download() is asynchronous and returns immediately, so no completion message is logged here.
        //NOTE: the random file name is not guaranteed to be unique and the extension is always .jpg
        download(resolved.href, dir, Math.floor(Math.random()*100000) + '.jpg');
    });
});
//Download helper
/**
 * Stream the resource at `url` through the configured proxy into `dir`/`filename`.
 * Failures are logged rather than thrown, so one bad image does not stop the others.
 *
 * @param {string} url - Absolute URL of the file to fetch.
 * @param {string} dir - Directory to write into.
 * @param {string} filename - Name of the file to create inside `dir`.
 */
var download = function(url, dir, filename){
    var target = dir + "/" + filename;
    var file = fs.createWriteStream(target);
    //Without a listener a stream 'error' (e.g. missing directory) is thrown and kills the process
    file.on('error', function(err){
        console.error('写入失败 ' + target + ': ' + err.message);
    });
    file.on('finish', function(){
        console.log('已保存 ' + target);
    });
    request(url, {'proxy': proxy})
        .on('error', function(err){
            console.error('下载失败 ' + url + ': ' + err.message);
            //Release the file descriptor; destroy() does not emit 'finish'
            file.destroy();
        })
        .on('response', function(res){
            //The body is still written, but make it visible that it is probably not an image
            if(res.statusCode !== 200){
                console.error('下载失败 ' + url + ': HTTP ' + res.statusCode);
            }
        })
        .pipe(file);
};