/
imagepicker.js
73 lines (64 loc) · 1.78 KB
/
imagepicker.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// Service that extracts the URLs of all <img> images found on a page
var request = require("request");
var htmlparser = require("htmlparser");
var soupselect = require("soupselect");
module.exports = {
process: function(twist, callback) {
request({url: twist.url}, function(error, response, body) {
// Invalid url
if (error) {
callback({
provider: 'imagepicker',
error: 'invalid url'
});
return;
}
// Register handler
var images = [];
response.setEncoding('utf8');
var handler = new htmlparser.DefaultHandler(function (error, dom) {
// Unable to handle parser
if (error) {
callback({
provider: 'imagepicker',
error: 'unable to instanciate handler'
});
return;
}
// Retrieve all images
var imageTags = soupselect.select(dom, "img");
imageTags.forEach(function(image) {
// Ignore local image
var src = image.attribs.src;
if (!src || src.indexOf('data:') == 0) {
return;
}
// Convert to absolute URL
var url;
if (src.indexOf('http') == 0) {
url = src;
} else if (src.indexOf('//') == 0) {
url = 'http:' + src;
} else {
var dotdotslashslash = twist.url.indexOf('://');
var scheme = twist.url.substr(0, dotdotslashslash);
var serverEnd = twist.url.indexOf('/', dotdotslashslash+3);
var server = twist.url.substr(dotdotslashslash+3, (serverEnd != -1 ? serverEnd : twist.url.length)-scheme.length-3);
url = scheme + '://' + server + (src[0]!='/'?'/':'') + src;
}
// Add to images list
images.push(url);
});
// Return images set
twist.images = images;
callback({
provider: 'imagepicker',
images: images
});
});
// Launch parsing
var parser = new htmlparser.Parser(handler);
parser.parseComplete(body);
});
}
}