-
Notifications
You must be signed in to change notification settings - Fork 2
/
index.js
127 lines (104 loc) · 3.2 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
var cheerio = require('cheerio');
var request = require('request');
var validator = require('validator');
/*
 * Options passed to validator.isURL() in this module: a URL must carry an
 * explicit protocol, and only the listed protocols are accepted.
 */
var stats = {
  require_protocol: true,
  protocols: ['http', 'https', 'ftp']
};
/*
 * Renames a meta-tag prefix (the part before the first ':') to the key under
 * which those tags are grouped in the result; unlisted prefixes are kept as-is.
 */
var attributeMapping = { og: 'facebook' };
/**
 * Extracts the host name from a URL-like string.
 *
 * The scheme ("http://", "ftp://", ...), any "user:password@" prefix, the
 * port, and everything from the first '/', '?' or '#' onwards are removed.
 * Strings without a scheme are treated as starting directly with the host.
 *
 * @param {string} url - Absolute URL or scheme-less "host/path" string.
 * @returns {string} The host portion, or '' when none is present.
 */
function extractDomain(url) {
  var rest = url;
  var schemeEnd = url.indexOf('://');

  // "://" only marks a scheme when nothing path-like precedes it; otherwise it
  // belongs to a path or query (e.g. "example.com/?next=http://other.org").
  if (schemeEnd > -1 && !/[\/?#]/.test(url.slice(0, schemeEnd))) {
    rest = url.slice(schemeEnd + 3);
  }

  // The authority ends at the first path, query or fragment delimiter.
  var authority = rest.split(/[\/?#]/)[0];

  // Drop "user:password@" so the ':' inside it is not mistaken for a port.
  var host = authority.slice(authority.lastIndexOf('@') + 1);

  // Drop the port number.
  return host.split(':')[0];
}
module.exports = function(link, cb) {
if (!link) {
cb({ message: 'Url is empty' }, null);
return;
}
if (!validator.isURL(link, stats)) {
cb({ message: 'Url is not valid' }, null);
return;
}
request(link, function(error, response, body) {
if (error || response === undefined) {
cb({ message: 'Empty response' });
return;
}
var $ = cheerio.load(body);
var meta = $('meta');
var title = $('title').text();
var images = $('img');
var metatags = {};
var images2 = [];
/* Purge invalid tags on the object */
meta = meta.filter(function(m) {
return meta[m].hasOwnProperty('attribs')
&& meta[m].attribs !== undefined
// Has either property or name:
&& (meta[m].attribs.property !== undefined ||
meta[m].attribs.name !== undefined)
&& meta[m].attribs.content !== undefined;
});
images.each(function(m) {
var src = "";
if(validator.isURL(images[m].attribs.src, stats)){
src = images[m].attribs.src;
}else{
src = extractDomain(link) +'/'+ images[m].attribs.src;
}
images2.push({
src: src
});
});
meta.each(function(m) {
var _meta = meta[m];
var property;
if (_meta.attribs.property) {
property = _meta.attribs.property.split(':');
} else {
property = _meta.attribs.name.split(':');
}
/*
* Checking if property name is mapped to a value, if it's not,
* use it as it is
*/
var propertyName = attributeMapping[property[0]] ? attributeMapping[property[0]] : property[0];
var propertyValue = property[1];
/*
* Checks if the meta tag 'vendor' is present on our metatags hash
*/
if (!metatags.hasOwnProperty(propertyName)) {
metatags[propertyName] = {};
}
if(typeof propertyValue == 'undefined'){
metatags[propertyName] = _meta.attribs.content;
}else{
metatags[propertyName][propertyValue] = _meta.attribs.content;
}
});
/*
* Include tag title
*/
metatags['title'] = title;
/*
* Include images from document
*/
metatags['otherimages'] = images2;
cb(null, metatags);
});
};