-
-
Notifications
You must be signed in to change notification settings - Fork 11
/
retrieve.go
76 lines (69 loc) · 1.81 KB
/
retrieve.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
package filters
import (
"bytes"
"net/url"
"github.com/PuerkitoBio/goquery"
"github.com/microcosm-cc/bluemonday"
"github.com/slurdge/goeland/internal/goeland"
"github.com/slurdge/goeland/internal/goeland/httpget"
"github.com/spf13/viper"
)
// policy is the bluemonday policy used to sanitize retrieved HTML before it
// is stored as entry content. It is assigned in init.
var policy *bluemonday.Policy
// filterLeBrief runs filterRetrieveContent with the fixed selector
// "div.content", replacing whatever arguments were supplied in params.
//
// Deprecated: use filterRetrieveContent instead.
func filterLeBrief(source *goeland.Source, params *filterParams) {
	selector := "div.content"
	// The caller's arguments are overwritten, not merged.
	params.args = []string{selector}
	filterRetrieveContent(source, params)
}
// filterRetrieveContent replaces the content of every entry in source with
// HTML fetched from the entry's own URL.
//
// params.args[0] is the selector handed to goquery's Find; when no argument
// is given the function does nothing. For each entry the page at entry.URL
// is downloaded and parsed, the HTML of the selection is extracted (goquery's
// Html returns the contents of the first matched element), and "src"
// attributes on <img> and "href" attributes on <a> inside the selection are
// rewritten as absolute URLs resolved against the entry URL. The result is
// passed through the package-level sanitizing policy unless the
// "unsafe-no-sanitize-filter" setting is true.
//
// Retrieval is best effort: an entry whose URL cannot be parsed, whose page
// cannot be fetched or parsed, or whose page has no element matching the
// selector is left with its existing content.
func filterRetrieveContent(source *goeland.Source, params *filterParams) {
	if len(params.args) < 1 {
		return
	}
	query := params.args[0]
	// The setting does not depend on the entry, so read it once.
	sanitize := !viper.GetBool("unsafe-no-sanitize-filter")

	for index, entry := range source.Entries {
		link := entry.URL
		// Parse first so an unusable link never costs a network round trip.
		base, err := url.Parse(link)
		if err != nil {
			continue
		}
		body, err := httpget.GetHTTPRessource(link)
		if err != nil {
			continue
		}
		doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
		if err != nil {
			continue
		}
		fullContent := doc.Find(query)
		if fullContent.Length() == 0 {
			// Nothing matched: keep the existing content rather than
			// overwriting it with an empty string.
			continue
		}

		// absolutize builds an Each callback that resolves the given
		// attribute against the entry URL; elements without the attribute or
		// with an unparseable value are left untouched.
		absolutize := func(attr string) func(int, *goquery.Selection) {
			return func(_ int, selection *goquery.Selection) {
				value, exist := selection.Attr(attr)
				if !exist {
					return
				}
				relative, err := url.Parse(value)
				if err != nil {
					return
				}
				selection.SetAttr(attr, base.ResolveReference(relative).String())
			}
		}
		fullContent.Find("img").Each(absolutize("src"))
		fullContent.Find("a").Each(absolutize("href"))

		html, err := fullContent.Html()
		if err != nil {
			continue
		}
		if sanitize {
			html = policy.Sanitize(html)
		}
		entry.Content = html
		// entry is a copy of the slice element, so write it back.
		source.Entries[index] = entry
	}
}
// init sets the package-level policy to the value returned by
// bluemonday.UGCPolicy.
func init() {
policy = bluemonday.UGCPolicy()
}