-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
parse.go
117 lines (99 loc) · 2.2 KB
/
parse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
package rss
import (
"compress/gzip"
"encoding/xml"
"io"
"io/ioutil"
"net/http"
"net/url"
)
// ParseFile reads the named file and parses its contents as an RSS feed.
// It returns any error encountered while reading or unmarshalling.
func ParseFile(filename string) (*Rss, error) {
	data, err := ioutil.ReadFile(filename)
	if err != nil {
		return nil, err
	}
	return ParseByte(data)
}
// ParseByte unmarshals the given XML bytes into an Rss value.
// A nil *Rss is returned together with the unmarshalling error, if any.
func ParseByte(b []byte) (*Rss, error) {
	feed := new(Rss)
	if err := xml.Unmarshal(b, feed); err != nil {
		return nil, err
	}
	return feed, nil
}
// ParseString parses the given string as RSS XML.
// It is a convenience wrapper around ParseByte.
func ParseString(str string) (*Rss, error) {
	data := []byte(str)
	return ParseByte(data)
}
// ParseURL fetches rawurl with the supplied HTTP client and parses the
// response body as an RSS feed. The request advertises gzip support and
// the body is transparently decompressed when the server answers with
// Content-Encoding: gzip.
//
// The caller supplies the *http.Client, so timeouts and transport
// settings are under the caller's control.
func ParseURL(rawurl string, client *http.Client) (*Rss, error) {
	urlparse, err := url.Parse(rawurl)
	if err != nil {
		return nil, err
	}
	request, err := http.NewRequest("GET", urlparse.String(), nil)
	if err != nil {
		return nil, err
	}
	// Browser-like User-Agent: some feed servers reject unknown clients.
	request.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) "+
		"AppleWebKit/537.36 (KHTML, like Gecko) "+
		"Chrome/80.0.3987.163 Safari/537.36")
	request.Header.Set("Accept-Encoding", "gzip")
	request.Header.Set("Cache-Control", "max-age=0")
	response, err := client.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()
	// Check that the server actually sent compressed data.
	var reader io.ReadCloser
	switch response.Header.Get("Content-Encoding") {
	case "gzip":
		// BUG FIX: the gzip.NewReader error was previously ignored; on a
		// malformed gzip stream the reader was nil and the deferred Close
		// (and the subsequent ReadAll) panicked.
		gz, err := gzip.NewReader(response.Body)
		if err != nil {
			return nil, err
		}
		defer gz.Close()
		reader = gz
	default:
		reader = response.Body
	}
	b, err := ioutil.ReadAll(reader)
	if err != nil {
		return nil, err
	}
	return ParseByte(b)
}
// ParseBulk fetches and parses every URL in rawurls concurrently, using
// at most options.maxgoroutine worker goroutines, and returns the feeds
// that were fetched successfully. URLs that fail to fetch or parse are
// skipped; callers should therefore not assume len(result) == len(rawurls).
func ParseBulk(rawurls []string, client *http.Client, options *BulkOptions) []Rss {
	urlChan := make(chan string, options.buffer_chan)
	resChan := make(chan *Rss, options.buffer_chan)

	// Workers: each drains urlChan until it is closed, sending exactly one
	// result (possibly nil) per URL so the collector's count stays exact.
	for i := 0; i < options.maxgoroutine; i++ {
		go func() {
			for rawurl := range urlChan {
				feed, err := ParseURL(rawurl, client)
				if err != nil {
					// BUG FIX: this previously panicked, crashing the whole
					// process on any single failed fetch. Send nil instead so
					// the collector still receives one result per URL and
					// cannot deadlock.
					resChan <- nil
					continue
				}
				resChan <- feed
			}
		}()
	}

	// Feeder: deliver all URLs, then close so the workers terminate.
	go func() {
		for _, v := range rawurls {
			urlChan <- v
		}
		close(urlChan)
	}()

	// Collect exactly one result per input URL, keeping only successes.
	var sliceRss []Rss
	for i := 0; i < len(rawurls); i++ {
		if res := <-resChan; res != nil {
			sliceRss = append(sliceRss, *res)
		}
	}
	close(resChan)
	return sliceRss
}