-
Notifications
You must be signed in to change notification settings - Fork 1
/
feed.go
146 lines (121 loc) · 3.14 KB
/
feed.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
package feed
import (
"fmt"
"html"
"io"
"net/http"
"net/url"
"regexp"
"strings"
"time"
"github.com/microcosm-cc/bluemonday"
"github.com/mmcdole/gofeed"
"github.com/theandrew168/bloggulus/internal/core"
)
// Patterns matching whole HTML elements whose contents are removed before a
// page is reduced to plain text. Regular expressions are a blunt tool for
// HTML, but they are good enough to drop these sections wholesale.
//
// Each pattern is case-insensitive, lets "." span newlines, accepts optional
// attributes on the opening tag, and matches lazily up to the nearest closing
// tag. The opening tag name must be followed by whitespace or ">" so that
// longer tag names sharing the same prefix are left alone.
var (
	codePattern   = regexp.MustCompile(`(?is)<code(?:\s[^>]*)?>.*?</code\s*>`)
	footerPattern = regexp.MustCompile(`(?is)<footer(?:\s[^>]*)?>.*?</footer\s*>`)
	headerPattern = regexp.MustCompile(`(?is)<header(?:\s[^>]*)?>.*?</header\s*>`)
	navPattern    = regexp.MustCompile(`(?is)<nav(?:\s[^>]*)?>.*?</nav\s*>`)
	prePattern    = regexp.MustCompile(`(?is)<pre(?:\s[^>]*)?>.*?</pre\s*>`)
)
// CleanHTML reduces an HTML document to text.
//
// It first removes every match of the code, footer, header, nav, and pre
// patterns, then runs the remainder through bluemonday's strict policy,
// unescapes HTML entities, and finally drops any invalid UTF-8 sequences.
// A more robust (parser-based) approach would be a welcome contribution.
func CleanHTML(s string) string {
	// remove unwanted sections in the same order as always: code, footer,
	// header, nav, pre
	unwanted := []*regexp.Regexp{
		codePattern,
		footerPattern,
		headerPattern,
		navPattern,
		prePattern,
	}
	for _, pattern := range unwanted {
		s = pattern.ReplaceAllString(s, "")
	}

	// sanitize what is left, decode entities, and discard invalid bytes
	sanitized := bluemonday.StrictPolicy().Sanitize(s)
	unescaped := html.UnescapeString(sanitized)
	return strings.ToValidUTF8(unescaped, "")
}
// Reader describes how blog and post data is obtained. This file provides
// two implementations: reader (network-backed) and mockReader (fixed values).
type Reader interface {
// ReadBlog returns the core.Blog for the feed located at feedURL.
ReadBlog(feedURL string) (core.Blog, error)
// ReadBlogPosts returns the posts that belong to blog.
ReadBlogPosts(blog core.Blog) ([]core.Post, error)
// ReadPostBody returns the body text of post.
ReadPostBody(post core.Post) (string, error)
}
// reader is the Reader implementation that fetches feeds and post pages
// over HTTP. It holds no state.
type reader struct{}
// NewReader returns a Reader backed by the stateless reader type, which
// retrieves feeds and post pages over the network.
func NewReader() Reader {
	return &reader{}
}
// ReadBlog fetches and parses the feed at feedURL and returns a core.Blog
// built from the feed URL, the feed's link, and the feed's title.
//
// feedURL must be an absolute URL (both scheme and host present). An error
// is returned if the URL is malformed or not absolute, or if the feed cannot
// be fetched or parsed. Underlying errors are wrapped and remain available
// through errors.Is / errors.As.
func (r *reader) ReadBlog(feedURL string) (core.Blog, error) {
	// early check to ensure the URL is valid; url.Parse alone accepts nearly
	// any string, so also require the parts needed for an HTTP request
	u, err := url.Parse(feedURL)
	if err != nil {
		return core.Blog{}, fmt.Errorf("parsing feed URL: %w", err)
	}
	if u.Scheme == "" || u.Host == "" {
		return core.Blog{}, fmt.Errorf("feed URL %q is not absolute", feedURL)
	}

	// attempt to parse the feed via gofeed
	fp := gofeed.NewParser()
	feed, err := fp.ParseURL(feedURL)
	if err != nil {
		return core.Blog{}, fmt.Errorf("reading feed %q: %w", feedURL, err)
	}

	// create a core.Blog for the feed
	blog := core.NewBlog(feedURL, feed.Link, feed.Title)
	return blog, nil
}
// ReadBlogPosts fetches and parses the feed at blog.FeedURL and returns one
// core.Post per feed item, in feed order.
//
// Each post's timestamp is the item's updated time when present, otherwise
// its published time, otherwise the current time. If the feed cannot be
// fetched or parsed, a nil slice and an error naming the feed URL are
// returned; the underlying error is wrapped.
func (r *reader) ReadBlogPosts(blog core.Blog) ([]core.Post, error) {
	// attempt to parse the feed via gofeed
	fp := gofeed.NewParser()
	feed, err := fp.ParseURL(blog.FeedURL)
	if err != nil {
		return nil, fmt.Errorf("reading feed %q: %w", blog.FeedURL, err)
	}

	// create a core.Post for each entry
	var posts []core.Post
	for _, item := range feed.Items {
		// try Updated then Published to obtain a timestamp
		var updated time.Time
		switch {
		case item.UpdatedParsed != nil:
			updated = *item.UpdatedParsed
		case item.PublishedParsed != nil:
			updated = *item.PublishedParsed
		default:
			// neither timestamp is available: default to now
			updated = time.Now()
		}

		posts = append(posts, core.NewPost(item.Link, item.Title, updated, blog))
	}

	return posts, nil
}
// postBodyClient is the HTTP client used to download post pages. Unlike the
// default client it has a timeout, so an unresponsive server cannot block a
// read indefinitely.
var postBodyClient = &http.Client{Timeout: 30 * time.Second}

// ReadPostBody downloads the page at post.URL and returns its contents
// after passing them through CleanHTML.
//
// An error prefixed with post.URL is returned if the request fails, if the
// server answers with a non-2xx status, or if the response body cannot be
// read. Underlying errors are wrapped and remain available through
// errors.Is / errors.As.
func (r *reader) ReadPostBody(post core.Post) (string, error) {
	resp, err := postBodyClient.Get(post.URL)
	if err != nil {
		return "", fmt.Errorf("%v: %w", post.URL, err)
	}
	defer resp.Body.Close()

	// an error page is not a post body; refuse anything outside 2xx
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("%v: unexpected status: %v", post.URL, resp.Status)
	}

	buf, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("%v: %w", post.URL, err)
	}

	return CleanHTML(string(buf)), nil
}
// mockReader is a Reader that performs no I/O and returns the fixed values
// it was constructed with.
type mockReader struct {
// blog is the value returned by ReadBlog
blog core.Blog
// posts is the value returned by ReadBlogPosts
posts []core.Post
// body is the value returned by ReadPostBody
body string
}
// NewMockReader returns a Reader whose ReadBlog, ReadBlogPosts, and
// ReadPostBody methods always succeed and return blog, posts, and body
// respectively.
func NewMockReader(blog core.Blog, posts []core.Post, body string) Reader {
	mock := new(mockReader)
	mock.blog = blog
	mock.posts = posts
	mock.body = body
	return mock
}
// ReadBlog ignores feedURL and returns the stored blog with a nil error.
func (r *mockReader) ReadBlog(feedURL string) (core.Blog, error) {
return r.blog, nil
}
// ReadBlogPosts ignores blog and returns the stored posts with a nil error.
func (r *mockReader) ReadBlogPosts(blog core.Blog) ([]core.Post, error) {
return r.posts, nil
}
// ReadPostBody ignores post and returns the stored body with a nil error.
func (r *mockReader) ReadPostBody(post core.Post) (string, error) {
return r.body, nil
}