forked from Gonzih/feeds2imap-go
/
feeds.go
218 lines (164 loc) · 4.5 KB
/
feeds.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
package feeds2imap
import (
"encoding/json"
"io/ioutil"
"log"
"os"
"sync"
"time"
"github.com/google/uuid"
"github.com/mmcdole/gofeed"
"github.com/spf13/viper"
)
// InputURLs represents freshly parsed configuration: folder name -> list of feed URLs.
type InputURLs map[string][]string
// FlatURLs represents url -> folder map
type FlatURLs map[string]string
// FeedWithFolder represents folder name and feed item combined
type FeedWithFolder struct {
Feed *gofeed.Feed
Folder string
}
// FeedsWithFolders represents collection of FeedWithFolder
type FeedsWithFolders []FeedWithFolder
// ItemWithFolder represents an rss item (post), its folder and the original feed title/link combined
type ItemWithFolder struct {
Item *gofeed.Item
Folder string
FeedTitle string
FeedLink string
}
// ItemsWithFolders represents collection of ItemWithFolder
type ItemsWithFolders []ItemWithFolder
// ItemsCache represents GUIDs cache
type ItemsCache []string
// readInputURLsFile loads the "urls" section of the configuration and
// converts it into the InputURLs folder -> links mapping.
func readInputURLsFile() InputURLs {
	raw := viper.GetStringMapStringSlice("urls")
	return InputURLs(raw)
}
// flattenInputURLs inverts the folder -> links configuration map into a
// flat link -> folder lookup table.
func flattenInputURLs(urls InputURLs) FlatURLs {
	out := make(FlatURLs, len(urls))
	for dir, feeds := range urls {
		for _, feed := range feeds {
			out[feed] = dir
		}
	}
	return out
}
// fetchFeedData downloads and parses every feed in urls concurrently,
// pairing each parsed feed with its destination folder. Feeds that fail
// to fetch are logged and skipped; the error result is currently always
// nil.
func fetchFeedData(urls FlatURLs) (FeedsWithFolders, error) {
	var (
		mu      sync.Mutex
		wg      sync.WaitGroup
		results FeedsWithFolders
	)

	// Worker body: arguments are passed by value so each goroutine sees
	// its own url/folder pair.
	fetch := func(link, dir string) {
		defer wg.Done()

		if viper.GetBool("debug") {
			log.Printf("Fetching: %s", link)
		}

		feed, err := gofeed.NewParser().ParseURL(link)
		if err != nil {
			log.Printf("Error while fetching %s: %s", link, err)
			return
		}

		mu.Lock()
		results = append(results, FeedWithFolder{Feed: feed, Folder: dir})
		mu.Unlock()
	}

	for link, dir := range urls {
		wg.Add(1)
		go fetch(link, dir)
	}

	wg.Wait()
	return results, nil
}
// flattenFeedData flattens parsed feeds into a single list of items,
// tagging each item with its folder and the originating feed's title
// and link.
func flattenFeedData(feeds FeedsWithFolders) ItemsWithFolders {
	// Pre-size the result to avoid repeated append growth; the total
	// item count is known up front.
	total := 0
	for _, f := range feeds {
		total += len(f.Feed.Items)
	}

	items := make(ItemsWithFolders, 0, total)
	for _, f := range feeds {
		for _, item := range f.Feed.Items {
			items = append(items, ItemWithFolder{
				Item:      item,
				Folder:    f.Folder,
				FeedTitle: f.Feed.Title,
				FeedLink:  f.Feed.Link,
			})
		}
	}
	return items
}
// ReadCacheFile reads the GUID cache from the file configured under
// "paths.cache". A missing, unreadable or corrupt file yields an empty
// cache; read and decode errors are logged.
func ReadCacheFile() ItemsCache {
	var cache ItemsCache

	fname := viper.GetString("paths.cache")
	if _, err := os.Stat(fname); os.IsNotExist(err) {
		return cache
	}

	// ReadFile opens, reads and closes the file in one call, fixing the
	// descriptor leak of the previous open-without-close version.
	bytes, err := ioutil.ReadFile(fname)
	if err != nil {
		log.Println(err)
		return cache
	}

	if err := json.Unmarshal(bytes, &cache); err != nil {
		// Previously this error was silently dropped; log it and return
		// an empty cache so a corrupt file cannot half-populate it.
		log.Println(err)
		return nil
	}

	return cache
}
// CommitToCache saves item data to db, one row per item. It stops at
// and returns the first database error encountered.
func CommitToCache(items ItemsWithFolders) error {
	for _, entry := range items {
		post := entry.Item

		// Prefer the full content; fall back to the summary/description.
		content := post.Description
		if len(post.Content) > 0 {
			content = post.Content
		}

		// Default to "now" when the feed carries no parseable timestamp.
		published := time.Now()
		if post.PublishedParsed != nil {
			published = *post.PublishedParsed
		}

		err := CommitToDB(
			uuid.New().String(),
			post.GUID,
			post.Title,
			formatLink(post.Link),
			formatAuthor(post),
			entry.FeedTitle,
			entry.FeedLink,
			entry.Folder,
			content,
			published,
		)
		if err != nil {
			return err
		}
	}
	return nil
}
// filterNewItems drops entries whose GUID is already known to the
// database, keeping only items that have not been seen before.
func filterNewItems(entries ItemsWithFolders) ItemsWithFolders {
	var fresh ItemsWithFolders
	for _, candidate := range entries {
		if !IsExistingID(candidate.Item.GUID) {
			fresh = append(fresh, candidate)
		}
	}
	return fresh
}
// FetchFullContent fetches the full article text for an entry whose
// feed only publishes partial content, storing the result in the item's
// Description. On failure the error is logged and the entry is returned
// unchanged.
func FetchFullContent(entry ItemWithFolder) ItemWithFolder {
	item := entry.Item
	log.Printf("Fetching full text for '%s' (%s)", item.Title, item.Link)

	if fullContent, err := GetFullTextReadability(item.Link); err != nil {
		log.Printf("ERROR: Unable to retrieve content for %s. Error: %s", item.Link, err)
	} else {
		item.Description = fullContent
	}

	return entry
}
// FetchNewFeedItems loads the configured feed URLs, fetches all rss
// items and discards those already present in the cache, returning only
// the new items. Entries in the "full" folder additionally get their
// full article text fetched.
func FetchNewFeedItems() ItemsWithFolders {
	input := readInputURLsFile()
	flat := flattenInputURLs(input)

	parsed, err := fetchFeedData(flat)
	if err != nil {
		log.Fatal(err)
	}

	allItems := flattenFeedData(parsed)
	newItems := filterNewItems(allItems)

	// Index the slice so the updated entry is actually written back. The
	// previous `entry = FetchFullContent(entry)` assigned to the range
	// copy (a dead assignment) and only worked because Item is a pointer
	// mutated inside FetchFullContent.
	for i := range newItems {
		// TODO Make this inline as a configuration entry not a folder name
		if newItems[i].Folder == "full" {
			newItems[i] = FetchFullContent(newItems[i])
		}
	}

	return newItems
}