/
atom.go
105 lines (90 loc) · 2.47 KB
/
atom.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
// Atom 1.0 parser
package parser
import (
"encoding/xml"
"html"
"io"
"strings"
"github.com/nkanaev/yarr/src/content/htmlutil"
)
type atomFeed struct {
XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
ID string `xml:"id"`
Title atomText `xml:"title"`
Links atomLinks `xml:"link"`
Entries []atomEntry `xml:"entry"`
}
type atomEntry struct {
ID string `xml:"id"`
Title atomText `xml:"title"`
Summary atomText `xml:"summary"`
Published string `xml:"published"`
Updated string `xml:"updated"`
Links atomLinks `xml:"link"`
Content atomText `xml:"http://www.w3.org/2005/Atom content"`
OrigLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
media
}
type atomText struct {
Type string `xml:"type,attr"`
Data string `xml:",chardata"`
XML string `xml:",innerxml"`
}
type atomLink struct {
Href string `xml:"href,attr"`
Rel string `xml:"rel,attr"`
}
type atomLinks []atomLink
func (a *atomText) Text() string {
if a.Type == "html" {
return htmlutil.ExtractText(a.Data)
} else if a.Type == "xhtml" {
return htmlutil.ExtractText(a.XML)
}
return a.Data
}
func (a *atomText) String() string {
data := a.Data
if a.Type == "xhtml" {
data = a.XML
}
return html.UnescapeString(strings.TrimSpace(data))
}
func (links atomLinks) First(rel string) string {
for _, l := range links {
if l.Rel == rel {
return l.Href
}
}
return ""
}
func ParseAtom(r io.Reader) (*Feed, error) {
srcfeed := atomFeed{}
decoder := xmlDecoder(r)
if err := decoder.Decode(&srcfeed); err != nil {
return nil, err
}
dstfeed := &Feed{
Title: srcfeed.Title.String(),
SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
}
for _, srcitem := range srcfeed.Entries {
linkFromID := ""
guidFromID := ""
if htmlutil.IsAPossibleLink(srcitem.ID) {
linkFromID = srcitem.ID
guidFromID = srcitem.ID + "::" + srcitem.Updated
}
link := firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First(""), linkFromID)
dstfeed.Items = append(dstfeed.Items, Item{
GUID: firstNonEmpty(guidFromID, srcitem.ID, link),
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
URL: link,
Title: srcitem.Title.Text(),
Content: firstNonEmpty(srcitem.Content.String(), srcitem.Summary.String(), srcitem.firstMediaDescription()),
ImageURL: srcitem.firstMediaThumbnail(),
AudioURL: "",
})
}
return dstfeed, nil
}