// Package feeds decodes XML feeds and renders their entries as HTML.
package feeds
import (
"bytes"
"encoding/xml"
"fmt"
"io"
"net/http"
"strings"
"github.com/rtfb/go-html-transform/h5"
"github.com/rtfb/go-html-transform/html/transform"
"golang.org/x/net/html"
"golang.org/x/text/encoding/htmlindex"
txttransform "golang.org/x/text/transform"
)
// Read decodes an XML feed from r.
//
// The whole input is first buffered in memory and then decoded with an
// xml.Decoder whose CharsetReader is newReaderLabel. When the decoded
// document carries items at the top level but none under its channel, the
// top-level items are copied into the channel.
//
// Returned errors wrap the underlying cause, so callers can inspect them with
// errors.Is / errors.As. A decode error message is followed by a newline and
// the part of the input that had not yet been consumed from the buffer, as a
// debugging aid.
func Read(r io.Reader) (*Feed, error) {
	var buf bytes.Buffer
	if _, err := io.Copy(&buf, r); err != nil {
		return nil, fmt.Errorf("reading feed: %w", err)
	}

	dec := xml.NewDecoder(&buf)
	dec.CharsetReader = newReaderLabel

	var f Feed
	if err := dec.Decode(&f); err != nil {
		// buf.String() only yields what is still unread in the buffer.
		return nil, fmt.Errorf("%w\n%s", err, buf.String())
	}

	// Normalise: expose top-level items through the channel when the
	// channel itself has none.
	if len(f.Item) > 0 && len(f.Channel.Item) == 0 {
		f.Channel.Item = f.Item
	}
	return &f, nil
}
// newReaderLabel is an xml.Decoder CharsetReader. It looks up the encoding
// named by label in the htmlindex registry and returns a reader that decodes
// in from that encoding.
//
// It returns an "unsupported charset" error when the label is not known to
// htmlindex.
func newReaderLabel(label string, in io.Reader) (io.Reader, error) {
	enc, err := htmlindex.Get(label)
	// A failed lookup must be rejected here; using a nil encoding below
	// would panic.
	if err != nil || enc == nil {
		return nil, fmt.Errorf("unsupported charset: %q", label)
	}
	return txttransform.NewReader(in, enc.NewDecoder()), nil
}
// ErrGetHTTP is returned by ReadHttp when the server responds with a status
// code of 400 or above.
var ErrGetHTTP = fmt.Errorf("http get error")
// ReadHttp fetches url with http.Get and decodes the response body with Read.
//
// A transport error from http.Get is returned unchanged. A response whose
// status code is 400 or above yields ErrGetHTTP. The response body is always
// closed before returning.
func ReadHttp(url string) (*Feed, error) {
	response, getErr := http.Get(url)
	if getErr != nil {
		return nil, getErr
	}
	body := response.Body
	defer body.Close()

	if response.StatusCode < 400 {
		return Read(body)
	}
	return nil, ErrGetHTTP
}
// Entry embeds a feed Item, so the Item's fields are promoted onto Entry, and
// adds HTML rendering through the Html method.
type Entry struct {
Item
}
// renderHtml parses the HTML fragment read from r, applies imgAlt to the
// nodes matched by the "img" selector, and writes the resulting markup to w.
// It returns any error from parsing or from rendering.
func renderHtml(r io.Reader, w io.Writer) error {
	fragments, err := h5.Partial(r)
	if err != nil {
		return err
	}

	// Hang every parsed fragment under one synthetic document node so the
	// transform can treat them as a single tree.
	root := &html.Node{Type: html.DocumentNode}
	for i := range fragments {
		root.AppendChild(fragments[i])
	}

	tree := h5.NewTree(root)
	tf := transform.New(&tree)
	tf.Apply(transform.TransformFunc(imgAlt), "img")

	out := []*html.Node{tf.Doc()}
	return h5.RenderNodes(w, out)
}
// Html renders the entry as an HTML fragment and returns a reader over it.
//
// The output consists of a linked <h1> title, the entry body, a "Url"
// paragraph and, when the enclosure URL is non-empty and differs from the
// entry link, an "Enclosure" paragraph. The body is taken from the first
// non-empty field among Content, Encoded and Description, in that order.
//
// An error is returned when the body cannot be rendered; in that case the
// returned reader is nil.
func (e *Entry) Html() (io.Reader, error) {
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "<h1><a href=\"%s\">%s</a></h1>\n", e.Link, e.Title)

	// Pick the richest body available, falling back to the description.
	body := e.Description
	switch {
	case e.Content != "":
		body = e.Content
	case e.Encoded != "":
		body = e.Encoded
	}
	if err := renderHtml(strings.NewReader(body), &buf); err != nil {
		return nil, fmt.Errorf("rendering entry body: %w", err)
	}

	fmt.Fprintf(&buf, "\n<p>Url: <a href=\"%s\">%s</a></p>", e.Link, e.Link)
	// Skip the enclosure when it merely repeats the entry link.
	if url := e.Enclosure.URL; url != "" && url != e.Link {
		fmt.Fprintf(&buf, "\n<p>Enclosure: <a href=\"%s\">%s</a></p>", url, url)
	}
	fmt.Fprintln(&buf)
	return &buf, nil
}
// imgAlt inserts a <p> element holding the node's "alt" text directly after
// n among its parent's children. Nodes without an "alt" attribute, or with an
// empty one, are left untouched.
func imgAlt(n *html.Node) {
	text := ""
	for i := range n.Attr {
		if attr := n.Attr[i]; attr.Key == "alt" {
			// Only the first "alt" attribute is considered.
			text = html.UnescapeString(attr.Val)
			break
		}
	}
	if text == "" {
		return
	}

	caption := h5.Element("p", nil, &html.Node{
		Type: html.TextNode,
		Data: text,
	})

	// Place the caption right after n: append when n is the last child,
	// otherwise insert ahead of the following sibling.
	if next := n.NextSibling; next == nil {
		n.Parent.AppendChild(caption)
	} else {
		n.Parent.InsertBefore(caption, next)
	}
}