-
Notifications
You must be signed in to change notification settings - Fork 204
/
parseutils.go
102 lines (89 loc) · 2.75 KB
/
parseutils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
package shared
import (
"fmt"
"regexp"
"strings"
"github.com/mmcdole/goxpp"
)
var (
emailNameRgx = regexp.MustCompile(`^([^@]+@[^\s]+)\s+\(([^@]+)\)$`)
nameEmailRgx = regexp.MustCompile(`^([^@]+)\s+\(([^@]+@[^)]+)\)$`)
nameOnlyRgx = regexp.MustCompile(`^([^@()]+)$`)
emailOnlyRgx = regexp.MustCompile(`^([^@()]+@[^@()]+)$`)
)
// FindRoot iterates through the tokens of an xml document until
// it encounters its first StartTag event. It returns an error
// if it reaches EndDocument before finding a tag.
func FindRoot(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
event, err = p.Next()
if err != nil {
return event, err
}
if event == xpp.StartTag {
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find root node before document end.")
}
}
return
}
// ParseText is a helper function for parsing the text
// from the current element of the XMLPullParser.
// This function can handle parsing naked XML text from
// an element.
func ParseText(p *xpp.XMLPullParser) (string, error) {
var text struct {
Type string `xml:"type,attr"`
InnerXML string `xml:",innerxml"`
}
err := p.DecodeElement(&text)
if err != nil {
return "", err
}
result := text.InnerXML
result = strings.TrimSpace(result)
if strings.HasPrefix(result, "<![CDATA[") &&
strings.HasSuffix(result, "]]>") {
result = strings.TrimPrefix(result, "<![CDATA[")
result = strings.TrimSuffix(result, "]]>")
return result, nil
}
result = DecodeEntities(result)
return result, nil
}
// DecodeEntities decodes escaped XML entities
// in a string and returns the unescaped string
func DecodeEntities(str string) string {
str = strings.Replace(str, "<", "<", -1)
str = strings.Replace(str, ">", ">", -1)
str = strings.Replace(str, """, "\"", -1)
str = strings.Replace(str, "'", "'", -1)
str = strings.Replace(str, "&", "&", -1)
return str
}
// ParseNameAddress parses name/email strings commonly
// found in RSS feeds of the format "Example Name (example@site.com)"
// and other variations of this format.
func ParseNameAddress(nameAddressText string) (name string, address string) {
if nameAddressText == "" {
return
}
if emailNameRgx.MatchString(nameAddressText) {
result := emailNameRgx.FindStringSubmatch(nameAddressText)
address = result[1]
name = result[2]
} else if nameEmailRgx.MatchString(nameAddressText) {
result := nameEmailRgx.FindStringSubmatch(nameAddressText)
name = result[1]
address = result[2]
} else if nameOnlyRgx.MatchString(nameAddressText) {
result := nameOnlyRgx.FindStringSubmatch(nameAddressText)
name = result[1]
} else if emailOnlyRgx.MatchString(nameAddressText) {
result := emailOnlyRgx.FindStringSubmatch(nameAddressText)
address = result[1]
}
return
}