Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow parsing of atom tags in RSS #153

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
225 changes: 112 additions & 113 deletions atom/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ var (
"uri": true,
"url": true, // atom 0.3
}

// Atom currently has no dedicated extension parsers, so an empty set is used.
emptyExtParsers = make(shared.ExtParsers)
)

// Parser is an Atom Parser
Expand All @@ -38,6 +41,14 @@ func (ap *Parser) Parse(feed io.Reader) (*Feed, error) {
return ap.parseRoot(p)
}

// ParseAsExtension parses the element the pull parser is currently positioned
// on as a single piece of Atom entry content and returns it wrapped in a fresh
// *Entry. Its signature matches the ParseAsExtension method declared by
// shared.ExtParser, which lets Atom elements embedded in another feed format
// be handled by this parser.
//
// On failure the returned value is nil and the error from parseEntryContent
// is passed through unchanged.
func (ap *Parser) ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error) {
	var parsed Entry
	err := ap.parseEntryContent(p, &parsed)
	if err != nil {
		return nil, err
	}
	return &parsed, nil
}

func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
if err := p.Expect(xpp.StartTag, "feed"); err != nil {
return nil, err
Expand Down Expand Up @@ -69,7 +80,7 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -202,10 +213,6 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
}
entry := &Entry{}

contributors := []*Person{}
authors := []*Person{}
categories := []*Category{}
links := []*Link{}
extensions := ext.Extensions{}

for {
Expand All @@ -219,125 +226,20 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
}

if tok == xpp.StartTag {

name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
extensions = e
} else if name == "title" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Title = result
} else if name == "id" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.ID = result
} else if name == "rights" ||
name == "copyright" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Rights = result
} else if name == "summary" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Summary = result
} else if name == "source" {
result, err := ap.parseSource(p)
if err != nil {
return nil, err
}
entry.Source = result
} else if name == "updated" ||
name == "modified" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Updated = result
date, err := shared.ParseDate(result)
if err == nil {
utcDate := date.UTC()
entry.UpdatedParsed = &utcDate
}
} else if name == "contributor" {
result, err := ap.parsePerson("contributor", p)
if err != nil {
return nil, err
}
contributors = append(contributors, result)
} else if name == "author" {
result, err := ap.parsePerson("author", p)
if err != nil {
return nil, err
}
authors = append(authors, result)
} else if name == "category" {
result, err := ap.parseCategory(p)
if err != nil {
return nil, err
}
categories = append(categories, result)
} else if name == "link" {
result, err := ap.parseLink(p)
if err != nil {
return nil, err
}
links = append(links, result)
} else if name == "published" ||
name == "issued" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Published = result
date, err := shared.ParseDate(result)
if err == nil {
utcDate := date.UTC()
entry.PublishedParsed = &utcDate
}
} else if name == "content" {
result, err := ap.parseContent(p)
if err != nil {
return nil, err
}
entry.Content = result
} else {
err := p.Skip()
if err != nil {
if err := ap.parseEntryContent(p, entry); err != nil {
return nil, err
}
}
}
}

if len(categories) > 0 {
entry.Categories = categories
}

if len(authors) > 0 {
entry.Authors = authors
}

if len(links) > 0 {
entry.Links = links
}

if len(contributors) > 0 {
entry.Contributors = contributors
}

if len(extensions) > 0 {
entry.Extensions = extensions
}
Expand All @@ -349,6 +251,103 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
return entry, nil
}

// parseEntryContent handles exactly one child element of an Atom entry: the
// element the pull parser is currently positioned on. The element name is
// matched case-insensitively and the parsed value is written into the
// corresponding field of entry.
//
// Single-valued fields (title, id, rights, summary, source, updated,
// published, content) overwrite any previous value. Repeatable elements
// (contributor, author, category, link) are appended to the existing slices
// on entry. Elements with an unrecognised name are skipped.
//
// The first error reported by a sub-parser (or by Skip) is returned. A date
// that cannot be parsed is not an error: the raw text is kept and the
// corresponding *Parsed field is left untouched.
func (ap *Parser) parseEntryContent(p *xpp.XMLPullParser, entry *Entry) error {
	switch strings.ToLower(p.Name) {
	case "title":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Title = text

	case "id":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.ID = text

	case "rights", "copyright": // "copyright" is accepted as an alias
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Rights = text

	case "summary":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Summary = text

	case "source":
		src, err := ap.parseSource(p)
		if err != nil {
			return err
		}
		entry.Source = src

	case "updated", "modified": // "modified" is accepted as an alias
		raw, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Updated = raw
		// Best effort: keep the raw string even when it is not a valid date.
		if when, dateErr := shared.ParseDate(raw); dateErr == nil {
			utc := when.UTC()
			entry.UpdatedParsed = &utc
		}

	case "contributor":
		person, err := ap.parsePerson("contributor", p)
		if err != nil {
			return err
		}
		entry.Contributors = append(entry.Contributors, person)

	case "author":
		person, err := ap.parsePerson("author", p)
		if err != nil {
			return err
		}
		entry.Authors = append(entry.Authors, person)

	case "category":
		cat, err := ap.parseCategory(p)
		if err != nil {
			return err
		}
		entry.Categories = append(entry.Categories, cat)

	case "link":
		lnk, err := ap.parseLink(p)
		if err != nil {
			return err
		}
		entry.Links = append(entry.Links, lnk)

	case "published", "issued": // "issued" is accepted as an alias
		raw, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Published = raw
		// Best effort: keep the raw string even when it is not a valid date.
		if when, dateErr := shared.ParseDate(raw); dateErr == nil {
			utc := when.UTC()
			entry.PublishedParsed = &utc
		}

	case "content":
		body, err := ap.parseContent(p)
		if err != nil {
			return err
		}
		entry.Content = body

	default:
		// Unknown element: consume it so the caller can continue.
		return p.Skip()
	}

	return nil
}

func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {

if err := p.Expect(xpp.StartTag, "source"); err != nil {
Expand Down Expand Up @@ -378,7 +377,7 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/ftest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func main() {
if strings.EqualFold(feedType, "rss") ||
strings.EqualFold(feedType, "r") {
p := rss.Parser{}
feed, err = p.Parse(strings.NewReader(fc))
feed, err = p.Parse(strings.NewReader(fc), gofeed.NewParser().BuildRSSExtParsers())
} else if strings.EqualFold(feedType, "atom") ||
strings.EqualFold(feedType, "a") {
p := atom.Parser{}
Expand Down
5 changes: 5 additions & 0 deletions extensions/extensions.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ type Extension struct {
Value string `json:"value"`
Attrs map[string]string `json:"attrs"`
Children map[string][]Extension `json:"children"`
Parsed interface{} `json:"parsed,omitempty"`
}

// Extendable is implemented by types that can report the extensions
// attached to them.
type Extendable interface {
// GetExtensions returns the Extensions held by the implementing value.
GetExtensions() Extensions
}

func parseTextExtension(name string, extensions map[string][]Extension) (value string) {
Expand Down
35 changes: 33 additions & 2 deletions internal/shared/extparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ import (
"github.com/mmcdole/goxpp"
)

// ExtParser is a parser that can consume a single extension element from an
// XMLPullParser and produce a structured value for it.
type ExtParser interface {
// ParseAsExtension parses the element p is currently positioned on and
// returns the parsed value, or an error if parsing fails.
ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error)
}

// ExtParsers maps a namespace prefix to the ExtParser that should handle
// extension elements carrying that prefix.
type ExtParsers map[string]ExtParser

// IsExtension returns whether or not the current
// XML element is an extension element (if it has a
// non empty prefix)
Expand All @@ -22,10 +28,16 @@ func IsExtension(p *xpp.XMLPullParser) bool {
// ParseExtension parses the current element of the
// XMLPullParser as an extension element and updates
// the extension map
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser, extParsers ExtParsers) (ext.Extensions, error) {
prefix := prefixForNamespace(p.Space, p)

result, err := parseExtensionElement(p)
var result ext.Extension
var err error
if extParser, ok := extParsers[prefix]; ok {
result, err = parseExtensionFromParser(p, extParser)
} else {
result, err = parseExtensionElement(p)
}
if err != nil {
return nil, err
}
Expand All @@ -43,6 +55,23 @@ func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, er
return fe, nil
}

// parseExtensionFromParser builds an ext.Extension for the current element by
// delegating the element's parsing to extParser instead of the generic
// extension walker.
//
// The parser must be positioned on a start tag. The tag name is recorded in
// the extension's Name, the value returned by extParser.ParseAsExtension is
// stored in Parsed, and the parser is then required to be on the end tag with
// the same name. Any failure along the way is returned as err together with
// the extension as populated so far.
func parseExtensionFromParser(p *xpp.XMLPullParser, extParser ExtParser) (e ext.Extension, err error) {
	err = p.Expect(xpp.StartTag, "*")
	if err != nil {
		return e, err
	}

	// Record the name before the delegate parser advances past the start tag.
	e.Name = p.Name

	e.Parsed, err = extParser.ParseAsExtension(p)
	if err != nil {
		return e, err
	}

	// The delegate must leave the parser on this element's closing tag.
	err = p.Expect(xpp.EndTag, e.Name)
	return e, err
}

func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
if err = p.Expect(xpp.StartTag, "*"); err != nil {
return e, err
Expand Down Expand Up @@ -121,6 +150,8 @@ func prefixForNamespace(space string, p *xpp.XMLPullParser) string {
// These canonical prefixes override any prefixes used in the feed itself.
var canonicalNamespaces = map[string]string{
"http://webns.net/mvcb/": "admin",
"http://www.w3.org/2005/Atom": "atom",
"http://purl.org/atom/ns#": "atom03",
"http://purl.org/rss/1.0/modules/aggregation/": "ag",
"http://purl.org/rss/1.0/modules/annotate/": "annotate",
"http://media.tangent.org/rss/1.0/": "audio",
Expand Down
Loading