Skip to content

Commit

Permalink
Parse podcast categories
Browse files Browse the repository at this point in the history
  • Loading branch information
fguillot committed Mar 12, 2024
1 parent f8e5094 commit 6d97f8b
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 40 deletions.
11 changes: 11 additions & 0 deletions internal/reader/itunes/itunes.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@ type ItunesFeedElement struct {
ItunesType string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd type"`
}

// GetItunesCategories returns the text of every iTunes category attached to
// the feed element. Each category's text is immediately followed by the text
// of its sub-category when one is present. The result is nil when the element
// has no categories.
func (i *ItunesFeedElement) GetItunesCategories() []string {
	var names []string
	for idx := range i.ItunesCategories {
		parent := i.ItunesCategories[idx]
		names = append(names, parent.Text)
		// The sub-category is optional; only emit it when it was set.
		if sub := parent.SubCategory; sub != nil {
			names = append(names, sub.Text)
		}
	}
	return names
}

type ItunesItemElement struct {
ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd author"`
ItunesEpisode string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd episode"`
Expand Down
110 changes: 92 additions & 18 deletions internal/reader/rss/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1434,18 +1434,17 @@ func TestParseEntryWithRSSDescriptionAndMediaDescription(t *testing.T) {
}
}

func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) {
func TestParseFeedWithCategories(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel>
<title>Example</title>
<link>https://example.org/</link>
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
<category>Category 1</category>
<category><![CDATA[Category 2]]></category>
<item>
<title>Test</title>
<link>https://example.org/item</link>
<category>Category 1</category>
<category>Category 2</category>
</item>
</channel>
</rss>`
Expand All @@ -1459,27 +1458,99 @@ func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) {
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
}

expected := "Category 2"
result := feed.Entries[0].Tags[1]
if result != expected {
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
expected := []string{"Category 1", "Category 2"}
result := feed.Entries[0].Tags

for i, tag := range result {
if tag != expected[i] {
t.Errorf("Incorrect tag, got: %q", tag)
}
}
}

func TestParseEntryWithCategoryAndCDATA(t *testing.T) {
// TestParseEntryWithCategories checks that an entry's tags are its own
// <category> values (plain text and CDATA) followed by the channel-level
// <category> value.
func TestParseEntryWithCategories(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel>
<title>Example</title>
<link>https://example.org/</link>
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
<category>Category 3</category>
<item>
<title>Test</title>
<link>https://example.org/item</link>
<category>Category 1</category>
<category><![CDATA[Category 2]]></category>
</item>
</channel>
</rss>`

	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
	if err != nil {
		t.Fatal(err)
	}

	// Guard the Entries[0] access below: an empty entry list would panic.
	if len(feed.Entries) == 0 {
		t.Fatal("Incorrect number of entries, got: 0")
	}

	expected := []string{"Category 1", "Category 2", "Category 3"}
	result := feed.Entries[0].Tags

	// Abort on a length mismatch: with more tags than expected, indexing
	// expected[i] in the loop below would panic instead of reporting a failure.
	if len(result) != len(expected) {
		t.Fatalf("Incorrect number of tags, got: %d", len(result))
	}

	for i, tag := range result {
		if tag != expected[i] {
			t.Errorf("Incorrect tag, got: %q", tag)
		}
	}
}

// TestParseFeedWithItunesCategories checks that channel-level
// <itunes:category> elements, including their nested sub-categories and
// XML-escaped text, end up as tags on the feed's entries in document order.
func TestParseFeedWithItunesCategories(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0">
<channel>
<title>Example</title>
<link>https://example.org/</link>
<itunes:category text="Society &amp; Culture">
<itunes:category text="Documentary" />
</itunes:category>
<itunes:category text="Health">
<itunes:category text="Mental Health" />
</itunes:category>
<item>
<title>Test</title>
<link>https://example.org/item</link>
</item>
</channel>
</rss>`

	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
	if err != nil {
		t.Fatal(err)
	}

	// Guard the Entries[0] access below: an empty entry list would panic.
	if len(feed.Entries) == 0 {
		t.Fatal("Incorrect number of entries, got: 0")
	}

	expected := []string{"Society & Culture", "Documentary", "Health", "Mental Health"}
	result := feed.Entries[0].Tags

	// Abort on a length mismatch: with more tags than expected, indexing
	// expected[i] in the loop below would panic instead of reporting a failure.
	if len(result) != len(expected) {
		t.Fatalf("Incorrect number of tags, got: %d", len(result))
	}

	for i, tag := range result {
		if tag != expected[i] {
			t.Errorf("Incorrect tag, got: %q", tag)
		}
	}
}

func TestParseFeedWithGooglePlayCategory(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:gplay="http://www.google.com/schemas/play-podcasts/1.0" version="2.0">
<channel>
<title>Example</title>
<link>https://example.org/</link>
<gplay:category text="Art"></gplay:category>
<item>
<title>Test</title>
<link>https://example.org/item</link>
<author>
by <![CDATA[Foo Bar]]>
</author>
<category>Sample Category</category>
</item>
</channel>
</rss>`
Expand All @@ -1493,10 +1564,13 @@ func TestParseEntryWithCategoryAndCDATA(t *testing.T) {
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
}

expected := "Sample Category"
result := feed.Entries[0].Tags[0]
if result != expected {
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
expected := []string{"Art"}
result := feed.Entries[0].Tags

for i, tag := range result {
if tag != expected[i] {
t.Errorf("Incorrect tag, got: %q", tag)
}
}
}

Expand Down
32 changes: 10 additions & 22 deletions internal/reader/rss/rss.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type rssFeed struct {
}

type rssChannel struct {
Categories []string `xml:"rss category"`
Title string `xml:"rss title"`
Link string `xml:"rss link"`
ImageURL string `xml:"rss image>url"`
Expand Down Expand Up @@ -111,6 +112,13 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed {
entry.Title = entry.URL
}

entry.Tags = append(entry.Tags, r.Channel.Categories...)
entry.Tags = append(entry.Tags, r.Channel.GetItunesCategories()...)

if r.Channel.GooglePlayCategory.Text != "" {
entry.Tags = append(entry.Tags, r.Channel.GooglePlayCategory.Text)
}

feed.Entries = append(feed.Entries, entry)
}

Expand Down Expand Up @@ -165,12 +173,6 @@ type rssEnclosure struct {
Length string `xml:"length,attr"`
}

// rssCategory holds one decoded <category> element, keeping both its
// character data and its raw inner XML so callers can pick either form.
type rssCategory struct {
// XMLName receives the name of the decoded XML element.
XMLName xml.Name
// Data receives the element's character data (the ",chardata" tag).
Data string `xml:",chardata"`
// Inner receives the raw XML nested inside the element (the ",innerxml" tag).
Inner string `xml:",innerxml"`
}

func (enclosure *rssEnclosure) Size() int64 {
if enclosure.Length == "" {
return 0
Expand All @@ -188,7 +190,7 @@ type rssItem struct {
Author rssAuthor `xml:"rss author"`
Comments string `xml:"rss comments"`
EnclosureLinks []rssEnclosure `xml:"rss enclosure"`
Categories []rssCategory `xml:"rss category"`
Categories []string `xml:"rss category"`
dublincore.DublinCoreItemElement
FeedBurnerElement
media.Element
Expand All @@ -208,7 +210,7 @@ func (r *rssItem) Transform() *model.Entry {
entry.Content = r.entryContent()
entry.Title = r.entryTitle()
entry.Enclosures = r.entryEnclosures()
entry.Tags = r.entryCategories()
entry.Tags = r.Categories
if duration, err := normalizeDuration(r.ItunesDuration); err == nil {
entry.ReadingTime = duration
}
Expand Down Expand Up @@ -383,20 +385,6 @@ func (r *rssItem) entryEnclosures() model.EnclosureList {
return enclosures
}

// entryCategories returns the whitespace-trimmed text of each category of the
// item. When a category's raw inner XML contains a CDATA marker, its character
// data is used; otherwise the raw inner XML is used as-is. The returned slice
// is never nil.
func (r *rssItem) entryCategories() []string {
	tags := make([]string, 0, len(r.Categories))

	for _, c := range r.Categories {
		value := c.Inner
		// The raw inner XML still carries the CDATA wrapper; the character
		// data holds the text without it.
		if strings.Contains(c.Inner, "<![CDATA[") {
			value = c.Data
		}
		tags = append(tags, strings.TrimSpace(value))
	}

	return tags
}

func (r *rssItem) entryCommentsURL() string {
commentsURL := strings.TrimSpace(r.Comments)
if commentsURL != "" && urllib.IsAbsoluteURL(commentsURL) {
Expand Down

0 comments on commit 6d97f8b

Please sign in to comment.