diff --git a/detector.go b/detector.go index b3853b8d..f8ae4dfd 100644 --- a/detector.go +++ b/detector.go @@ -33,15 +33,16 @@ func DetectFeedType(feed io.Reader) FeedType { buffer := new(bytes.Buffer) buffer.ReadFrom(feed) - // remove leading whitespace (if exists) var firstChar byte loop: for { ch, err := buffer.ReadByte() if err != nil { return FeedTypeUnknown } + // ignore leading whitespace & byte order marks switch ch { case ' ', '\r', '\n', '\t': + case 0xFE, 0xFF, 0x00, 0xEF, 0xBB, 0xBF: // utf 8-16-32 bom default: firstChar = ch buffer.UnreadByte() diff --git a/detector_test.go b/detector_test.go index f3eb059c..d438bf7a 100644 --- a/detector_test.go +++ b/detector_test.go @@ -19,6 +19,7 @@ func TestDetectFeedType(t *testing.T) { {"atom03_feed.xml", gofeed.FeedTypeAtom}, {"atom10_feed.xml", gofeed.FeedTypeAtom}, {"rss_feed.xml", gofeed.FeedTypeRSS}, + {"rss_feed_bom.xml", gofeed.FeedTypeRSS}, {"rss_feed_leading_spaces.xml", gofeed.FeedTypeRSS}, {"rdf_feed.xml", gofeed.FeedTypeRSS}, {"unknown_feed.xml", gofeed.FeedTypeUnknown}, diff --git a/parser_test.go b/parser_test.go index b30959c6..287b82d1 100644 --- a/parser_test.go +++ b/parser_test.go @@ -27,6 +27,7 @@ func TestParser_Parse(t *testing.T) { {"atom03_feed.xml", "atom", "Feed Title", false}, {"atom10_feed.xml", "atom", "Feed Title", false}, {"rss_feed.xml", "rss", "Feed Title", false}, + {"rss_feed_bom.xml", "rss", "Feed Title", false}, {"rss_feed_leading_spaces.xml", "rss", "Feed Title", false}, {"rdf_feed.xml", "rss", "Feed Title", false}, {"sample.json", "json", "title", false}, @@ -68,6 +69,7 @@ func TestParser_ParseString(t *testing.T) { {"atom03_feed.xml", "atom", "Feed Title", false}, {"atom10_feed.xml", "atom", "Feed Title", false}, {"rss_feed.xml", "rss", "Feed Title", false}, + {"rss_feed_bom.xml", "rss", "Feed Title", false}, {"rss_feed_leading_spaces.xml", "rss", "Feed Title", false}, {"rdf_feed.xml", "rss", "Feed Title", false}, {"sample.json", "json", "title", false}, @@ -109,6 +111,7 @@ func TestParser_ParseURL_Success(t *testing.T) { {"atom03_feed.xml", "atom", "Feed Title", false}, {"atom10_feed.xml", "atom", "Feed Title", false}, {"rss_feed.xml", "rss", "Feed Title", false}, + {"rss_feed_bom.xml", "rss", "Feed Title", false}, {"rss_feed_leading_spaces.xml", "rss", "Feed Title", false}, {"rdf_feed.xml", "rss", "Feed Title", false}, {"sample.json", "json", "title", false}, diff --git a/testdata/parser/universal/rss_feed_bom.xml b/testdata/parser/universal/rss_feed_bom.xml new file mode 100644 index 00000000..243a1898 --- /dev/null +++ b/testdata/parser/universal/rss_feed_bom.xml @@ -0,0 +1,8 @@ + + + + Feed Title + +