Skip to content

Commit

Permalink
Remove XML sanitizer which was screwing up charset conversion (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcdole committed Aug 27, 2016
1 parent 83a445c commit e152f79
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 9 deletions.
4 changes: 1 addition & 3 deletions atom/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,14 @@ import (
"github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/goxpp"
"golang.org/x/net/html/charset"
)

// Parser is an Atom parser. It is an empty struct with no fields; feeds are
// parsed through its Parse method.
type Parser struct{}

// Parse parses an xml feed into an atom.Feed
func (ap *Parser) Parse(feed io.Reader) (*Feed, error) {
fr := shared.NewXMLSanitizerReader(feed)
p := xpp.NewXMLPullParser(fr, false, charset.NewReaderLabel)
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)

_, err := shared.FindRoot(p)
if err != nil {
Expand Down
4 changes: 1 addition & 3 deletions detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (

"github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/goxpp"
"golang.org/x/net/html/charset"
)

// FeedType represents one of the possible feed
Expand All @@ -27,8 +26,7 @@ const (
// by looking for specific xml elements unique to the
// various feed types.
func DetectFeedType(feed io.Reader) FeedType {
fr := shared.NewXMLSanitizerReader(feed)
p := xpp.NewXMLPullParser(fr, false, charset.NewReaderLabel)
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)

_, err := shared.FindRoot(p)
if err != nil {
Expand Down
21 changes: 21 additions & 0 deletions internal/shared/charsetconv.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package shared

import (
"fmt"
"io"

"golang.org/x/net/html/charset"
)

// NewReaderLabel returns a reader that converts input from the charset named
// by label, delegating the lookup and decoding to charset.NewReaderLabel.
//
// It returns a nil reader and a non-nil error when charset.NewReaderLabel
// fails; the error wraps the underlying one and names the offending label.
//
// NOTE(review): the XML sanitizer is deliberately not layered on top of the
// decoded stream here (the wrapping call was left commented out) — confirm
// whether sanitizing after charset conversion should be reinstated.
func NewReaderLabel(label string, input io.Reader) (io.Reader, error) {
	conv, err := charset.NewReaderLabel(label, input)
	if err != nil {
		return nil, fmt.Errorf("creating charset reader for label %q: %w", label, err)
	}
	return conv, nil
}
4 changes: 1 addition & 3 deletions rss/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,14 @@ import (
"github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/goxpp"
"golang.org/x/net/html/charset"
)

// Parser is an RSS parser. It is an empty struct with no fields; feeds are
// parsed through its Parse method.
type Parser struct{}

// Parse parses an xml feed into an rss.Feed
func (rp *Parser) Parse(feed io.Reader) (*Feed, error) {
fr := shared.NewXMLSanitizerReader(feed)
p := xpp.NewXMLPullParser(fr, false, charset.NewReaderLabel)
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)

_, err := shared.FindRoot(p)
if err != nil {
Expand Down

0 comments on commit e152f79

Please sign in to comment.