Skip to content

Commit

Permalink
Resolves #54 by removing AllowDocType functionality
Browse files Browse the repository at this point in the history
The doctype is not sanitized and can not be, and this allows unsafe content to
be inserted into the output by encoding it within a doctype attribute. The only
safe way to handle this quickly is not to permit the doctype.
  • Loading branch information
buro9 committed Dec 22, 2017
1 parent 13fa02f commit a5d7ef6
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 55 deletions.
1 change: 0 additions & 1 deletion cmd/sanitise_html_email/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ func main() {

// HTML email is often displayed in iframes and needs to preserve core
// structure
p.AllowDocType(true)
p.AllowElements("html", "head", "body", "title")

// There are not safe, and is only being done here to demonstrate how to
Expand Down
18 changes: 0 additions & 18 deletions policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,6 @@ type Policy struct {
// exceptions
initialized bool

// Allows the <!DOCTYPE > tag to exist in the sanitized document
allowDocType bool

// If true then we add spaces when stripping tags, specifically the closing
// tag is replaced by a space character.
addSpaces bool
Expand Down Expand Up @@ -369,21 +366,6 @@ func (p *Policy) AllowURLSchemeWithCustomPolicy(
return p
}

// AllowDocType states whether the HTML sanitised by the sanitizer is allowed to
// contain the HTML DocType tag: <!DOCTYPE HTML> or one of it's variants.
//
// The HTML spec only permits one doctype per document, and as you know how you
// are using the output of this, you know best as to whether we should ignore it
// (default) or not.
//
// If you are sanitizing a HTML fragment the default (false) is fine.
func (p *Policy) AllowDocType(allow bool) *Policy {

p.allowDocType = allow

return p
}

// AddSpaceWhenStrippingTag states whether to add a single space " " when
// removing tags that are not whitelisted by the policy.
//
Expand Down
10 changes: 7 additions & 3 deletions sanitize.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,13 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
switch token.Type {
case html.DoctypeToken:

if p.allowDocType {
buff.WriteString(token.String())
}
// DocType is not handled as there is no safe parsing mechanism
// provided by golang.org/x/net/html for the content, and this can
// be misused to insert HTML tags that are not then sanitized
//
// One might wish to recursively sanitize here using the same policy
// but I will need to do some further testing before considering
// this.

case html.CommentToken:

Expand Down
33 changes: 0 additions & 33 deletions sanitize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,39 +92,6 @@ func TestSignatureBehaviour(t *testing.T) {
}
}

func TestAllowDocType(t *testing.T) {
p := NewPolicy()
p.AllowElements("b")

in := "<!DOCTYPE html>Hello, <b>World</b>!"
expected := "Hello, <b>World</b>!"

out := p.Sanitize(in)
if out != expected {
t.Errorf(
"test 1 failed;\ninput : %s\noutput : %s\nexpected: %s",
in,
out,
expected,
)
}

// Allow the doctype and run the test again
p.AllowDocType(true)

expected = "<!DOCTYPE html>Hello, <b>World</b>!"

out = p.Sanitize(in)
if out != expected {
t.Errorf(
"test 1 failed;\ninput : %s\noutput : %s\nexpected: %s",
in,
out,
expected,
)
}
}

func TestLinks(t *testing.T) {

tests := []test{
Expand Down

0 comments on commit a5d7ef6

Please sign in to comment.