forked from google/safehtml
-
Notifications
You must be signed in to change notification settings - Fork 0
/
safehtmlutil.go
180 lines (166 loc) · 5.88 KB
/
safehtmlutil.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
// Copyright (c) 2017 The Go Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
// Package safehtmlutil contains functions shared by package safehtml and safehtml/template.
package safehtmlutil
import (
"bytes"
"fmt"
"reflect"
"regexp"
)
// safeTrustedResourceURLPrefixPattern matches, case-insensitively and anchored
// at the start of the input, any of the prefixes accepted by
// IsSafeTrustedResourceURLPrefix.
var safeTrustedResourceURLPrefixPattern = regexp.MustCompile(`(?i)^(?:` +
	// "https://<origin>/" or the scheme-relative "//<origin>/".
	`(?:https:)?//[0-9a-z.:\[\]-]+/|` +
	// "/<pathStart>": a slash followed by anything but '/' or '\'.
	`/[^/\\]|` +
	// The literal "about:blank#".
	`about:blank#)`)

// IsSafeTrustedResourceURLPrefix reports whether prefix may be used as the
// prefix of a TrustedResourceURL.
//
// A safe prefix begins with one of:
//   - `https://<origin>/`
//   - `//<origin>/`
//   - `/<pathStart>`
//   - `about:blank#`
//
// `<origin>` is one or more characters drawn from alphanumerics, '.', ':',
// '[', ']' and '-'. No attempt is made to check that it is a well-formed
// domain name, so origins such as '.' and '1.2' are accepted.
//
// `<pathStart>` is a single character other than `/` and `\`. According to
// https://url.spec.whatwg.org/commit-snapshots/56b74ce7cca8883eab62e9a12666e2fac665d03d/#url-parsing,
// a leading / that is not followed by another / or \ puts the parser in the
// "path state", from which it can only move on to the "fragment state" and
// the "query state".
//
// The scheme and origin are matched case-insensitively. Only the start of
// prefix is inspected; anything after the matched portion is ignored.
func IsSafeTrustedResourceURLPrefix(prefix string) bool {
	loc := safeTrustedResourceURLPrefixPattern.FindStringIndex(prefix)
	return loc != nil
}
// urlDoubleDotSegmentPattern matches two consecutive dots, each of which may
// be written literally or percent-encoded as %2e (in either letter case).
var urlDoubleDotSegmentPattern = regexp.MustCompile(`(?i)(?:\.|%2e)(?:\.|%2e)`)

// URLContainsDoubleDotSegment reports whether url, which may be a complete URL
// or just a substring of one, contains the double dot-segment ".."
// (RFC3986 3.3), in either its unencoded or its percent-encoded form.
//
// The match is not anchored to path-segment boundaries: any two adjacent
// dots, such as those in "a..b", cause true to be returned.
func URLContainsDoubleDotSegment(url string) bool {
	return urlDoubleDotSegmentPattern.FindStringIndex(url) != nil
}
// QueryEscapeURL produces an output that can be embedded in a URL query.
// The output can be embedded in an HTML attribute without further escaping.
func QueryEscapeURL(args ...interface{}) string {
return urlProcessor(false, Stringify(args...))
}
// NormalizeURL normalizes URL content so it can be embedded in a quote-delimited
// string or parenthesis delimited url(...).
// The normalizer does not encode all HTML specials. Specifically, it does not
// encode '&' so correct embedding in an HTML attribute requires escaping of
// '&' to '&'.
func NormalizeURL(args ...interface{}) string {
return urlProcessor(true, Stringify(args...))
}
// urlProcessor normalizes (when norm is true) or escapes (when norm is false)
// s to produce a valid hierarchical or opaque URL part. If no byte of s needs
// encoding, s itself is returned.
//
// s is processed byte by byte:
//   - ASCII letters and digits, '-', '.', '_' and '~' (unreserved in RFC 3986
//     sec 2.3) are always copied through unchanged.
//   - The reserved characters "!#$&*+,/:;=?@[]" are copied through when norm
//     is true and percent-encoded when norm is false.
//   - '%' is copied through only when norm is true and it is followed by two
//     hex digits, so that valid escapes are not re-encoded; otherwise it
//     becomes "%25".
//   - Every other byte is written as "%xx" using lowercase hex digits. This
//     includes the single quote and parentheses: they are sub-delims in
//     RFC 3986, but encoding them lets the output be embedded in single
//     quoted attributes and unquoted CSS url(...) constructs.
//
// Working on bytes assumes that all URLs use UTF-8 as the content-encoding,
// similar to the URI to IRI encoding scheme defined in section 3.1 of
// RFC 3987. It should not cause any misencoding of URLs in pages with
// Content-type: text/html;charset=UTF-8.
func urlProcessor(norm bool, s string) string {
	var out bytes.Buffer
	// flushed is the index of the first byte of s not yet copied into out.
	flushed := 0
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]

		// Decide whether ch may be emitted verbatim.
		var keep bool
		switch ch {
		case '-', '.', '_', '~':
			keep = true
		case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
			keep = norm
		case '%':
			keep = norm && pos+2 < len(s) && isHex(s[pos+1]) && isHex(s[pos+2])
		default:
			keep = 'a' <= ch && ch <= 'z' ||
				'A' <= ch && ch <= 'Z' ||
				'0' <= ch && ch <= '9'
		}
		if keep {
			continue
		}

		// Copy the pending run of verbatim bytes, then the escape for ch.
		out.WriteString(s[flushed:pos])
		fmt.Fprintf(&out, "%%%02x", ch)
		flushed = pos + 1
	}
	if flushed == 0 {
		// Nothing was escaped, so the input is already in its final form.
		return s
	}
	out.WriteString(s[flushed:])
	return out.String()
}

// isHex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
func isHex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
// Stringify converts its arguments to a string. It is equivalent to
// fmt.Sprint(args...), except that each pointer argument is first
// dereferenced until a non-pointer value, a nil pointer, or a value
// implementing fmt.Stringer or error is reached.
//
// The args slice is never modified, so it is safe to call Stringify(vals...)
// on a slice the caller continues to use.
func Stringify(args ...interface{}) string {
	// Optimization for simple common case of a single string argument.
	if len(args) == 1 {
		if s, ok := args[0].(string); ok {
			return s
		}
	}
	// Resolve into a fresh slice: when the caller passes an existing slice
	// with "...", args shares its backing array, and writing the dereferenced
	// values back into args would silently change the caller's data.
	resolved := make([]interface{}, len(args))
	for i, arg := range args {
		resolved[i] = indirectToStringerOrError(arg)
	}
	return fmt.Sprint(resolved...)
}

// Interface types used by indirectToStringerOrError to decide when to stop
// dereferencing. They are computed once at package initialization.
var (
	errorType       = reflect.TypeOf((*error)(nil)).Elem()
	fmtStringerType = reflect.TypeOf((*fmt.Stringer)(nil)).Elem()
)

// indirectToStringerOrError dereferences a as many times
// as necessary to reach the base type, an implementation of fmt.Stringer,
// or an implementation of error, and returns a value of that type. It returns
// nil if a is nil. Dereferencing also stops at a nil pointer, which is
// returned as is.
func indirectToStringerOrError(a interface{}) interface{} {
	if a == nil {
		return nil
	}
	v := reflect.ValueOf(a)
	for v.Kind() == reflect.Ptr && !v.IsNil() {
		// A pointer type that itself implements Stringer or error must be
		// kept intact so fmt can call its String or Error method.
		if t := v.Type(); t.Implements(fmtStringerType) || t.Implements(errorType) {
			break
		}
		v = v.Elem()
	}
	return v.Interface()
}
// Indirect follows a through as many levels of pointer indirection as it can
// and returns the value it ends up at.
//
// A nil a yields nil, and a non-pointer a is returned unchanged. If a nil
// pointer is encountered along the way, that (typed) nil pointer is returned.
func Indirect(a interface{}) interface{} {
	if a == nil {
		return nil
	}
	// Fast path: inspecting only the type avoids building a reflect.Value
	// for the common non-pointer case.
	if reflect.TypeOf(a).Kind() != reflect.Ptr {
		return a
	}
	val := reflect.ValueOf(a)
	for {
		if val.Kind() != reflect.Ptr || val.IsNil() {
			return val.Interface()
		}
		val = val.Elem()
	}
}