Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

debugged safe content

  • Loading branch information...
commit 5f3e59e4ba8bb80f8ba51f4dabe5704d02a9d6dd 1 parent 4f165fc
Mike Samuel authored October 25, 2011
9  src/main/com/google/autoesc/HTML.java
@@ -134,6 +134,11 @@ private static boolean decodeEntityOnto(
134 134
       = new ReplacementTable(REPLACEMENT_TABLE)
135 135
       .add('&', null);
136 136
 
  137
+  private static final ReplacementTable NORM_BASIC_REPLACEMENT_TABLE
  138
+      = new ReplacementTable(NORM_REPLACEMENT_TABLE)
  139
+      .add('\'', null)
  140
+      .add('"', null);
  141
+
137 142
   /** escapeOnto escapes for inclusion in HTML text. */
138 143
   static void escapeOnto(@Nullable Object o, Writer out) throws IOException {
139 144
     String safe = ContentType.HTML.derefSafeContent(o);
@@ -180,8 +185,8 @@ static int filterNameOnto(@Nullable Object o, Writer out, int context)
180 185
       throws IOException {
181 186
     String safe = ContentType.HTMLAttr.derefSafeContent(o);
182 187
     if (safe != null) {
183  
-      out.write(' ');
184  
-      out.write(safe);
  188
+      if (Context.state(context) == Context.State.TagName) { out.write(' '); }
  189
+      NORM_BASIC_REPLACEMENT_TABLE.escapeOnto(safe, out);
185 190
       return context;
186 191
     }
187 192
     String s = ReplacementTable.toString(o);
66  src/main/com/google/autoesc/JS.java
@@ -156,29 +156,40 @@ static boolean isJSIdentPart(char c) {
156 156
   static final ReplacementTable STR_REPLACEMENT_TABLE
157 157
       = new ReplacementTable()
158 158
       .add((char) 0, "\\0")
159  
-      .add('\t', "\\t")
160  
-      .add('\n', "\\n")
161  
-      .add('\u000b', "\\x0b") // "\v" == "v" on IE 6.
162  
-      .add('\f', "\\f")
163  
-      .add('\r', "\\r")
164 159
       // Encode HTML specials as hex so the output can be embedded
165 160
       // in HTML attributes without further encoding.
  161
+      .add('`', "\\x60")
166 162
       .add('"', "\\x22")
167 163
       .add('&', "\\x26")
168 164
       .add('\'', "\\x27")
  165
+      // JS strings cannot contain embedded newlines.  Escape all space chars.
  166
+      // U+2028 and U+2029 handled below.
  167
+      .add('\t', "\\t")
  168
+      .add('\n', "\\n")
  169
+      .add('\u000b', "\\x0b") // "\v" == "v" on IE 6.
  170
+      .add('\f', "\\f")
  171
+      .add('\r', "\\r")
  172
+      // Prevent function calls even if they escape, and handle capturing
  173
+      // groups when inherited by regex below.
  174
+      .add('(', "\\(")
  175
+      .add(')', "\\)")
  176
+      // UTF-7 attack vector
169 177
       .add('+', "\\x2b")
  178
+      // Prevent embedded "</script"
170 179
       .add('/', "\\/")
  180
+      // Prevent embedded <!-- and -->
171 181
       .add('<', "\\x3c")
172 182
       .add('>', "\\x3e")
  183
+      // Correctness.
173 184
       .add('\\', "\\\\")
174  
-      .add('`', "\\x60")
  185
+      // JavaScript specific newline chars.
175 186
       .replaceNonAscii(new int[] { 0x2028, 0x2029 },
176 187
                        new String[] { "\\u2028", "\\u2029" });
177 188
   /**
178 189
    * STR_NORM_REPLACEMENT_TABLE is like STR_REPLACEMENT_TABLE but does not
179 190
    * overencode existing escapes since this table has no entry for "\\".
180 191
    */
181  
-  private static final ReplacementTable STR_NORM_REPLACEMENT_TABLE
  192
+  static final ReplacementTable STR_NORM_REPLACEMENT_TABLE
182 193
       = new ReplacementTable(STR_REPLACEMENT_TABLE)
183 194
       .add('\\', null);
184 195
 
@@ -193,19 +204,17 @@ protected void writeEmpty(Writer out) throws IOException {
193 204
           out.write("(?:)");
194 205
         }
195 206
       }
  207
+      .add('{', "\\{")
  208
+      .add('|', "\\|")
  209
+      .add('}', "\\}")
196 210
       .add('$', "\\$")
197  
-      .add('(', "\\(")
198  
-      .add(')', "\\)")
199 211
       .add('*', "\\*")
200 212
       .add('-', "\\-")
201 213
       .add('.', "\\.")
202 214
       .add('?', "\\?")
203 215
       .add('[', "\\[")
204 216
       .add(']', "\\]")
205  
-      .add('^', "\\^")
206  
-      .add('{', "\\{")
207  
-      .add('|', "\\|")
208  
-      .add('}', "\\}");
  217
+      .add('^', "\\^");
209 218
 
210 219
   static void escapeStrOnto(@Nullable Object o, Writer out) throws IOException {
211 220
     String safe = ContentType.JSStr.derefSafeContent(o);
@@ -291,7 +300,36 @@ void escape(@Nullable Object o, boolean protectBoundaries)
291 300
       // merge into other tokens.
292 301
       // Surrounding with parentheses might introduce call operators.
293 302
       out.write(protectBoundaries ? " null " : "null");
294  
-    } else if (o instanceof JSONMarshaler) {
  303
+      return;
  304
+    }
  305
+    if (o instanceof SafeContent) {
  306
+      SafeContent ct = (SafeContent) o;
  307
+      ContentType t = ct.getContentType();
  308
+      switch (t) {
  309
+        case JS:
  310
+          if (protectBoundaries) { out.write(' '); }
  311
+          out.write(ct.toString());
  312
+          if (protectBoundaries) { out.write(' '); }
  313
+          return;
  314
+        case JSStr:
  315
+          String s = ct.toString();
  316
+          int trailingSlashes = 0;
  317
+          for (int i = s.length(); --i >= 0; ++trailingSlashes) {
  318
+            if (s.charAt(i) != '\\') { break; }
  319
+          }
  320
+          out.write('\'');
  321
+          JS.STR_NORM_REPLACEMENT_TABLE.escapeOnto(s, out);
  322
+          if ((trailingSlashes & 1) != 0) {
  323
+            out.write('\\');
  324
+          }
  325
+          // If s ends with an incomplete escape sequence, complete it.
  326
+          out.write('\'');
  327
+          return;
  328
+        default:
  329
+          // Fall through to cases below.
  330
+      }
  331
+    }
  332
+    if (o instanceof JSONMarshaler) {
295 333
       String json = sanityCheckJSON(((JSONMarshaler) o).toJSON());
296 334
       char ch0 = json.charAt(0);  // sanityCheckJSON does not allow empty.
297 335
       if (protectBoundaries && JS.isJSIdentPart(ch0)) { out.write(' '); }
3  src/main/com/google/autoesc/URL.java
@@ -29,11 +29,12 @@
29 29
    * produce a valid hierarchical or opaque URL part.
30 30
    */
31 31
   static void escapeOnto(boolean norm, Object o, Writer out)
32  
-    throws IOException {
  32
+      throws IOException {
33 33
     String s;
34 34
     String safe = ContentType.URL.derefSafeContent(o);
35 35
     if (safe != null) {
36 36
       s = safe;
  37
+      norm = true;
37 38
     } else {
38 39
       s = ReplacementTable.toString(o);
39 40
     }
3  src/main/com/google/autoesc/package-info.java
@@ -32,7 +32,8 @@
32 32
  * results in the output
33 33
  * <blockquote>
34 34
  * {@code <b>I &lt;3 Ponies!</b>}
35  
- * {@code <button onclick="foo({&#34;foo&#34;:&#34;\x22bar\x22&#34;:42})">}
  35
+ * <code>&lt;button
  36
+ *  onclick="foo({&#34;foo&#34;:&#34;\x22bar\x22&#34;:42})"&gt;</code>
36 37
  * </blockquote>
37 38
  * The safe parts are treated as literal chunks of HTML/CSS/JS, and the unsafe
38 39
  * parts are escaped to preserve security and least-surprise.
2  src/tests/com/google/autoesc/HTMLEscapingWriterTest.java
@@ -1167,7 +1167,7 @@ public final void testSafeWriter() throws Exception {
1167 1167
             "bad dynamic attribute name 1",
1168 1168
             // The value is interpreted consistent with the attribute name.
1169 1169
             "<input {{\"onchange\"}}=\"{{\"doEvil()\"}}\">",
1170  
-            "<input onchange=\"'doEvil()'\">"
  1170
+            "<input onchange=\"'doEvil\\(\\)'\">"
1171 1171
         );
1172 1172
     assertTemplateOutput(
1173 1173
             "bad dynamic attribute name 2",
4  src/tests/com/google/autoesc/JSTest.java
@@ -229,7 +229,7 @@ public final void testJSStrEscaper() throws Exception {
229 229
     // From http://code.google.com/p/doctype/wiki/ArticleUtf7
230 230
     assertEscapedStrChars(
231 231
         "+ADw-script+AD4-alert(1)+ADw-/script+AD4-",
232  
-        "\\x2bADw-script\\x2bAD4-alert(1)\\x2bADw-\\/script\\x2bAD4-");
  232
+        "\\x2bADw-script\\x2bAD4-alert\\(1\\)\\x2bADw-\\/script\\x2bAD4-");
233 233
     // Invalid UTF-8 sequence
234 234
     assertEscapedStrChars("foo\u00A0bar", "foo\u00A0bar");
235 235
   }
@@ -294,7 +294,7 @@ public final void testEscapersOnLower7AndSelectHighCodepoints()
294 294
         "jsStrEscaper",
295 295
         "\\0\1\2\3\4\5\6\7\10\\t\\n\\x0b\\f\\r\16\17" +
296 296
         "\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37" +
297  
-        " !\\x22#$%\\x26\\x27()*\\x2b,-.\\/" +
  297
+        " !\\x22#$%\\x26\\x27\\(\\)*\\x2b,-.\\/" +
298 298
         "0123456789:;\\x3c=\\x3e?" +
299 299
         "@ABCDEFGHIJKLMNO" +
300 300
         "PQRSTUVWXYZ[\\\\]^_" +
201  src/tests/com/google/autoesc/SafeContentTest.java
... ...
@@ -0,0 +1,201 @@
  1
+// Copyright (C) 2011 Google Inc.
  2
+//
  3
+// Licensed under the Apache License, Version 2.0 (the "License");
  4
+// you may not use this file except in compliance with the License.
  5
+// You may obtain a copy of the License at
  6
+//
  7
+//      http://www.apache.org/licenses/LICENSE-2.0
  8
+//
  9
+// Unless required by applicable law or agreed to in writing, software
  10
+// distributed under the License is distributed on an "AS IS" BASIS,
  11
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12
+// See the License for the specific language governing permissions and
  13
+// limitations under the License.
  14
+
  15
+package com.google.autoesc;
  16
+
  17
+import java.io.StringWriter;
  18
+import junit.framework.TestCase;
  19
+
  20
+public class SafeContentTest extends TestCase {
  21
+
  22
+  private static final Object[] INPUTS = {
  23
+    "<b> \"foo%\" O'Reilly &bar;",
  24
+    new SafeContentString("a[href =~ \"//example.com\"]#foo", ContentType.CSS),
  25
+    new SafeContentString("Hello, <b>World</b> &amp;tc!", ContentType.HTML),
  26
+    new SafeContentString("dir=\"ltr\" title=\"x<y\"", ContentType.HTMLAttr),
  27
+    new SafeContentString("c && alert(\"Hello, World!\");", ContentType.JS),
  28
+    new SafeContentString("Hello, World & O'Reilly\\x21", ContentType.JSStr),
  29
+    new SafeContentString("greeting=H%69&addressee=(World)", ContentType.URL),
  30
+  };
  31
+
  32
+  /** @param goldens correspond to INPUTS */
  33
+  private void assertInterp(String tmpl, String... goldens) throws Exception {
  34
+    assertEquals(INPUTS.length, goldens.length);
  35
+    int prefixLen = tmpl.indexOf("{{.}}");
  36
+    String prefix = tmpl.substring(0, prefixLen);
  37
+    String suffix = tmpl.substring(prefixLen + 5);
  38
+    for (int i = 0; i < INPUTS.length; ++i) {
  39
+      Object input = INPUTS[i];
  40
+      StringWriter buf = new StringWriter();
  41
+      HTMLEscapingWriter w = new HTMLEscapingWriter(buf);
  42
+      w.writeSafe(prefix);
  43
+      w.write(input);
  44
+      w.writeSafe(suffix);
  45
+      w.close();
  46
+      String actual = buf.toString();
  47
+      actual = actual.substring(prefixLen, actual.length() - suffix.length());
  48
+      String type = input.getClass().getSimpleName() +
  49
+        (input instanceof SafeContent
  50
+         ? " " + ((SafeContent) input).getContentType()
  51
+         : "");
  52
+      assertEquals("`" + tmpl + "` with " + type, goldens[i], actual);
  53
+    }
  54
+  }
  55
+
  56
+  public final void testSafeContentInterp() throws Exception {
  57
+    // For each content sensitive escaper, see how it does on
  58
+    // each of the typed strings above.
  59
+    assertInterp("<style>{{.}} { color: blue }</style>",
  60
+        "ZautoescZ",
  61
+        // Allowed but not escaped.
  62
+        "a[href =~ \"//example.com\"]#foo",
  63
+        "ZautoescZ",
  64
+        "ZautoescZ",
  65
+        "ZautoescZ",
  66
+        "ZautoescZ",
  67
+        "ZautoescZ");
  68
+    assertInterp("<div style=\"{{.}}\">",
  69
+        "ZautoescZ",
  70
+        // Allowed and HTML escaped.
  71
+        "a[href =~ &#34;//example.com&#34;]#foo",
  72
+        "ZautoescZ",
  73
+        "ZautoescZ",
  74
+        "ZautoescZ",
  75
+        "ZautoescZ",
  76
+        "ZautoescZ");
  77
+    assertInterp("{{.}}",
  78
+        "&lt;b&gt; &#34;foo%&#34; O&#39;Reilly &amp;bar;",
  79
+        "a[href =~ &#34;//example.com&#34;]#foo",
  80
+        // Not escaped.
  81
+        "Hello, <b>World</b> &amp;tc!",
  82
+        "dir=&#34;ltr&#34; title=&#34;x&lt;y&#34;",
  83
+        "c &amp;&amp; alert(&#34;Hello, World!&#34;);",
  84
+        "Hello, World &amp; O&#39;Reilly\\x21",
  85
+        "greeting=H%69&amp;addressee=(World)");
  86
+    assertInterp("<a{{.}}>",
  87
+        "ZautoescZ",
  88
+        "ZautoescZ",
  89
+        "ZautoescZ",
  90
+        // Allowed and HTML escaped.
  91
+        " dir=\"ltr\" title=\"x&lt;y\"",
  92
+        "ZautoescZ",
  93
+        "ZautoescZ",
  94
+        "ZautoescZ");
  95
+    assertInterp("<a {{.}}>",
  96
+        "ZautoescZ",
  97
+        "ZautoescZ",
  98
+        "ZautoescZ",
  99
+        // Allowed and HTML escaped.
  100
+        "dir=\"ltr\" title=\"x&lt;y\"",
  101
+        "ZautoescZ",
  102
+        "ZautoescZ",
  103
+        "ZautoescZ");
  104
+    assertInterp("<a title={{.}}>",
  105
+        "\"&lt;b&gt; &#34;foo%&#34; O'Reilly &amp;bar;\"",
  106
+        "\"a[href =~ &#34;//example.com&#34;]#foo\"",
  107
+        // Tags stripped, value wrapped in quotes (spaces left as is), entity not re-escaped.
  108
+        "\"Hello, World &amp;tc!\"",
  109
+        "\"dir=&#34;ltr&#34; title=&#34;x&lt;y&#34;\"",
  110
+        "\"c &amp;&amp; alert(&#34;Hello, World!&#34;);\"",
  111
+        "\"Hello, World &amp; O'Reilly\\x21\"",
  112
+        "\"greeting=H%69&amp;addressee=(World)\"");
  113
+    assertInterp("<a title='{{.}}'>",
  114
+        "&lt;b&gt; \"foo%\" O&#39;Reilly &amp;bar;",
  115
+        "a[href =~ \"//example.com\"]#foo",
  116
+        // Tags stripped, entity not re-escaped.
  117
+        "Hello, World &amp;tc!",
  118
+        "dir=\"ltr\" title=\"x&lt;y\"",
  119
+        "c &amp;&amp; alert(\"Hello, World!\");",
  120
+        "Hello, World &amp; O&#39;Reilly\\x21",
  121
+        "greeting=H%69&amp;addressee=(World)");
  122
+    assertInterp("<textarea>{{.}}</textarea>",
  123
+        "&lt;b&gt; &#34;foo%&#34; O&#39;Reilly &amp;bar;",
  124
+        "a[href =~ &#34;//example.com&#34;]#foo",
  125
+        // Angle brackets escaped to prevent injection of close tags, entity
  126
+        // not re-escaped.
  127
+        "Hello, &lt;b&gt;World&lt;/b&gt; &amp;tc!",
  128
+        "dir=&#34;ltr&#34; title=&#34;x&lt;y&#34;",
  129
+        "c &amp;&amp; alert(&#34;Hello, World!&#34;);",
  130
+        "Hello, World &amp; O&#39;Reilly\\x21",
  131
+        "greeting=H%69&amp;addressee=(World)");
  132
+    assertInterp("<script>alert({{.}})</script>",
  133
+        "'\\x3cb\\x3e \\x22foo%\\x22 O\\x27Reilly \\x26bar;'",
  134
+        "'a[href =~ \\x22\\/\\/example.com\\x22]#foo'",
  135
+        "'Hello, \\x3cb\\x3eWorld\\x3c\\/b\\x3e \\x26amp;tc!'",
  136
+        "'dir=\\x22ltr\\x22 title=\\x22x\\x3cy\\x22'",
  137
+        // Not escaped.
  138
+        " c && alert(\"Hello, World!\"); ",
  139
+        // Escape sequence not over-escaped.
  140
+        "'Hello, World \\x26 O\\x27Reilly\\x21'",
  141
+        "'greeting=H%69\\x26addressee=\\(World\\)'");
  142
+    assertInterp("<button onclick=\"alert({{.}})\">",
  143
+        "'\\x3cb\\x3e \\x22foo%\\x22 O\\x27Reilly \\x26bar;'",
  144
+        "'a[href =~ \\x22\\/\\/example.com\\x22]#foo'",
  145
+        "'Hello, \\x3cb\\x3eWorld\\x3c\\/b\\x3e \\x26amp;tc!'",
  146
+        "'dir=\\x22ltr\\x22 title=\\x22x\\x3cy\\x22'",
  147
+        // Not JS escaped but HTML escaped.
  148
+        " c &amp;&amp; alert(&#34;Hello, World!&#34;); ",
  149
+        // Escape sequence not over-escaped.
  150
+        "'Hello, World \\x26 O\\x27Reilly\\x21'",
  151
+        "'greeting=H%69\\x26addressee=\\(World\\)'");
  152
+    assertInterp("<button onclick='alert({{.}})'>",
  153
+        "&#39;\\x3cb\\x3e \\x22foo%\\x22 O\\x27Reilly \\x26bar;&#39;",
  154
+        "&#39;a[href =~ \\x22\\/\\/example.com\\x22]#foo&#39;",
  155
+        "&#39;Hello, \\x3cb\\x3eWorld\\x3c\\/b\\x3e \\x26amp;tc!&#39;",
  156
+        "&#39;dir=\\x22ltr\\x22 title=\\x22x\\x3cy\\x22&#39;",
  157
+        // Not JS escaped but HTML escaped.
  158
+        " c &amp;&amp; alert(\"Hello, World!\"); ",
  159
+        // Escape sequence not over-escaped.
  160
+        "&#39;Hello, World \\x26 O\\x27Reilly\\x21&#39;",
  161
+        "&#39;greeting=H%69\\x26addressee=\\(World\\)&#39;");
  162
+    assertInterp("<script>alert(\"{{.}}\")</script>",
  163
+        "\\x3cb\\x3e \\x22foo%\\x22 O\\x27Reilly \\x26bar;",
  164
+        "a[href =~ \\x22\\/\\/example.com\\x22]#foo",
  165
+        "Hello, \\x3cb\\x3eWorld\\x3c\\/b\\x3e \\x26amp;tc!",
  166
+        "dir=\\x22ltr\\x22 title=\\x22x\\x3cy\\x22",
  167
+        "c \\x26\\x26 alert\\(\\x22Hello, World!\\x22\\);",
  168
+        // Escape sequence not over-escaped.
  169
+        "Hello, World \\x26 O\\x27Reilly\\x21",
  170
+        "greeting=H%69\\x26addressee=\\(World\\)");
  171
+    assertInterp("<button onclick='alert(\"{{.}}\")'>",
  172
+        "\\x3cb\\x3e \\x22foo%\\x22 O\\x27Reilly \\x26bar;",
  173
+        "a[href =~ \\x22\\/\\/example.com\\x22]#foo",
  174
+        "Hello, \\x3cb\\x3eWorld\\x3c\\/b\\x3e \\x26amp;tc!",
  175
+        "dir=\\x22ltr\\x22 title=\\x22x\\x3cy\\x22",
  176
+        "c \\x26\\x26 alert\\(\\x22Hello, World!\\x22\\);",
  177
+        // Escape sequence not over-escaped.
  178
+        "Hello, World \\x26 O\\x27Reilly\\x21",
  179
+        "greeting=H%69\\x26addressee=\\(World\\)");
  180
+    assertInterp("<a href=\"?q={{.}}\">",
  181
+        "%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b",
  182
+        "a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo",
  183
+        "Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21",
  184
+        "dir%3d%22ltr%22%20title%3d%22x%3cy%22",
  185
+        "c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b",
  186
+        "Hello%2c%20World%20%26%20O%27Reilly%5cx21",
  187
+        // Quotes and parens are escaped but %69 is not over-escaped.
  188
+        // HTML escaping is done.
  189
+        "greeting=H%69&amp;addressee=%28World%29");
  190
+    assertInterp("<style>body { background: url('?img={{.}}') }</style>",
  191
+        "%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b",
  192
+        "a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo",
  193
+        "Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21",
  194
+        "dir%3d%22ltr%22%20title%3d%22x%3cy%22",
  195
+        "c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b",
  196
+        "Hello%2c%20World%20%26%20O%27Reilly%5cx21",
  197
+        // Quotes and parens are escaped but %69 is not over-escaped.
  198
+        // HTML escaping is not done.
  199
+        "greeting=H%69&addressee=%28World%29");
  200
+  }
  201
+}

0 notes on commit 5f3e59e

Please sign in to comment.
Something went wrong with that request. Please try again.