Skip to content

Commit 29e5ccf

Browse files
committed
Rename escape_slash to script_safe and also escape U+2028 and U+2029
It is rather common to directly interpolate a JSON string inside <script> tags in HTML to provide configuration or parameters to a script. However, this may lead to XSS vulnerabilities. To prevent that, three characters need to be escaped: - `/` (forward slash) - `U+2028` (LINE SEPARATOR) - `U+2029` (PARAGRAPH SEPARATOR) The forward slash needs to be escaped to prevent closing the script tag early, and the other two are valid JSON but invalid JavaScript and can be used to break JS parsing. Given that the intent of escaping the forward slash is the same as that of escaping U+2028 and U+2029, I chose to rename and repurpose the existing `escape_slash` option.
1 parent 248bc5b commit 29e5ccf

File tree

10 files changed

+144
-65
lines changed

10 files changed

+144
-65
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Changes
22

3+
* The `escape_slash` option was renamed to `script_safe` and now also escapes U+2028 and U+2029. `escape_slash` is now an alias of `script_safe`.
4+
35
### 2021-10-24 (2.6.1)
46

57
* Restore version.rb with 2.6.1

ext/json/ext/generator/generator.c

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
1616
i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only,
1717
i_pack, i_unpack, i_create_id, i_extend, i_key_p,
1818
i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
19-
i_buffer_initial_length, i_dup, i_escape_slash;
19+
i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash;
2020

2121
/*
2222
* Copyright 2001-2004 Unicode, Inc.
@@ -124,7 +124,7 @@ static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16
124124

125125
/* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
126126
* and control characters are JSON escaped. */
127-
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escape_slash)
127+
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe)
128128
{
129129
const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
130130
const UTF8 *sourceEnd = source + RSTRING_LEN(string);
@@ -175,7 +175,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap
175175
fbuffer_append(buffer, "\\\"", 2);
176176
break;
177177
case '/':
178-
if(escape_slash) {
178+
if(script_safe) {
179179
fbuffer_append(buffer, "\\/", 2);
180180
break;
181181
}
@@ -228,7 +228,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap
228228
* characters required by the JSON standard are JSON escaped. The remaining
229229
* characters (should be UTF8) are just passed through and appended to the
230230
* result. */
231-
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slash)
231+
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe)
232232
{
233233
const char *ptr = RSTRING_PTR(string), *p;
234234
unsigned long len = RSTRING_LEN(string), start = 0, end = 0;
@@ -280,7 +280,7 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas
280280
escape_len = 2;
281281
break;
282282
case '/':
283-
if(escape_slash) {
283+
if(script_safe) {
284284
escape = "\\/";
285285
escape_len = 2;
286286
break;
@@ -294,6 +294,22 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas
294294
rb_raise(rb_path2class("JSON::GeneratorError"),
295295
"partial character in source, but hit end");
296296
}
297+
298+
if (script_safe && c == 0xE2) {
299+
unsigned char c2 = (unsigned char) *(p+1);
300+
unsigned char c3 = (unsigned char) *(p+2);
301+
if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
302+
fbuffer_append(buffer, ptr + start, end - start);
303+
start = end = (end + clen);
304+
if (c3 == 0xA8) {
305+
fbuffer_append(buffer, "\\u2028", 6);
306+
} else {
307+
fbuffer_append(buffer, "\\u2029", 6);
308+
}
309+
continue;
310+
}
311+
}
312+
297313
if (!isLegalUTF8((UTF8 *) p, clen)) {
298314
rb_raise(rb_path2class("JSON::GeneratorError"),
299315
"source sequence is illegal/malformed utf-8");
@@ -726,8 +742,12 @@ static VALUE cState_configure(VALUE self, VALUE opts)
726742
state->allow_nan = RTEST(tmp);
727743
tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
728744
state->ascii_only = RTEST(tmp);
729-
tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
730-
state->escape_slash = RTEST(tmp);
745+
tmp = rb_hash_aref(opts, ID2SYM(i_script_safe));
746+
state->script_safe = RTEST(tmp);
747+
if (!state->script_safe) {
748+
tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
749+
state->script_safe = RTEST(tmp);
750+
}
731751
return self;
732752
}
733753

@@ -762,7 +782,7 @@ static VALUE cState_to_h(VALUE self)
762782
rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
763783
rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse);
764784
rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
765-
rb_hash_aset(result, ID2SYM(i_escape_slash), state->escape_slash ? Qtrue : Qfalse);
785+
rb_hash_aset(result, ID2SYM(i_script_safe), state->script_safe ? Qtrue : Qfalse);
766786
rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
767787
rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length));
768788
return result;
@@ -947,9 +967,9 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
947967
}
948968
#endif
949969
if (state->ascii_only) {
950-
convert_UTF8_to_JSON_ASCII(buffer, obj, state->escape_slash);
970+
convert_UTF8_to_JSON_ASCII(buffer, obj, state->script_safe);
951971
} else {
952-
convert_UTF8_to_JSON(buffer, obj, state->escape_slash);
972+
convert_UTF8_to_JSON(buffer, obj, state->script_safe);
953973
}
954974
fbuffer_append_char(buffer, '"');
955975
}
@@ -1390,27 +1410,27 @@ static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
13901410
}
13911411

13921412
/*
1393-
* call-seq: escape_slash
1413+
* call-seq: script_safe
13941414
*
13951415
* If this boolean is true, forward slashes, U+2028 and U+2029 will be escaped in
13961416
* the json output.
13971417
*/
1398-
static VALUE cState_escape_slash(VALUE self)
1418+
static VALUE cState_script_safe(VALUE self)
13991419
{
14001420
GET_STATE(self);
1401-
return state->escape_slash ? Qtrue : Qfalse;
1421+
return state->script_safe ? Qtrue : Qfalse;
14021422
}
14031423

14041424
/*
1405-
* call-seq: escape_slash=(depth)
1425+
* call-seq: script_safe=(enable)
14061426
*
14071427
* This sets whether or not forward slashes, U+2028 and U+2029 will be escaped in
14081428
* the json output.
14091429
*/
1410-
static VALUE cState_escape_slash_set(VALUE self, VALUE enable)
1430+
static VALUE cState_script_safe_set(VALUE self, VALUE enable)
14111431
{
14121432
GET_STATE(self);
1413-
state->escape_slash = RTEST(enable);
1433+
state->script_safe = RTEST(enable);
14141434
return Qnil;
14151435
}
14161436

@@ -1530,9 +1550,12 @@ void Init_generator(void)
15301550
rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
15311551
rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
15321552
rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
1533-
rb_define_method(cState, "escape_slash", cState_escape_slash, 0);
1534-
rb_define_method(cState, "escape_slash?", cState_escape_slash, 0);
1535-
rb_define_method(cState, "escape_slash=", cState_escape_slash_set, 1);
1553+
rb_define_method(cState, "script_safe", cState_script_safe, 0);
1554+
rb_define_method(cState, "script_safe?", cState_script_safe, 0);
1555+
rb_define_method(cState, "script_safe=", cState_script_safe_set, 1);
1556+
rb_define_alias(cState, "escape_slash", "script_safe");
1557+
rb_define_alias(cState, "escape_slash?", "script_safe?");
1558+
rb_define_alias(cState, "escape_slash=", "script_safe=");
15361559
rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
15371560
rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
15381561
rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
@@ -1589,6 +1612,7 @@ void Init_generator(void)
15891612
i_object_nl = rb_intern("object_nl");
15901613
i_array_nl = rb_intern("array_nl");
15911614
i_max_nesting = rb_intern("max_nesting");
1615+
i_script_safe = rb_intern("script_safe");
15921616
i_escape_slash = rb_intern("escape_slash");
15931617
i_allow_nan = rb_intern("allow_nan");
15941618
i_ascii_only = rb_intern("ascii_only");

ext/json/ext/generator/generator.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ static const UTF32 halfMask = 0x3FFUL;
4949
static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length);
5050
static void unicode_escape(char *buf, UTF16 character);
5151
static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
52-
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escape_slash);
53-
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slash);
52+
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe);
53+
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe);
5454
static char *fstrndup(const char *ptr, unsigned long len);
5555

5656
/* ruby api and some helpers */
@@ -72,7 +72,7 @@ typedef struct JSON_Generator_StateStruct {
7272
long max_nesting;
7373
char allow_nan;
7474
char ascii_only;
75-
char escape_slash;
75+
char script_safe;
7676
long depth;
7777
long buffer_initial_length;
7878
} JSON_Generator_State;
@@ -151,8 +151,8 @@ static VALUE cState_allow_nan_p(VALUE self);
151151
static VALUE cState_ascii_only_p(VALUE self);
152152
static VALUE cState_depth(VALUE self);
153153
static VALUE cState_depth_set(VALUE self, VALUE depth);
154-
static VALUE cState_escape_slash(VALUE self);
155-
static VALUE cState_escape_slash_set(VALUE self, VALUE depth);
154+
static VALUE cState_script_safe(VALUE self);
155+
static VALUE cState_script_safe_set(VALUE self, VALUE depth);
156156
static FBuffer *cState_prepare_buffer(VALUE self);
157157
#ifndef ZALLOC
158158
#define ZALLOC(type) ((type *)ruby_zalloc(sizeof(type)))

java/src/json/ext/Generator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ public RuntimeInfo getInfo() {
139139

140140
public StringEncoder getStringEncoder() {
141141
if (stringEncoder == null) {
142-
stringEncoder = new StringEncoder(context, getState().asciiOnly(), getState().escapeSlash());
142+
stringEncoder = new StringEncoder(context, getState().asciiOnly(), getState().scriptSafe());
143143
}
144144
return stringEncoder;
145145
}

java/src/json/ext/GeneratorState.java

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,8 @@ public class GeneratorState extends RubyObject {
8686
* If set to <code>true</code> the forward slash will be escaped in
8787
* json output.
8888
*/
89-
private boolean escapeSlash = DEFAULT_ESCAPE_SLASH;
90-
static final boolean DEFAULT_ESCAPE_SLASH = false;
89+
private boolean scriptSafe = DEFAULT_SCRIPT_SAFE;
90+
static final boolean DEFAULT_SCRIPT_SAFE = false;
9191
/**
9292
* The initial buffer length of this state. (This isn't really used on all
9393
* non-C implementations.)
@@ -177,9 +177,9 @@ static GeneratorState fromState(ThreadContext context, RuntimeInfo info,
177177
* <code>-Infinity</code> should be generated, otherwise an exception is
178178
* thrown if these values are encountered.
179179
* This options defaults to <code>false</code>.
180-
* <dt><code>:escape_slash</code>
181-
* <dd>set to <code>true</code> if the forward slashes should be escaped
182-
* in the json output (default: <code>false</code>)
180+
* <dt><code>:script_safe</code>
181+
* <dd>set to <code>true</code> if U+2028, U+2029 and forward slashes should be escaped
182+
* in the json output to make it safe to include in a JavaScript tag (default: <code>false</code>)
183183
*/
184184
@JRubyMethod(optional=1, visibility=Visibility.PRIVATE)
185185
public IRubyObject initialize(ThreadContext context, IRubyObject[] args) {
@@ -203,7 +203,7 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject vOrig) {
203203
this.allowNaN = orig.allowNaN;
204204
this.asciiOnly = orig.asciiOnly;
205205
this.quirksMode = orig.quirksMode;
206-
this.escapeSlash = orig.escapeSlash;
206+
this.scriptSafe = orig.scriptSafe;
207207
this.bufferInitialLength = orig.bufferInitialLength;
208208
this.depth = orig.depth;
209209
return this;
@@ -359,19 +359,24 @@ public IRubyObject max_nesting_set(IRubyObject max_nesting) {
359359
/**
360360
* Returns true if forward slashes are escaped in the json output.
361361
*/
362-
public boolean escapeSlash() {
363-
return escapeSlash;
362+
public boolean scriptSafe() {
363+
return scriptSafe;
364364
}
365365

366-
@JRubyMethod(name="escape_slash")
367-
public RubyBoolean escape_slash_get(ThreadContext context) {
368-
return context.getRuntime().newBoolean(escapeSlash);
366+
@JRubyMethod(name="script_safe", alias="escape_slash")
367+
public RubyBoolean script_safe_get(ThreadContext context) {
368+
return context.getRuntime().newBoolean(scriptSafe);
369369
}
370370

371-
@JRubyMethod(name="escape_slash=")
372-
public IRubyObject escape_slash_set(IRubyObject escape_slash) {
373-
escapeSlash = escape_slash.isTrue();
374-
return escape_slash.getRuntime().newBoolean(escapeSlash);
371+
@JRubyMethod(name="script_safe=", alias="escape_slash=")
372+
public IRubyObject script_safe_set(IRubyObject script_safe) {
373+
scriptSafe = script_safe.isTrue();
374+
return script_safe.getRuntime().newBoolean(scriptSafe);
375+
}
376+
377+
@JRubyMethod(name="script_safe?", alias="escape_slash?")
378+
public RubyBoolean script_safe_p(ThreadContext context) {
379+
return context.getRuntime().newBoolean(scriptSafe);
375380
}
376381

377382
public boolean allowNaN() {
@@ -458,7 +463,10 @@ public IRubyObject configure(ThreadContext context, IRubyObject vOpts) {
458463
maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING);
459464
allowNaN = opts.getBool("allow_nan", DEFAULT_ALLOW_NAN);
460465
asciiOnly = opts.getBool("ascii_only", DEFAULT_ASCII_ONLY);
461-
escapeSlash = opts.getBool("escape_slash", DEFAULT_ESCAPE_SLASH);
466+
scriptSafe = opts.getBool("script_safe", DEFAULT_SCRIPT_SAFE);
467+
if (!scriptSafe) {
468+
scriptSafe = opts.getBool("escape_slash", DEFAULT_SCRIPT_SAFE);
469+
}
462470
bufferInitialLength = opts.getInt("buffer_initial_length", DEFAULT_BUFFER_INITIAL_LENGTH);
463471

464472
depth = opts.getInt("depth", 0);
@@ -486,7 +494,7 @@ public RubyHash to_h(ThreadContext context) {
486494
result.op_aset(context, runtime.newSymbol("allow_nan"), allow_nan_p(context));
487495
result.op_aset(context, runtime.newSymbol("ascii_only"), ascii_only_p(context));
488496
result.op_aset(context, runtime.newSymbol("max_nesting"), max_nesting_get(context));
489-
result.op_aset(context, runtime.newSymbol("escape_slash"), escape_slash_get(context));
497+
result.op_aset(context, runtime.newSymbol("script_safe"), script_safe_get(context));
490498
result.op_aset(context, runtime.newSymbol("depth"), depth_get(context));
491499
result.op_aset(context, runtime.newSymbol("buffer_initial_length"), buffer_initial_length_get(context));
492500
for (String name: getInstanceVariableNameList()) {

java/src/json/ext/StringEncoder.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* and throws a GeneratorError if any problem is found.
1616
*/
1717
final class StringEncoder extends ByteListTranscoder {
18-
private final boolean asciiOnly, escapeSlash;
18+
private final boolean asciiOnly, scriptSafe;
1919

2020
// Escaped characters will reuse this array, to avoid new allocations
2121
// or appending them byte-by-byte
@@ -37,10 +37,10 @@ final class StringEncoder extends ByteListTranscoder {
3737
new byte[] {'0', '1', '2', '3', '4', '5', '6', '7',
3838
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
3939

40-
StringEncoder(ThreadContext context, boolean asciiOnly, boolean escapeSlash) {
40+
StringEncoder(ThreadContext context, boolean asciiOnly, boolean scriptSafe) {
4141
super(context);
4242
this.asciiOnly = asciiOnly;
43-
this.escapeSlash = escapeSlash;
43+
this.scriptSafe = scriptSafe;
4444
}
4545

4646
void encode(ByteList src, ByteList out) {
@@ -75,10 +75,17 @@ private void handleChar(int c) {
7575
escapeChar('b');
7676
break;
7777
case '/':
78-
if(escapeSlash) {
78+
if(scriptSafe) {
7979
escapeChar((char)c);
8080
break;
8181
}
82+
case 0x2028:
83+
case 0x2029:
84+
if (scriptSafe) {
85+
quoteStop(charStart);
86+
escapeUtf8Char(c);
87+
break;
88+
}
8289
default:
8390
if (c >= 0x20 && c <= 0x7f ||
8491
(c >= 0x80 && !asciiOnly)) {

lib/json.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,15 @@
285285
# # Raises JSON::NestingError (nesting of 2 is too deep):
286286
# JSON.generate(obj, max_nesting: 2)
287287
#
288+
# ====== Escaping Options
289+
#
290+
# Option +script_safe+ (boolean) specifies whether <tt>'\u2028'</tt>, <tt>'\u2029'</tt>
291+
# and <tt>'/'</tt> should be escaped to make the JSON object safe to interpolate in script
292+
# tags.
293+
#
294+
# Option +ascii_only+ (boolean) specifies whether all characters outside the ASCII range
295+
# should be escaped.
296+
#
288297
# ====== Output Options
289298
#
290299
# The default formatting options generate the most compact

lib/json/common.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -592,13 +592,13 @@ class << self
592592
# Sets or returns the default options for the JSON.dump method.
593593
# Initially:
594594
# opts = JSON.dump_default_options
595-
# opts # => {:max_nesting=>false, :allow_nan=>true, :escape_slash=>false}
595+
# opts # => {:max_nesting=>false, :allow_nan=>true, :script_safe=>false}
596596
attr_accessor :dump_default_options
597597
end
598598
self.dump_default_options = {
599599
:max_nesting => false,
600600
:allow_nan => true,
601-
:escape_slash => false,
601+
:script_safe => false,
602602
}
603603

604604
# :call-seq:

0 commit comments

Comments
 (0)