@@ -156,7 +156,7 @@ unescape_unicode_write(uint8_t *dest, uint32_t value, const uint8_t *start, cons
156
156
// If we get here, then the value is too big. This is an error, but we don't
157
157
// want to just crash, so we add an error to the error list (when one was
158
158
// provided) and write the U+FFFD replacement character instead.
159
- yp_diagnostic_list_append (error_list , start , end , "Invalid Unicode escape sequence." );
159
+ if ( error_list ) yp_diagnostic_list_append (error_list , start , end , "Invalid Unicode escape sequence." );
160
160
dest [0 ] = 0xEF ;
161
161
dest [1 ] = 0xBF ;
162
162
dest [2 ] = 0xBD ;
@@ -186,7 +186,15 @@ unescape_char(uint8_t value, const uint8_t flags) {
186
186
187
187
// Read a specific escape sequence into the given destination.
188
188
static const uint8_t *
189
- unescape (yp_parser_t * parser , uint8_t * dest , size_t * dest_length , const uint8_t * backslash , const uint8_t * end , const uint8_t flags ) {
189
+ unescape (
190
+ yp_parser_t * parser ,
191
+ uint8_t * dest ,
192
+ size_t * dest_length ,
193
+ const uint8_t * backslash ,
194
+ const uint8_t * end ,
195
+ const uint8_t flags ,
196
+ yp_list_t * error_list
197
+ ) {
190
198
switch (backslash [1 ]) {
191
199
case 'a' :
192
200
case 'b' :
@@ -226,7 +234,7 @@ unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t
226
234
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
227
235
case 'u' : {
228
236
if ((flags & YP_UNESCAPE_FLAG_CONTROL ) | (flags & YP_UNESCAPE_FLAG_META )) {
229
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 2 , "Unicode escape sequence cannot be used with control or meta flags." );
237
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 2 , "Unicode escape sequence cannot be used with control or meta flags." );
230
238
return backslash + 2 ;
231
239
}
232
240
@@ -243,12 +251,11 @@ unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t
243
251
244
252
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
245
253
if (hexadecimal_length > 6 ) {
246
- yp_diagnostic_list_append (& parser -> error_list , unicode_cursor , unicode_cursor + hexadecimal_length , "invalid Unicode escape." );
254
+ if ( error_list ) yp_diagnostic_list_append (error_list , unicode_cursor , unicode_cursor + hexadecimal_length , "invalid Unicode escape." );
247
255
}
248
-
249
256
// there are no hexadecimal digits
250
- if (hexadecimal_length == 0 ) {
251
- yp_diagnostic_list_append (& parser -> error_list , unicode_cursor , unicode_cursor + hexadecimal_length , "unterminated Unicode escape" );
257
+ else if (hexadecimal_length == 0 ) {
258
+ if ( error_list ) yp_diagnostic_list_append (error_list , unicode_cursor , unicode_cursor + hexadecimal_length , "unterminated Unicode escape" );
252
259
return unicode_cursor ;
253
260
}
254
261
@@ -261,63 +268,63 @@ unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t
261
268
uint32_t value ;
262
269
unescape_unicode (unicode_start , (size_t ) (unicode_cursor - unicode_start ), & value );
263
270
if (dest ) {
264
- * dest_length += unescape_unicode_write (dest + * dest_length , value , unicode_start , unicode_cursor , & parser -> error_list );
271
+ * dest_length += unescape_unicode_write (dest + * dest_length , value , unicode_start , unicode_cursor , error_list );
265
272
}
266
273
267
274
unicode_cursor += yp_strspn_whitespace (unicode_cursor , end - unicode_cursor );
268
275
}
269
276
270
277
// ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
271
- if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1 )
272
- yp_diagnostic_list_append (& parser -> error_list , extra_codepoints_start , unicode_cursor - 1 , "Multiple codepoints at single character literal" );
273
-
278
+ if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1 ) {
279
+ if ( error_list ) yp_diagnostic_list_append (error_list , extra_codepoints_start , unicode_cursor - 1 , "Multiple codepoints at single character literal" );
280
+ }
274
281
275
282
if (unicode_cursor < end && * unicode_cursor == '}' ) {
276
283
unicode_cursor ++ ;
277
284
} else {
278
- yp_diagnostic_list_append (& parser -> error_list , backslash , unicode_cursor , "invalid Unicode escape." );
285
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , unicode_cursor , "invalid Unicode escape." );
279
286
}
287
+
280
288
return unicode_cursor ;
281
289
}
282
-
283
- if ((backslash + 5 ) < end && yp_char_is_hexadecimal_digits (backslash + 2 , 4 )) {
290
+ else if ((backslash + 5 ) < end && yp_char_is_hexadecimal_digits (backslash + 2 , 4 )) {
284
291
uint32_t value ;
285
292
unescape_unicode (backslash + 2 , 4 , & value );
286
293
287
294
if (dest ) {
288
- * dest_length += unescape_unicode_write (dest + * dest_length , value , backslash + 2 , backslash + 6 , & parser -> error_list );
295
+ * dest_length += unescape_unicode_write (dest + * dest_length , value , backslash + 2 , backslash + 6 , error_list );
289
296
}
290
297
return backslash + 6 ;
291
298
}
292
299
293
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 2 , "Invalid Unicode escape sequence" );
300
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 2 , "Invalid Unicode escape sequence" );
294
301
return backslash + 2 ;
295
302
}
296
303
// \c\M-x meta control character, where x is an ASCII printable character
297
304
// \c? delete, ASCII 7Fh (DEL)
298
305
// \cx control character, where x is an ASCII printable character
299
306
case 'c' :
300
307
if (backslash + 2 >= end ) {
301
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 1 , "Invalid control escape sequence" );
308
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 1 , "Invalid control escape sequence" );
302
309
return end ;
303
310
}
304
311
305
312
if (flags & YP_UNESCAPE_FLAG_CONTROL ) {
306
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 1 , "Control escape sequence cannot be doubled." );
313
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 1 , "Control escape sequence cannot be doubled." );
307
314
return backslash + 2 ;
308
315
}
309
316
310
317
switch (backslash [2 ]) {
311
318
case '\\' :
312
- return unescape (parser , dest , dest_length , backslash + 2 , end , flags | YP_UNESCAPE_FLAG_CONTROL );
319
+ return unescape (parser , dest , dest_length , backslash + 2 , end , flags | YP_UNESCAPE_FLAG_CONTROL , error_list );
313
320
case '?' :
314
321
if (dest ) {
315
322
dest [(* dest_length )++ ] = unescape_char (0x7f , flags );
316
323
}
317
324
return backslash + 3 ;
318
325
default : {
319
326
if (!char_is_ascii_printable (backslash [2 ])) {
320
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 1 , "Invalid control escape sequence" );
327
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 1 , "Invalid control escape sequence" );
321
328
return backslash + 2 ;
322
329
}
323
330
@@ -331,31 +338,31 @@ unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t
331
338
// \C-? delete, ASCII 7Fh (DEL)
332
339
case 'C' :
333
340
if (backslash + 3 >= end ) {
334
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 1 , "Invalid control escape sequence" );
341
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 1 , "Invalid control escape sequence" );
335
342
return end ;
336
343
}
337
344
338
345
if (flags & YP_UNESCAPE_FLAG_CONTROL ) {
339
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 1 , "Control escape sequence cannot be doubled." );
346
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 1 , "Control escape sequence cannot be doubled." );
340
347
return backslash + 2 ;
341
348
}
342
349
343
350
if (backslash [2 ] != '-' ) {
344
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 1 , "Invalid control escape sequence" );
351
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 1 , "Invalid control escape sequence" );
345
352
return backslash + 2 ;
346
353
}
347
354
348
355
switch (backslash [3 ]) {
349
356
case '\\' :
350
- return unescape (parser , dest , dest_length , backslash + 3 , end , flags | YP_UNESCAPE_FLAG_CONTROL );
357
+ return unescape (parser , dest , dest_length , backslash + 3 , end , flags | YP_UNESCAPE_FLAG_CONTROL , error_list );
351
358
case '?' :
352
359
if (dest ) {
353
360
dest [(* dest_length )++ ] = unescape_char (0x7f , flags );
354
361
}
355
362
return backslash + 4 ;
356
363
default :
357
364
if (!char_is_ascii_printable (backslash [3 ])) {
358
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 2 , "Invalid control escape sequence" );
365
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 2 , "Invalid control escape sequence" );
359
366
return backslash + 2 ;
360
367
}
361
368
@@ -369,22 +376,22 @@ unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t
369
376
// \M-x meta character, where x is an ASCII printable character
370
377
case 'M' : {
371
378
if (backslash + 3 >= end ) {
372
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 1 , "Invalid control escape sequence" );
379
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 1 , "Invalid control escape sequence" );
373
380
return end ;
374
381
}
375
382
376
383
if (flags & YP_UNESCAPE_FLAG_META ) {
377
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 2 , "Meta escape sequence cannot be doubled." );
384
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 2 , "Meta escape sequence cannot be doubled." );
378
385
return backslash + 2 ;
379
386
}
380
387
381
388
if (backslash [2 ] != '-' ) {
382
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 2 , "Invalid meta escape sequence" );
389
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 2 , "Invalid meta escape sequence" );
383
390
return backslash + 2 ;
384
391
}
385
392
386
393
if (backslash [3 ] == '\\' ) {
387
- return unescape (parser , dest , dest_length , backslash + 3 , end , flags | YP_UNESCAPE_FLAG_META );
394
+ return unescape (parser , dest , dest_length , backslash + 3 , end , flags | YP_UNESCAPE_FLAG_META , error_list );
388
395
}
389
396
390
397
if (char_is_ascii_printable (backslash [3 ])) {
@@ -394,7 +401,7 @@ unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t
394
401
return backslash + 4 ;
395
402
}
396
403
397
- yp_diagnostic_list_append (& parser -> error_list , backslash , backslash + 2 , "Invalid meta escape sequence" );
404
+ if ( error_list ) yp_diagnostic_list_append (error_list , backslash , backslash + 2 , "Invalid meta escape sequence" );
398
405
return backslash + 3 ;
399
406
}
400
407
// \n
@@ -448,8 +455,8 @@ unescape(yp_parser_t *parser, uint8_t *dest, size_t *dest_length, const uint8_t
448
455
// \c\M-x same as above
449
456
// \c? or \C-? delete, ASCII 7Fh (DEL)
450
457
//
451
- YP_EXPORTED_FUNCTION void
452
- yp_unescape_manipulate_string (yp_parser_t * parser , yp_string_t * string , yp_unescape_type_t unescape_type ) {
458
+ static void
459
+ yp_unescape_manipulate_string_or_char_literal (yp_parser_t * parser , yp_string_t * string , yp_unescape_type_t unescape_type , bool expect_single_codepoint ) {
453
460
if (unescape_type == YP_UNESCAPE_NONE ) {
454
461
// If we're not unescaping then we can reference the source directly.
455
462
return ;
@@ -511,7 +518,13 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
511
518
// This is the only type of unescaping left. In this case we need to
512
519
// handle all of the different unescapes.
513
520
assert (unescape_type == YP_UNESCAPE_ALL );
514
- cursor = unescape (parser , dest , & dest_length , backslash , end , YP_UNESCAPE_FLAG_NONE );
521
+
522
+ uint8_t flags = YP_UNESCAPE_FLAG_NONE ;
523
+ if (expect_single_codepoint ) {
524
+ flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE ;
525
+ }
526
+
527
+ cursor = unescape (parser , dest , & dest_length , backslash , end , flags , & parser -> error_list );
515
528
break ;
516
529
}
517
530
@@ -539,6 +552,16 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
539
552
yp_string_owned_init (string , allocated , dest_length + ((size_t ) (end - cursor )));
540
553
}
541
554
555
+ YP_EXPORTED_FUNCTION void
556
+ yp_unescape_manipulate_string (yp_parser_t * parser , yp_string_t * string , yp_unescape_type_t unescape_type ) {
557
+ yp_unescape_manipulate_string_or_char_literal (parser , string , unescape_type , false);
558
+ }
559
+
560
+ void
561
+ yp_unescape_manipulate_char_literal (yp_parser_t * parser , yp_string_t * string , yp_unescape_type_t unescape_type ) {
562
+ yp_unescape_manipulate_string_or_char_literal (parser , string , unescape_type , true);
563
+ }
564
+
542
565
// This function is similar to yp_unescape_manipulate_string, except it doesn't
543
566
// actually perform any string manipulations. Instead, it calculates how long
544
567
// the unescaped character is, and returns that value.
@@ -564,10 +587,11 @@ yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash,
564
587
assert (unescape_type == YP_UNESCAPE_ALL );
565
588
566
589
uint8_t flags = YP_UNESCAPE_FLAG_NONE ;
567
- if (expect_single_codepoint )
590
+ if (expect_single_codepoint ) {
568
591
flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE ;
592
+ }
569
593
570
- const uint8_t * cursor = unescape (parser , NULL , 0 , backslash , parser -> end , flags );
594
+ const uint8_t * cursor = unescape (parser , NULL , 0 , backslash , parser -> end , flags , NULL );
571
595
assert (cursor > backslash );
572
596
573
597
return (size_t ) (cursor - backslash );
0 commit comments