@@ -410,7 +410,7 @@ int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter)
410
410
}
411
411
412
412
static const unsigned short sjis_decode_tbl1 [] = {
413
- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , -6204 , -6016 , -5828 , -5640 , -5452 , -5264 , -5076 , -4888 , -4700 , -4512 , -4324 , -4136 , -3948 , -3760 , -3572 , -3384 , -3196 , -3008 , -2820 , -2632 , -2444 , -2256 , -2068 , -1880 , -1692 , -1504 , -1316 , -1128 , -940 , -752 , -564 , -376 , -188 , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , 5828 , 6016 , 6204 , 6392 , 6580 , 6768 , 6956 , 7144 , 7332 , 7520 , 7708 , 7896 , 8084 , 8272 , 8460 , 8648 , 8836 , 9024 , 9212 , 9400 , 9588 , 9776 , 9964 , 10152 , 10340 , 10528 , 10716 , 10904 , 11092
413
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0xFFFF , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , 0xFFFF , -6016 , -5828 , -5640 , -5452 , -5264 , -5076 , -4888 , -4700 , -4512 , -4324 , -4136 , -3948 , -3760 , -3572 , -3384 , -3196 , -3008 , -2820 , -2632 , -2444 , -2256 , -2068 , -1880 , -1692 , -1504 , -1316 , -1128 , -940 , -752 , -564 , -376 , -188 , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , 5828 , 6016 , 6204 , 6392 , 6580 , 6768 , 6956 , 7144 , 7332 , 7520 , 7708 , 7896 , 8084 , 8272 , 8460 , 8648 , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF , 0xFFFF
414
414
};
415
415
416
416
static const unsigned short sjis_decode_tbl2 [] = {
@@ -422,34 +422,52 @@ static size_t mb_sjis_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf
422
422
unsigned char * p = * in , * e = p + * in_len ;
423
423
uint32_t * out = buf , * limit = buf + bufsize ;
424
424
425
+ e -- ; /* Stop the main loop 1 byte short of the end of the input */
426
+
425
427
while (p < e && out < limit ) {
426
428
unsigned char c = * p ++ ;
427
429
428
430
if (c <= 0x7F ) {
429
431
* out ++ = c ;
430
432
} else if (c >= 0xA1 && c <= 0xDF ) { /* Kana */
431
433
* out ++ = 0xFEC0 + c ;
432
- } else if (c > 0x80 && c <= 0xEF && c != 0xA0 && p < e ) {
434
+ } else {
435
+ /* Don't need to check p < e; it's not possible to go out of bounds here, due to e-- above */
433
436
unsigned char c2 = * p ++ ;
434
437
/* This is only legal if c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F
435
438
* But the values in the above conversion tables have been chosen such that
436
439
* illegal values of c2 will always result in w > jisx0208_ucs_table_size,
437
- * so we don't need to do a separate bounds check on c2 */
440
+ * so we don't need to do a separate bounds check on c2
441
+ * Likewise, the values in the conversion tables are such that illegal values
442
+ * for c will always result in w > jisx0208_ucs_table_size */
438
443
uint32_t w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
439
444
if (w < jisx0208_ucs_table_size ) {
440
445
w = jisx0208_ucs_table [w ];
441
446
if (!w )
442
447
w = MBFL_BAD_INPUT ;
443
448
* out ++ = w ;
444
449
} else {
450
+ if (c == 0x80 || c == 0xA0 || c > 0xEF ) {
451
+ p -- ;
452
+ }
445
453
* out ++ = MBFL_BAD_INPUT ;
446
454
}
455
+ }
456
+ }
457
+
458
+ /* Finish up last byte of input string if there is one */
459
+ if (p == e && out < limit ) {
460
+ unsigned char c = * p ++ ;
461
+ if (c <= 0x7F ) {
462
+ * out ++ = c ;
463
+ } else if (c >= 0xA1 && c <= 0xDF ) {
464
+ * out ++ = 0xFEC0 + c ;
447
465
} else {
448
466
* out ++ = MBFL_BAD_INPUT ;
449
467
}
450
468
}
451
469
452
- * in_len = e - p ;
470
+ * in_len = e - p + 1 ;
453
471
* in = p ;
454
472
return out - buf ;
455
473
}
@@ -1057,11 +1075,17 @@ static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
1057
1075
while (p < e && out < limit ) {
1058
1076
unsigned char c = * p ++ ;
1059
1077
1060
- if (c < 0x80 && c != 0x5C ) {
1061
- * out ++ = c ;
1078
+ if (c <= 0x80 || c == 0xA0 ) {
1079
+ if (c == 0x5C ) {
1080
+ * out ++ = 0xA5 ;
1081
+ } else if (c == 0x80 ) {
1082
+ * out ++ = 0x5C ;
1083
+ } else {
1084
+ * out ++ = c ;
1085
+ }
1062
1086
} else if (c >= 0xA1 && c <= 0xDF ) {
1063
1087
* out ++ = 0xFEC0 + c ;
1064
- } else if (c > 0x80 && c <= 0xED && c != 0xA0 ) {
1088
+ } else if (c <= 0xED ) {
1065
1089
if (p == e ) {
1066
1090
* out ++ = MBFL_BAD_INPUT ;
1067
1091
break ;
@@ -1162,12 +1186,6 @@ static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
1162
1186
} else {
1163
1187
* out ++ = MBFL_BAD_INPUT ;
1164
1188
}
1165
- } else if (c == 0x5C ) {
1166
- * out ++ = 0xA5 ;
1167
- } else if (c == 0x80 ) {
1168
- * out ++ = 0x5C ;
1169
- } else if (c == 0xA0 ) {
1170
- * out ++ = 0xA0 ;
1171
1189
} else if (c == 0xFD ) {
1172
1190
* out ++ = 0xA9 ;
1173
1191
} else if (c == 0xFE ) {
@@ -2095,6 +2113,10 @@ int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
2095
2113
return 0 ;
2096
2114
}
2097
2115
2116
+ static const unsigned short sjis_mobile_decode_tbl1 [] = {
2117
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0xFFFF , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , 0xFFFF , -6016 , -5828 , -5640 , -5452 , -5264 , -5076 , -4888 , -4700 , -4512 , -4324 , -4136 , -3948 , -3760 , -3572 , -3384 , -3196 , -3008 , -2820 , -2632 , -2444 , -2256 , -2068 , -1880 , -1692 , -1504 , -1316 , -1128 , -940 , -752 , -564 , -376 , -188 , 0 , 188 , 376 , 564 , 752 , 940 , 1128 , 1316 , 1504 , 1692 , 1880 , 2068 , 2256 , 2444 , 2632 , 2820 , 3008 , 3196 , 3384 , 3572 , 3760 , 3948 , 4136 , 4324 , 4512 , 4700 , 4888 , 5076 , 5264 , 5452 , 5640 , 5828 , 6016 , 6204 , 6392 , 6580 , 6768 , 6956 , 7144 , 7332 , 7520 , 7708 , 7896 , 8084 , 8272 , 8460 , 8648 , 8836 , 9024 , 9212 , 9400 , 9588 , 9776 , 9964 , 10152 , 10340 , 10528 , 10716 , 10904 , 11092 , 0xFFFF , 0xFFFF , 0xFFFF
2118
+ };
2119
+
2098
2120
static size_t mb_sjis_docomo_to_wchar (unsigned char * * in , size_t * in_len , uint32_t * buf , size_t bufsize , unsigned int * state )
2099
2121
{
2100
2122
unsigned char * p = * in , * e = p + * in_len ;
@@ -2110,14 +2132,14 @@ static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32
2110
2132
} else if (c >= 0xA1 && c <= 0xDF ) {
2111
2133
/* Kana */
2112
2134
* out ++ = 0xFEC0 + c ;
2113
- } else if ( c > 0x80 && c < 0xFD && c != 0xA0 ) {
2135
+ } else {
2114
2136
/* Kanji */
2115
2137
if (p == e ) {
2116
2138
* out ++ = MBFL_BAD_INPUT ;
2117
2139
break ;
2118
2140
}
2119
2141
unsigned char c2 = * p ++ ;
2120
- uint32_t w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2142
+ uint32_t w = sjis_mobile_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2121
2143
2122
2144
if (w <= 137 ) {
2123
2145
if (w == 31 ) {
@@ -2161,13 +2183,14 @@ static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32
2161
2183
} else if (w >= (94 * 94 ) && w < (114 * 94 )) {
2162
2184
w = w - (94 * 94 ) + 0xE000 ;
2163
2185
} else {
2186
+ if (c == 0x80 || c == 0xA0 || c >= 0xFD ) {
2187
+ p -- ;
2188
+ }
2164
2189
* out ++ = MBFL_BAD_INPUT ;
2165
2190
continue ;
2166
2191
}
2167
2192
2168
2193
* out ++ = w ? w : MBFL_BAD_INPUT ;
2169
- } else {
2170
- * out ++ = MBFL_BAD_INPUT ;
2171
2194
}
2172
2195
}
2173
2196
@@ -2337,14 +2360,14 @@ static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t
2337
2360
} else if (c >= 0xA1 && c <= 0xDF ) {
2338
2361
/* Kana */
2339
2362
* out ++ = 0xFEC0 + c ;
2340
- } else if ( c > 0x80 && c < 0xFD && c != 0xA0 ) {
2363
+ } else {
2341
2364
/* Kanji */
2342
2365
if (p == e ) {
2343
2366
* out ++ = MBFL_BAD_INPUT ;
2344
2367
break ;
2345
2368
}
2346
2369
unsigned char c2 = * p ++ ;
2347
- uint32_t w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2370
+ uint32_t w = sjis_mobile_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2348
2371
2349
2372
if (w <= 137 ) {
2350
2373
if (w == 31 ) {
@@ -2375,7 +2398,7 @@ static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t
2375
2398
int snd = 0 ;
2376
2399
w = mbfilter_sjis_emoji_kddi2unicode (w , & snd );
2377
2400
if (!w ) {
2378
- w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2401
+ w = sjis_mobile_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2379
2402
if (w >= (94 * 94 ) && w < (114 * 94 )) {
2380
2403
w = w - (94 * 94 ) + 0xE000 ;
2381
2404
}
@@ -2393,13 +2416,14 @@ static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t
2393
2416
} else if (w >= (94 * 94 ) && w < (114 * 94 )) {
2394
2417
w = w - (94 * 94 ) + 0xE000 ;
2395
2418
} else {
2419
+ if (c == 0x80 || c == 0xA0 || c >= 0xFD ) {
2420
+ p -- ;
2421
+ }
2396
2422
* out ++ = MBFL_BAD_INPUT ;
2397
2423
continue ;
2398
2424
}
2399
2425
2400
2426
* out ++ = w ? w : MBFL_BAD_INPUT ;
2401
- } else {
2402
- * out ++ = MBFL_BAD_INPUT ;
2403
2427
}
2404
2428
}
2405
2429
@@ -2645,14 +2669,14 @@ static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
2645
2669
} else if (c >= 0xA1 && c <= 0xDF ) {
2646
2670
/* Kana */
2647
2671
* out ++ = 0xFEC0 + c ;
2648
- } else if ( c > 0x80 && c < 0xFD && c != 0xA0 ) {
2672
+ } else {
2649
2673
/* Kanji */
2650
2674
if (p == e ) {
2651
2675
* out ++ = MBFL_BAD_INPUT ;
2652
2676
break ;
2653
2677
}
2654
2678
unsigned char c2 = * p ++ ;
2655
- uint32_t w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2679
+ uint32_t w = sjis_mobile_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2656
2680
2657
2681
if (w <= 137 ) {
2658
2682
if (w == 31 ) {
@@ -2683,7 +2707,7 @@ static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
2683
2707
int snd = 0 ;
2684
2708
w = mbfilter_sjis_emoji_sb2unicode (w , & snd );
2685
2709
if (!w ) {
2686
- w = sjis_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2710
+ w = sjis_mobile_decode_tbl1 [c ] + sjis_decode_tbl2 [c2 ];
2687
2711
if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max ) {
2688
2712
w = cp932ext3_ucs_table [w - cp932ext3_ucs_table_min ];
2689
2713
} else if (w >= (94 * 94 ) && w < (114 * 94 )) {
@@ -2703,13 +2727,14 @@ static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
2703
2727
} else if (w >= (94 * 94 ) && w < (114 * 94 )) {
2704
2728
w = w - (94 * 94 ) + 0xE000 ;
2705
2729
} else {
2730
+ if (c == 0x80 || c == 0xA0 || c >= 0xFD ) {
2731
+ p -- ;
2732
+ }
2706
2733
* out ++ = MBFL_BAD_INPUT ;
2707
2734
continue ;
2708
2735
}
2709
2736
2710
2737
* out ++ = w ? w : MBFL_BAD_INPUT ;
2711
- } else {
2712
- * out ++ = MBFL_BAD_INPUT ;
2713
2738
}
2714
2739
}
2715
2740
0 commit comments