Skip to content

Commit d75c78b

Browse files
committed
Optimize out checks in hot path for SJIS decoding
This gives about a 20% speed boost when converting SJIS to some other encoding.
1 parent 9c28385 commit d75c78b

File tree

1 file changed

+52
-27
lines changed

1 file changed

+52
-27
lines changed

ext/mbstring/libmbfl/filters/mbfilter_sjis.c

Lines changed: 52 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter)
410410
}
411411

412412
static const unsigned short sjis_decode_tbl1[] = {
413-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, -6204, -6016, -5828, -5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 8836, 9024, 9212, 9400, 9588, 9776, 9964, 10152, 10340, 10528, 10716, 10904, 11092
413+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, -5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
414414
};
415415

416416
static const unsigned short sjis_decode_tbl2[] = {
@@ -422,34 +422,52 @@ static size_t mb_sjis_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf
422422
unsigned char *p = *in, *e = p + *in_len;
423423
uint32_t *out = buf, *limit = buf + bufsize;
424424

425+
e--; /* Stop the main loop 1 byte short of the end of the input */
426+
425427
while (p < e && out < limit) {
426428
unsigned char c = *p++;
427429

428430
if (c <= 0x7F) {
429431
*out++ = c;
430432
} else if (c >= 0xA1 && c <= 0xDF) { /* Kana */
431433
*out++ = 0xFEC0 + c;
432-
} else if (c > 0x80 && c <= 0xEF && c != 0xA0 && p < e) {
434+
} else {
435+
/* Don't need to check p < e; it's not possible to go out of bounds here, due to e-- above */
433436
unsigned char c2 = *p++;
434437
/* This is only legal if c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F
435438
* But the values in the above conversion tables have been chosen such that
436439
* illegal values of c2 will always result in w >= jisx0208_ucs_table_size,
437-
* so we don't need to do a separate bounds check on c2 */
440+
* so we don't need to do a separate bounds check on c2
441+
* Likewise, the values in the conversion tables are such that illegal values
442+
* for c will always result in w >= jisx0208_ucs_table_size */
438443
uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
439444
if (w < jisx0208_ucs_table_size) {
440445
w = jisx0208_ucs_table[w];
441446
if (!w)
442447
w = MBFL_BAD_INPUT;
443448
*out++ = w;
444449
} else {
450+
if (c == 0x80 || c == 0xA0 || c > 0xEF) {
451+
p--;
452+
}
445453
*out++ = MBFL_BAD_INPUT;
446454
}
455+
}
456+
}
457+
458+
/* Finish up last byte of input string if there is one */
459+
if (p == e && out < limit) {
460+
unsigned char c = *p++;
461+
if (c <= 0x7F) {
462+
*out++ = c;
463+
} else if (c >= 0xA1 && c <= 0xDF) {
464+
*out++ = 0xFEC0 + c;
447465
} else {
448466
*out++ = MBFL_BAD_INPUT;
449467
}
450468
}
451469

452-
*in_len = e - p;
470+
*in_len = e - p + 1;
453471
*in = p;
454472
return out - buf;
455473
}
@@ -1057,11 +1075,17 @@ static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
10571075
while (p < e && out < limit) {
10581076
unsigned char c = *p++;
10591077

1060-
if (c < 0x80 && c != 0x5C) {
1061-
*out++ = c;
1078+
if (c <= 0x80 || c == 0xA0) {
1079+
if (c == 0x5C) {
1080+
*out++ = 0xA5;
1081+
} else if (c == 0x80) {
1082+
*out++ = 0x5C;
1083+
} else {
1084+
*out++ = c;
1085+
}
10621086
} else if (c >= 0xA1 && c <= 0xDF) {
10631087
*out++ = 0xFEC0 + c;
1064-
} else if (c > 0x80 && c <= 0xED && c != 0xA0) {
1088+
} else if (c <= 0xED) {
10651089
if (p == e) {
10661090
*out++ = MBFL_BAD_INPUT;
10671091
break;
@@ -1162,12 +1186,6 @@ static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
11621186
} else {
11631187
*out++ = MBFL_BAD_INPUT;
11641188
}
1165-
} else if (c == 0x5C) {
1166-
*out++ = 0xA5;
1167-
} else if (c == 0x80) {
1168-
*out++ = 0x5C;
1169-
} else if (c == 0xA0) {
1170-
*out++ = 0xA0;
11711189
} else if (c == 0xFD) {
11721190
*out++ = 0xA9;
11731191
} else if (c == 0xFE) {
@@ -2095,6 +2113,10 @@ int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
20952113
return 0;
20962114
}
20972115

2116+
static const unsigned short sjis_mobile_decode_tbl1[] = {
2117+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, -5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 8836, 9024, 9212, 9400, 9588, 9776, 9964, 10152, 10340, 10528, 10716, 10904, 11092, 0xFFFF, 0xFFFF, 0xFFFF
2118+
};
2119+
20982120
static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
20992121
{
21002122
unsigned char *p = *in, *e = p + *in_len;
@@ -2110,14 +2132,14 @@ static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32
21102132
} else if (c >= 0xA1 && c <= 0xDF) {
21112133
/* Kana */
21122134
*out++ = 0xFEC0 + c;
2113-
} else if (c > 0x80 && c < 0xFD && c != 0xA0) {
2135+
} else {
21142136
/* Kanji */
21152137
if (p == e) {
21162138
*out++ = MBFL_BAD_INPUT;
21172139
break;
21182140
}
21192141
unsigned char c2 = *p++;
2120-
uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
2142+
uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
21212143

21222144
if (w <= 137) {
21232145
if (w == 31) {
@@ -2161,13 +2183,14 @@ static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32
21612183
} else if (w >= (94*94) && w < (114*94)) {
21622184
w = w - (94*94) + 0xE000;
21632185
} else {
2186+
if (c == 0x80 || c == 0xA0 || c >= 0xFD) {
2187+
p--;
2188+
}
21642189
*out++ = MBFL_BAD_INPUT;
21652190
continue;
21662191
}
21672192

21682193
*out++ = w ? w : MBFL_BAD_INPUT;
2169-
} else {
2170-
*out++ = MBFL_BAD_INPUT;
21712194
}
21722195
}
21732196

@@ -2337,14 +2360,14 @@ static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t
23372360
} else if (c >= 0xA1 && c <= 0xDF) {
23382361
/* Kana */
23392362
*out++ = 0xFEC0 + c;
2340-
} else if (c > 0x80 && c < 0xFD && c != 0xA0) {
2363+
} else {
23412364
/* Kanji */
23422365
if (p == e) {
23432366
*out++ = MBFL_BAD_INPUT;
23442367
break;
23452368
}
23462369
unsigned char c2 = *p++;
2347-
uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
2370+
uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
23482371

23492372
if (w <= 137) {
23502373
if (w == 31) {
@@ -2375,7 +2398,7 @@ static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t
23752398
int snd = 0;
23762399
w = mbfilter_sjis_emoji_kddi2unicode(w, &snd);
23772400
if (!w) {
2378-
w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
2401+
w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
23792402
if (w >= (94*94) && w < (114*94)) {
23802403
w = w - (94*94) + 0xE000;
23812404
}
@@ -2393,13 +2416,14 @@ static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t
23932416
} else if (w >= (94*94) && w < (114*94)) {
23942417
w = w - (94*94) + 0xE000;
23952418
} else {
2419+
if (c == 0x80 || c == 0xA0 || c >= 0xFD) {
2420+
p--;
2421+
}
23962422
*out++ = MBFL_BAD_INPUT;
23972423
continue;
23982424
}
23992425

24002426
*out++ = w ? w : MBFL_BAD_INPUT;
2401-
} else {
2402-
*out++ = MBFL_BAD_INPUT;
24032427
}
24042428
}
24052429

@@ -2645,14 +2669,14 @@ static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
26452669
} else if (c >= 0xA1 && c <= 0xDF) {
26462670
/* Kana */
26472671
*out++ = 0xFEC0 + c;
2648-
} else if (c > 0x80 && c < 0xFD && c != 0xA0) {
2672+
} else {
26492673
/* Kanji */
26502674
if (p == e) {
26512675
*out++ = MBFL_BAD_INPUT;
26522676
break;
26532677
}
26542678
unsigned char c2 = *p++;
2655-
uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
2679+
uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
26562680

26572681
if (w <= 137) {
26582682
if (w == 31) {
@@ -2683,7 +2707,7 @@ static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
26832707
int snd = 0;
26842708
w = mbfilter_sjis_emoji_sb2unicode(w, &snd);
26852709
if (!w) {
2686-
w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
2710+
w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
26872711
if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) {
26882712
w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min];
26892713
} else if (w >= (94*94) && w < (114*94)) {
@@ -2703,13 +2727,14 @@ static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
27032727
} else if (w >= (94*94) && w < (114*94)) {
27042728
w = w - (94*94) + 0xE000;
27052729
} else {
2730+
if (c == 0x80 || c == 0xA0 || c >= 0xFD) {
2731+
p--;
2732+
}
27062733
*out++ = MBFL_BAD_INPUT;
27072734
continue;
27082735
}
27092736

27102737
*out++ = w ? w : MBFL_BAD_INPUT;
2711-
} else {
2712-
*out++ = MBFL_BAD_INPUT;
27132738
}
27142739
}
27152740

0 commit comments

Comments
 (0)