Skip to content

Commit dc98c13

Browse files
committed
Additional tests for mbstring extension
1 parent d4d52ba commit dc98c13

File tree

3 files changed

+85
-25
lines changed

3 files changed

+85
-25
lines changed

ext/mbstring/tests/mb_decode_numericentity.phpt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@ echo mb_decode_numericentity('&#1000000000', $convmap), "\n";
1919
echo mb_decode_numericentity('&#9000000000', $convmap), "\n";
2020
echo mb_decode_numericentity('&#10000000000', $convmap), "\n";
2121
echo mb_decode_numericentity('&#100000000000', $convmap), "\n";
22+
2223
echo mb_decode_numericentity('&#000000000000', $convmap), "\n";
24+
echo mb_decode_numericentity('&#00000000000', $convmap), "\n";
25+
echo mb_decode_numericentity('&#0000000000', $convmap), "\n";
26+
echo mb_decode_numericentity('&#000000000', $convmap), "\n";
2327

2428
$convmap = [];
2529
echo mb_decode_numericentity('föo', $convmap, "UTF-8")."\n";
@@ -41,5 +45,8 @@ aŒbœcŠdše€fg
4145
&#10000000000
4246
&#100000000000
4347
&#000000000000
48+
&#00000000000
49+
&#0000000000
50+
&#000000000
4451
föo
4552
mb_decode_numericentity(): Argument #2 ($convmap) must have a multiple of 4 elements

ext/mbstring/tests/mb_strcut.phpt

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,77 @@ mb_strcut()
66
output_handler=
77
--FILE--
88
<?php
9-
// TODO: Add more encodings
9+
/**
 * Renders a string as a bracketed, space-separated list of its characters,
 * each shown as the hexadecimal dump of its bytes (for example "[a4ce cab8]").
 *
 * @param string $string   Text to dump.
 * @param string $encoding Encoding name handed to mb_str_split() to decide
 *                         where one character ends and the next begins.
 * @return string "[" + hex dump of every character joined by single spaces + "]";
 *                an empty input yields "[]".
 */
function MBStringChars($string, $encoding) {
10+
// Split into pieces of exactly one character each, as defined by $encoding.
$chars = mb_str_split($string, 1, $encoding);
11+
// Hex-encode every character, then glue the results together with spaces.
return '[' . implode(' ', array_map(function($char) {
12+
// unpack('H*') returns an array holding the hex string; join() flattens it to a plain string.
return join(unpack('H*', $char));
13+
}, $chars)) . ']';
14+
}
15+
1016
ini_set('include_path', __DIR__);
1117
include_once('common.inc');
1218

1319
// EUC-JP
14-
$euc_jp = '0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。';
20+
$euc_jp = pack('H*', '30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3');
21+
// UTF-8
22+
$utf8 = pack('H*', 'e288ae2045e28b856461203d2051'); // has 2 multi-byte characters: [e288ae 20 45 e28b85 64 61 20 3d 20 51]
23+
// UTF-16LE
24+
$utf16le = pack('H*', '1a043804400438043b043b04380446043004200069007300200043007900720069006c006c0069006300');
1525

16-
print mb_strcut($euc_jp, 6, 5,'EUC-JP') . "\n";
17-
print mb_strcut($euc_jp, 0, 100,'EUC-JP') . "\n";
26+
print "== EUC-JP ==\n";
27+
print MBStringChars(mb_strcut($euc_jp, 6, 5,'EUC-JP'), 'EUC-JP') . "\n";
28+
print MBStringChars(mb_strcut($euc_jp, 5, 5,'EUC-JP'), 'EUC-JP') . "\n";
29+
print MBStringChars(mb_strcut($euc_jp, 0, 100,'EUC-JP'), 'EUC-JP') . "\n";
1830

1931
$str = mb_strcut($euc_jp, 100, 10,'EUC-JP');
20-
($str === false) ? print "OK\n" : print "NG: $str\n";
32+
($str === false) ? print "OK\n" : print "No good\n";
2133

2234
$str = mb_strcut($euc_jp, -100, 10,'EUC-JP');
23-
($str !== "") ? print "OK: $str\n" : print "NG:\n";
35+
($str !== "") ? print "OK\n" : print "No good\n";
36+
37+
print "== UTF-8 ==\n";
38+
print MBStringChars(mb_strcut($utf8, 0, 0, 'UTF-8'), 'UTF-8') . "\n";
39+
print MBStringChars(mb_strcut($utf8, 0, 1, 'UTF-8'), 'UTF-8') . "\n";
40+
print MBStringChars(mb_strcut($utf8, 0, 2, 'UTF-8'), 'UTF-8') . "\n";
41+
print MBStringChars(mb_strcut($utf8, 0, 3, 'UTF-8'), 'UTF-8') . "\n";
42+
print MBStringChars(mb_strcut($utf8, 0, 4, 'UTF-8'), 'UTF-8') . "\n";
43+
print MBStringChars(mb_strcut($utf8, 0, 5, 'UTF-8'), 'UTF-8') . "\n";
44+
print MBStringChars(mb_strcut($utf8, 1, 2, 'UTF-8'), 'UTF-8') . "\n";
45+
print MBStringChars(mb_strcut($utf8, 1, 3, 'UTF-8'), 'UTF-8') . "\n";
46+
print MBStringChars(mb_strcut($utf8, 1, 4, 'UTF-8'), 'UTF-8') . "\n";
2447

48+
print "== UTF-16LE ==\n";
49+
print MBStringChars(mb_strcut($utf16le, 0, 0, 'UTF-16LE'), 'UTF-16LE') . "\n";
50+
print MBStringChars(mb_strcut($utf16le, 0, 1, 'UTF-16LE'), 'UTF-16LE') . "\n";
51+
print MBStringChars(mb_strcut($utf16le, 0, 2, 'UTF-16LE'), 'UTF-16LE') . "\n";
52+
print MBStringChars(mb_strcut($utf16le, 0, 3, 'UTF-16LE'), 'UTF-16LE') . "\n";
53+
print MBStringChars(mb_strcut($utf16le, 1, 2, 'UTF-16LE'), 'UTF-16LE') . "\n";
54+
print MBStringChars(mb_strcut($utf16le, 1, 3, 'UTF-16LE'), 'UTF-16LE') . "\n";
55+
print MBStringChars(mb_strcut($utf16le, 1, 4, 'UTF-16LE'), 'UTF-16LE') . "\n";
2556

2657
?>
2758
--EXPECT--
28-
の文
29-
0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。
59+
== EUC-JP ==
60+
[a4ce cab8]
61+
[a4b3 a4ce]
62+
[30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3]
63+
OK
3064
OK
31-
OK: 0123この文
65+
== UTF-8 ==
66+
[]
67+
[]
68+
[]
69+
[e288ae]
70+
[e288ae 20]
71+
[e288ae 20 45]
72+
[]
73+
[e288ae]
74+
[e288ae 20]
75+
== UTF-16LE ==
76+
[]
77+
[]
78+
[1a04]
79+
[1a04]
80+
[1a04]
81+
[1a04]
82+
[1a04 3804]

ext/mbstring/tests/mb_strwidth.phpt

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,30 @@ mb_strwidth()
44
<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
55
--FILE--
66
<?php
7-
// TODO: Add more encoding, strings.....
8-
//$debug = true;
97
ini_set('include_path', __DIR__);
108
include_once('common.inc');
119

1210
// EUC-JP
13-
$euc_jp = '0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。';
11+
$euc_jp = pack('H*', '30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3');
12+
// UTF-8
13+
$utf8 = '∮ E⋅da = Q';
14+
// UTF-16LE
15+
$utf16le = pack('H*', '1a043804400438043b043b04380446043004200069007300200043007900720069006c006c0069006300');
16+
// UTF-16BE
17+
$utf16be = pack('H*', '041a043804400438043b043b04380446043000200069007300200043007900720069006c006c00690063');
18+
// KOI8-R
19+
$koi8r = pack('H*', 'ebc9d2c9ccccc9c3c120697320437972696c6c6963');
1420

15-
print "1: ". mb_strwidth($euc_jp, 'EUC-JP') . "\n";
16-
/*
21+
print "1: " . mb_strwidth($euc_jp, 'EUC-JP') . "\n";
22+
print "2: " . mb_strwidth($utf8, 'UTF-8') . "\n";
23+
print "3: " . mb_strwidth($utf16le, 'UTF-16LE') . "\n";
24+
print "4: " . mb_strwidth($utf16be, 'UTF-16BE') . "\n";
25+
print "5: " . mb_strwidth($koi8r, 'KOI8-R') . "\n";
1726

18-
print "2: ". mb_strwidth($euc_jp, 'EUC-JP') . "\n";
19-
print "3: ". mb_strwidth($euc_jp, 'EUC-JP') . "\n";
20-
// Note: Did not start form -22 offset. Staring from 0.
21-
print "4: ". mb_strwidth($euc_jp, 'EUC-JP') . "\n";
22-
23-
$str = mb_strwidth($euc_jp, 100, -10,'...','EUC-JP');
24-
($str === "") ? print "5 OK\n" : print "NG: $str\n";
25-
26-
$str = mb_strwidth($euc_jp, -100, 10,'...','EUC-JP');
27-
($str !== "") ? print "6 OK: $str\n" : print "NG: $str\n";
28-
*/
2927
?>
3028
--EXPECT--
3129
1: 68
30+
2: 10
31+
3: 21
32+
4: 21
33+
5: 21

0 commit comments

Comments
 (0)