Additional tests for mbstring extension

alexdowad · alexdowad · commit dc98c1346d75 · 2020-08-31T23:15:57.000+02:00
diff --git a/ext/mbstring/tests/mb_decode_numericentity.phpt b/ext/mbstring/tests/mb_decode_numericentity.phpt
@@ -19,7 +19,11 @@ echo mb_decode_numericentity('&#1000000000', $convmap), "\n";
 echo mb_decode_numericentity('&#9000000000', $convmap), "\n";
 echo mb_decode_numericentity('&#10000000000', $convmap), "\n";
 echo mb_decode_numericentity('&#100000000000', $convmap), "\n";
+
 echo mb_decode_numericentity('&#000000000000', $convmap), "\n";
+echo mb_decode_numericentity('&#00000000000', $convmap), "\n";
+echo mb_decode_numericentity('&#0000000000', $convmap), "\n";
+echo mb_decode_numericentity('&#000000000', $convmap), "\n";
 
 $convmap = [];
 echo mb_decode_numericentity('f&ouml;o', $convmap, "UTF-8")."\n";
@@ -41,5 +45,8 @@ aŒbœcŠdše€fg
 &#10000000000
 &#100000000000
 &#000000000000
+&#00000000000
+&#0000000000
+&#000000000
 f&ouml;o
 mb_decode_numericentity(): Argument #2 ($convmap) must have a multiple of 4 elements
diff --git a/ext/mbstring/tests/mb_strcut.phpt b/ext/mbstring/tests/mb_strcut.phpt
@@ -6,26 +6,77 @@ mb_strcut()
 output_handler=
 --FILE--
 <?php
-// TODO: Add more encodings
+function MBStringChars($string, $encoding) {
+  // Renders $string as "[hex hex ...]": one hex group per character under $encoding.
+  $toHex = function($piece) { return implode('', unpack('H*', $piece)); };
+  $hexGroups = array_map($toHex, mb_str_split($string, 1, $encoding));
+  return '[' . implode(' ', $hexGroups) . ']';
+}
+
 ini_set('include_path', __DIR__);
 include_once('common.inc');
 
 // EUC-JP
-$euc_jp = '0123����ʸ��������ܸ�Ǥ���EUC-JP��ȤäƤ��ޤ������ܸ�����ݽ�����';
+$euc_jp = pack('H*', '30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3');
+// UTF-8
+$utf8    = pack('H*', 'e288ae2045e28b856461203d2051'); // has 2 multi-byte characters: [e288ae 20 45 e28b85 64 61 20 3d 20 51]
+// UTF-16LE
+$utf16le = pack('H*', '1a043804400438043b043b04380446043004200069007300200043007900720069006c006c0069006300');
 
-print  mb_strcut($euc_jp,  6,   5,'EUC-JP') . "\n";
-print  mb_strcut($euc_jp,  0, 100,'EUC-JP') . "\n";
+print "== EUC-JP ==\n";
+print MBStringChars(mb_strcut($euc_jp,  6,   5,'EUC-JP'), 'EUC-JP') . "\n";
+print MBStringChars(mb_strcut($euc_jp,  5,   5,'EUC-JP'), 'EUC-JP') . "\n";
+print MBStringChars(mb_strcut($euc_jp,  0, 100,'EUC-JP'), 'EUC-JP') . "\n";
 
 $str = mb_strcut($euc_jp, 100, 10,'EUC-JP');
-($str === false) ? print "OK\n" : print "NG: $str\n";
+($str === false) ? print "OK\n" : print "No good\n";
 
 $str = mb_strcut($euc_jp, -100, 10,'EUC-JP');
-($str !== "") ?	print "OK: $str\n" : print "NG:\n";
+($str !== "") ?	print "OK\n" : print "No good\n";
+
+print "== UTF-8 ==\n";
+print MBStringChars(mb_strcut($utf8, 0, 0, 'UTF-8'), 'UTF-8') . "\n";
+print MBStringChars(mb_strcut($utf8, 0, 1, 'UTF-8'), 'UTF-8') . "\n";
+print MBStringChars(mb_strcut($utf8, 0, 2, 'UTF-8'), 'UTF-8') . "\n";
+print MBStringChars(mb_strcut($utf8, 0, 3, 'UTF-8'), 'UTF-8') . "\n";
+print MBStringChars(mb_strcut($utf8, 0, 4, 'UTF-8'), 'UTF-8') . "\n";
+print MBStringChars(mb_strcut($utf8, 0, 5, 'UTF-8'), 'UTF-8') . "\n";
+print MBStringChars(mb_strcut($utf8, 1, 2, 'UTF-8'), 'UTF-8') . "\n";
+print MBStringChars(mb_strcut($utf8, 1, 3, 'UTF-8'), 'UTF-8') . "\n";
+print MBStringChars(mb_strcut($utf8, 1, 4, 'UTF-8'), 'UTF-8') . "\n";
 
+print "== UTF-16LE ==\n";
+print MBStringChars(mb_strcut($utf16le, 0, 0, 'UTF-16LE'), 'UTF-16LE') . "\n";
+print MBStringChars(mb_strcut($utf16le, 0, 1, 'UTF-16LE'), 'UTF-16LE') . "\n";
+print MBStringChars(mb_strcut($utf16le, 0, 2, 'UTF-16LE'), 'UTF-16LE') . "\n";
+print MBStringChars(mb_strcut($utf16le, 0, 3, 'UTF-16LE'), 'UTF-16LE') . "\n";
+print MBStringChars(mb_strcut($utf16le, 1, 2, 'UTF-16LE'), 'UTF-16LE') . "\n";
+print MBStringChars(mb_strcut($utf16le, 1, 3, 'UTF-16LE'), 'UTF-16LE') . "\n";
+print MBStringChars(mb_strcut($utf16le, 1, 4, 'UTF-16LE'), 'UTF-16LE') . "\n";
 
 ?>
 --EXPECT--
-��ʸ
-0123����ʸ��������ܸ�Ǥ���EUC-JP��ȤäƤ��ޤ������ܸ�����ݽ�����
+== EUC-JP ==
+[a4ce cab8]
+[a4b3 a4ce]
+[30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3]
+OK
 OK
-OK: 0123����ʸ
+== UTF-8 ==
+[]
+[]
+[]
+[e288ae]
+[e288ae 20]
+[e288ae 20 45]
+[]
+[e288ae]
+[e288ae 20]
+== UTF-16LE ==
+[]
+[]
+[1a04]
+[1a04]
+[1a04]
+[1a04]
+[1a04 3804]
diff --git a/ext/mbstring/tests/mb_strwidth.phpt b/ext/mbstring/tests/mb_strwidth.phpt
@@ -4,28 +4,30 @@ mb_strwidth()
 <?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
 --FILE--
 <?php
-// TODO: Add more encoding, strings.....
-//$debug = true;
 ini_set('include_path', __DIR__);
 include_once('common.inc');
 
 // EUC-JP
-$euc_jp = '0123����ʸ��������ܸ�Ǥ���EUC-JP��ȤäƤ��ޤ������ܸ�����ݽ�����';
+$euc_jp  = pack('H*', '30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3');
+// UTF-8
+$utf8    = '∮ E⋅da = Q';
+// UTF-16LE
+$utf16le = pack('H*', '1a043804400438043b043b04380446043004200069007300200043007900720069006c006c0069006300');
+// UTF-16BE
+$utf16be = pack('H*', '041a043804400438043b043b04380446043000200069007300200043007900720069006c006c00690063');
+// KOI8-R
+$koi8r   = pack('H*', 'ebc9d2c9ccccc9c3c120697320437972696c6c6963');
 
-print  "1: ". mb_strwidth($euc_jp, 'EUC-JP') . "\n";
-/*
+print "1: " . mb_strwidth($euc_jp,  'EUC-JP')   . "\n";
+print "2: " . mb_strwidth($utf8,    'UTF-8')    . "\n";
+print "3: " . mb_strwidth($utf16le, 'UTF-16LE') . "\n";
+print "4: " . mb_strwidth($utf16be, 'UTF-16BE') . "\n";
+print "5: " . mb_strwidth($koi8r,   'KOI8-R')   . "\n";
 
-print  "2: ". mb_strwidth($euc_jp, 'EUC-JP') . "\n";
-print  "3: ". mb_strwidth($euc_jp, 'EUC-JP') . "\n";
-// Note: Did not start form -22 offset. Staring from 0.
-print  "4: ". mb_strwidth($euc_jp, 'EUC-JP') . "\n";
-
-$str = mb_strwidth($euc_jp, 100, -10,'...','EUC-JP');
-($str === "") ? print "5 OK\n" : print "NG: $str\n";
-
-$str = mb_strwidth($euc_jp, -100, 10,'...','EUC-JP');
-($str !== "") ?	print "6 OK: $str\n" : print "NG: $str\n";
-*/
 ?>
 --EXPECT--
 1: 68
+2: 10
+3: 21
+4: 21
+5: 21