Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
57e82dc
mb_str_split() added
legale Dec 23, 2018
2b99b9d
mb_str_split() error fixed
legale Dec 23, 2018
47ce704
mb_str_split() error fixed
legale Dec 23, 2018
63c8e79
Update .gitignore
petk Dec 23, 2018
bef21b5
new mb_str_split() using libmbfl library functions
legale Dec 28, 2018
91a7309
new mb_str_split() using libmbfl library functions
legale Dec 28, 2018
223a8a9
mb_str_split() optional argument "encoding" added + minor changes
legale Dec 28, 2018
79978f7
mb_str_split() minor changes in function argument names
legale Dec 28, 2018
cbdf106
mb_str_split() tests
legale Dec 28, 2018
64cd160
minor changes to pass appveyor tests
legale Dec 28, 2018
24082c4
minor src and tests refactoring
legale Jan 2, 2019
781c1d6
// comments replaced with /**/
legale Jan 2, 2019
62bcf9e
more tests
legale Jan 5, 2019
7f2ce93
minor tests changes
legale Jan 10, 2019
314619e
rerun test
legale Jan 10, 2019
e09d115
mb_str_split function rewritten completely
legale Jan 13, 2019
d9bb662
mbfl collector_substr moved back to static
legale Jan 13, 2019
2f412a3
tests improved
legale Jan 13, 2019
5a64309
trying to fix a memory leak 1
legale Jan 13, 2019
4b0523f
tests minor changes
legale Jan 13, 2019
f6ee1fa
tests minor changes
legale Jan 13, 2019
12c5928
refactoring + faster way to parse UTF-16
legale Jan 17, 2019
e202945
refactoring & more tests
legale Jan 17, 2019
65eaec9
minor comment changes
legale Jan 17, 2019
87824d8
minor changes
legale Jan 17, 2019
95e0647
comments changes
legale Jan 18, 2019
c0b3f57
`git checkout ecd533d -- ext/mbstring/libmbfl/` path /ext/mbstring/li…
legale Jan 18, 2019
f036661
UTF-16 parse bug fixed and related test added
legale Jan 19, 2019
d868059
utf-16 optimization
legale Jan 22, 2019
ad77e03
endian.h replaced with brg_endian.h
legale Jan 22, 2019
2ff7061
minor changes + bug fix in php_mb_mbchar_bytes_ex()
legale Jan 23, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 57 additions & 3 deletions ext/mbstring/libmbfl/filters/mbfilter_utf16.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,68 @@
#include "mbfilter.h"
#include "mbfilter_utf16.h"

/*
 * Repetition helpers used to build the character length tables.
 * Bk(n) expands to a comma separated list holding k copies of n; every
 * macro is formed by writing the previous (half sized) one twice.
 */
#define B2(n)     n, n
#define B4(n)     B2(n), B2(n)
#define B8(n)     B4(n), B4(n)
#define B16(n)    B8(n), B8(n)
#define B32(n)    B16(n), B16(n)
#define B64(n)    B32(n), B32(n)
#define B128(n)   B64(n), B64(n)
#define B256(n)   B128(n), B128(n)
#define B512(n)   B256(n), B256(n)
#define B1024(n)  B512(n), B512(n)
#define B2048(n)  B1024(n), B1024(n)
#define B4096(n)  B2048(n), B2048(n)
#define B8192(n)  B4096(n), B4096(n)
#define B16384(n) B8192(n), B8192(n)

/*
 * UTF-16 character length table, one entry per 16-bit index value.
 * Every entry is 2 except indexes 0xD800-0xDBFF (high surrogates, the
 * first half of a surrogate pair), which are 4.
 */
const unsigned char mblen_table_utf16_le[65536] = {
	/* 0x0000-0xBFFF */
	B16384(2), B16384(2), B16384(2),
	/* 0xC000-0xCFFF */
	B4096(2),
	/* 0xD000-0xD7FF */
	B2048(2),
	/* 0xD800-0xDBFF: high surrogates */
	B1024(4),
	/* 0xDC00-0xDFFF: low surrogates */
	B1024(2),
	/* 0xE000-0xFFFF */
	B8192(2),
};

/*
 * One 256-entry slice of the byte-swapped length table: positions
 * 0xD8-0xDB hold 4, every other position holds 2.
 */
#define BY B128(2), B64(2), B16(2), B8(2), B4(4), B4(2), B32(2)

/* Sixteen consecutive slices; only needed while the table is spelled out. */
#define BY_X16 BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY

/*
 * Length table indexed by a 16-bit value whose two bytes are swapped:
 * an entry is 4 exactly when the low byte of the index is 0xD8-0xDB,
 * and 2 otherwise. The table is 256 repetitions of the BY slice.
 */
const unsigned char mblen_table_utf16_be[65536] = {
	BY_X16, BY_X16, BY_X16, BY_X16,
	BY_X16, BY_X16, BY_X16, BY_X16,
	BY_X16, BY_X16, BY_X16, BY_X16,
	BY_X16, BY_X16, BY_X16, BY_X16,
};
#undef BY_X16

/* Alias names for the UTF-16 encoding entry; the list is NULL-terminated. */
static const char *mbfl_encoding_utf16_aliases[] = {
	"utf16",
	NULL
};

const mbfl_encoding mbfl_encoding_utf16 = {
mbfl_no_encoding_utf16,
"UTF-16",
"UTF-16",
(const char *(*)[])&mbfl_encoding_utf16_aliases,
NULL,
mblen_table_utf16_be,
MBFL_ENCTYPE_MWC2BE,
&vtbl_utf16_wchar,
&vtbl_wchar_utf16
Expand All @@ -52,7 +106,7 @@ const mbfl_encoding mbfl_encoding_utf16be = {
"UTF-16BE",
"UTF-16BE",
NULL,
NULL,
mblen_table_utf16_be,
MBFL_ENCTYPE_MWC2BE,
&vtbl_utf16be_wchar,
&vtbl_wchar_utf16be
Expand All @@ -63,7 +117,7 @@ const mbfl_encoding mbfl_encoding_utf16le = {
"UTF-16LE",
"UTF-16LE",
NULL,
NULL,
mblen_table_utf16_le,
MBFL_ENCTYPE_MWC2LE,
&vtbl_utf16le_wchar,
&vtbl_wchar_utf16le
Expand Down
142 changes: 142 additions & 0 deletions ext/mbstring/libmbfl/mbfl/brg_endian.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.

LICENSE TERMS

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;

2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;

3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.

DISCLAIMER

This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
*/

#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H

/* Values that PLATFORM_BYTE_ORDER is set to by the detection logic */
#define IS_BIG_ENDIAN    4321  /* most significant byte is byte 0, e.g. mc68k */
#define IS_LITTLE_ENDIAN 1234  /* least significant byte is byte 0, e.g. i386 */

/* The whole include section is wrapped in #if 0, so none of the system */
/* headers listed here are pulled in by this file. The detection code */
/* therefore only sees endian symbols that the compiler predefines or that */
/* were defined by headers included before this one. */
/* NOTE(review): the reason for disabling the includes is not recorded */
/* here - confirm before re-enabling. */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
#endif
#endif

/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */

/* SYMBOL form: BIG_ENDIAN, LITTLE_ENDIAN and BYTE_ORDER */
#if defined(BIG_ENDIAN) && !defined(LITTLE_ENDIAN)
/* only the big-endian marker is present */
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN)
/* only the little-endian marker is present */
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif defined(BIG_ENDIAN) && defined(LITTLE_ENDIAN)
/* both are present as constants, so BYTE_ORDER has to pick one of them */
#  if defined(BYTE_ORDER) && BYTE_ORDER == BIG_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#endif

/* _SYMBOL form: _BIG_ENDIAN, _LITTLE_ENDIAN and _BYTE_ORDER */
#if defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)
/* only the big-endian marker is present */
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
/* only the little-endian marker is present */
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif defined(_BIG_ENDIAN) && defined(_LITTLE_ENDIAN)
/* both are present as constants, so _BYTE_ORDER has to pick one of them */
#  if defined(_BYTE_ORDER) && _BYTE_ORDER == _BIG_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined(_BYTE_ORDER) && _BYTE_ORDER == _LITTLE_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#endif

/* __SYMBOL form: __BIG_ENDIAN, __LITTLE_ENDIAN and __BYTE_ORDER */
#if defined(__BIG_ENDIAN) && !defined(__LITTLE_ENDIAN)
/* only the big-endian marker is present */
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
/* only the little-endian marker is present */
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN)
/* both are present as constants, so __BYTE_ORDER has to pick one of them */
#  if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#endif

/* __SYMBOL__ form: __BIG_ENDIAN__, __LITTLE_ENDIAN__ and __BYTE_ORDER__ */
#if defined(__BIG_ENDIAN__) && !defined(__LITTLE_ENDIAN__)
/* only the big-endian marker is present */
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
/* only the little-endian marker is present */
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN__) && defined(__LITTLE_ENDIAN__)
/* both are present as constants, so __BYTE_ORDER__ has to pick one of them */
#  if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __BIG_ENDIAN__
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#endif

/* If the platform byte order could not be determined from the endian */
/* symbols, fall back to compiler predefines and then to common machine */
/* defines */
#if !defined(PLATFORM_BYTE_ORDER)

/* GCC and Clang predefine __BYTE_ORDER__ and the __ORDER_*_ENDIAN__ values */
/* without any header being included. Consult them first so that big-endian */
/* targets absent from the machine list (for example s390x) do not fall */
/* through to the little-endian default at the end of this chain. */
#if defined( __BYTE_ORDER__ ) && defined( __ORDER_LITTLE_ENDIAN__ ) && \
    ( __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN

#elif defined( __BYTE_ORDER__ ) && defined( __ORDER_BIG_ENDIAN__ ) && \
    ( __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN

/* machines known to be little-endian */
#elif defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
    defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
    defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
    defined( vax ) || defined( vms ) || defined( VMS ) || \
    defined( __VMS ) || defined( _M_X64 )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN

/* machines known to be big-endian */
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
    defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
    defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
    defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
    defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
    defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
    defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN

/* ARM can run in either order; __BIG_ENDIAN marks the big-endian mode */
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# else
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
/* Last resort: assume little-endian. Switch the 1 and 0 on the two */
/* EDIT HERE lines to assume big-endian instead; the #error is only */
/* reached when both are set to 0. */
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#else
# error Please edit the "EDIT HERE" lines in brg_endian.h to set the platform byte order
#endif

#endif

#endif
Loading