Browse files

renamed jstring to mbstring.

  • Loading branch information...
1 parent 9907826 commit 09197a4531025ca26271b5c52772cc0419c83e34 Rui Hirokawa committed May 1, 2001
View
2 ext/mbstring/CREDITS
@@ -0,0 +1,2 @@
+Multibyte (Japanese) String Functions
+Tsukada Takuya
View
5 ext/mbstring/EXPERIMENTAL
@@ -0,0 +1,5 @@
+this extension is experimental,
+its functions may change their names
+or move to extension all together
+so do not rely too much on them
+you have been warned!
View
7 ext/mbstring/Makefile.in
@@ -0,0 +1,7 @@
+# $Id$
+
+LTLIBRARY_NAME = libmbstring.la
+LTLIBRARY_SOURCES = mbfilter_ja.c mbfilter.c mbstring.c
+LTLIBRARY_SHARED_NAME = mbstring.la
+
+include $(top_srcdir)/build/dynlib.mk
View
774 ext/mbstring/README_PHP3-i18n-ja
@@ -0,0 +1,774 @@
+==========================================
+ README for I18N Package
+==========================================
+
+o Name and location of package
+
+Name: php-3.0.18-i18n-ja-2
+Location: http://www.happysize.co.jp/techie/php-ja-jp/
+ ftp://ftp.happysize.co.jp/php-ja-jp/
+ http://php.vdomains.org/
+ ftp://ftp.vdomains.org/pub/php-ja-jp/
+ http://php.jpnnet.com/
+
+Currently, this I18N version of PHP only adds Japanese support to base
+PHP. It allows you to use Japanese in scripts, as well as conversion
+between various Japanese encodings. It will work perfectly fine with
+ASCII with i18n option enabled. (note: executable is bit larger due
+to UNICODE table). The basic design approach is to allow for other
+languages to be added in the future. Developers are encouraged to join
+us!
+
+For more information on Japanese encodings, please refer to the
+section "Additional Notes."
+
+
+o What is this package?
+
+This package allows you to handle multiple Japanese encodings (SJIS, EUC,
+UTF-8, JIS) in PHP. If you find any bugs in this package, please report
+them to the appropriate mailing list. For now, the PHP-jp mailing list
+is the best place for this.
+
+PHP-jp ML mailto:PHP-jp@sidecar.ics.es.osaka-u.ac.jp
+ http://sidecar.ics.es.osaka-u.ac.jp/php-jp/
+ (discussions are in Japanese)
+
+
+o Who should use this
+
+Due to lack of documentation, it's not intended for beginners. If
+something goes wrong, be prepared to fix it on your own.
+
+
+o Warranty and Copyright
+
+There is no warranty with this package. Use it at your own risk.
+
+Please refer to the source code for the copyrights. In general, each
+program's copyright is owned by the programmer. Unless you obey the
+copyright holders restrictions, you are not allowed to use it in any
+form.
+
+
+o Redistribution
+
+As described in the source code, this package and the components are
+allowed to be redistributed with certain restrictions.
+
+Due to this package being still in beta, please try to redistribute
+it as an entire package. Please try not to distribute it as a form
+of patch. Because we would prefer to have this package distributed
+as one single package (not patch of patch of patch), avoid releasing
+any patch to this package.
+
+
+o Who made this
+
+A team of volunteers, PHP3 Internationalization, has been contributing
+their free time producing it. Although we are not related to the core
+PHP programmers, we are hoping to have our modifications merged into the
+core distribution in the near future. Thus, we did not call this a
+"Japanese Patch" (or distribution). Our final goal is to have true
+i18nized PHP!
+
+For anyone interested in this project, please drop us a line.
+
+Contact Address:
+ phpj-dev@kage.net
+ (Discussions are in Japanese, but feel free to write us in English)
+
+Webpage (English and Japanese):
+ http://php.jpnnet.com/
+
+Project Outline (Japanese):
+ http://www.happysize.co.jp/techie/php-ja-jp/spec.htm
+
+Developers:
+ Hironori Sato <satoh@jpnnet.com>
+ Shigeru Kanemoto <sgk@happysize.co.jp>
+ Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
+ U. Kenkichi <kenkichi@axes.co.jp>
+ Tateyama <tateyan@amy.hi-ho.ne.jp>
+ Other gracious contributors
+
+
+o Future plans
+
+- fulfilling what's written in outline
+- support for languages other than Japanese
+- make the character conversion as a library (?)
+- more testing
+
+
+o Special Thanks to
+
+PHP Japanese webpage maintainer, Hirokawa-san
+ http://www.cityfujisawa.ne.jp/%7Elouis/apps/phpfi/
+PHP-JP ML's Yamamoto-san
+ http://sidecar.ics.es.osaka-u.ac.jp/php-jp/
+Previous jp-patch developers
+
+
+
+==========================================
+ Advantages of using I18N package
+==========================================
+
+- allows you to use various character encodings for script files and
+ http output
+- distinguish character encoding in POST/GET/COOKIE
+- proper mail output using JIS as body and MIME/Base64/JIS subject
+- if http output's Content-Type is text/html, it will set proper charset
+- stable character encoding conversion
+- multibyte regex
+
+
+
+==========================================
+ Installation
+==========================================
+
+o Summary
+
+Add --enable-i18n option when running configure. For your own setup,
+add any other appropriate options as well.
+
+Don't forget to copy php3.ini-dist to desired location.
+(ex. /usr/local/lib/php3.ini)
+
+If you have already installed PHP3, copy all the entries in php3.ini-dist
+which start with "i18n.xxxx" to php3.ini.
+
+
+o configure option
+ --enable-i18n
+ include i18n features
+
+ --enable-mbregex
+ include multibyte regex library
+ (without i18n enabled, mbregex functions will not function)
+
+
+o creating cgi version
+
+ % tar xvzf php-3.0.18-i18n-ja-2.tar.gz
+ % cd php-3.0.18-i18n-ja-2
+ % ./configure --enable-i18n --enable-mbregex
+ % make
+
+
+o creating Apache version (regular module)
+
+ % tar xvzf php-3.0.18-i18n-ja-2.tar.gz
+ % tar xvzf apache_1.3.x.tar.gz
+ % cd apache_1.3.x
+ % ./configure
+ % cd ../php-3.0.18-i18n-ja-2
+ % ./configure --with-apache=../apache_1.3.x --enable-i18n --enable-mbregex
+ % make
+ % make install
+ % cd ../apache_1.3.x
+ % ./configure --activate-module=src/modules/php3/libphp3.a
+ % make
+ % make install
+
+
+o creating Apache DSO version
+
+ create DSO capable Apache first
+ % tar xvzf apache_1.3.x.tar.gz
+ % cd apache-1.3.x
+ % ./configure --enable-shared=max
+ % make
+ % make install
+
+ now create php3
+ % cd php-3.0.18-i18n-ja-2
+ % ./configure --with-apxs=/usr/local/apache/bin/apxs --enable-i18n \
+ --enable-mbregex
+ % make
+ % make install
+
+
+==========================================
+ Additional Notes
+==========================================
+
+o Multibyte regex library
+
+From beta4, we have included the multibyte (mb) regex library which comes with
+Ruby. With this addition, you can now use regex in EUC, SJIS and UTF-8
+encoding. To avoid any conflicts with HSREGEX included with Apache,
+each function name has been changed. Therefore, mb regex functions are
+named differently from the original ereg functions in PHP. The character
+encoding used in mb regex is configured in i18n.internal_encoding.
+
+
+o Binary Output
+
+If http output encoding is set to other than 'pass', conversion of encoding
+from internal encoding to http output is done automatically. Thus,
+if you prefer to spit out anything in raw binary format, your data
+may be corrupted. In such event, set http_output to 'pass'.
+
+ex.
+ <?
+ i18n_http_output("pass");
+ ...
+ echo $the_binary_data_string;
+ ?>
+
+
+o Content-Type
+
+Depending on the setting of http_output, PHP will output the proper charset.
+ex. Content-Type: text/html; charset="..."
+
+Be aware of following:
+
+- If you set Content-Type header using header() function, that will
+ override the automatic addition of charset.
+- Be cautious when you set i18n_http_output, since if any output is
+ made prior to this, proper header may have been sent out to the
+ client already.
+
+
+o In the event of trouble
+
+If you find any bugs or trouble, please contact us at the above address.
+It may help us to track the problem if you send us the script as well.
+
+If you encounter any memory related error such as segmentation violation,
+add --enable-debug when you run configure. This will give you more
+detailed information on where the error has occurred. The error is stored
+in the server log or regular http output in CGI mode.
+
+
+o About Japanese encodings
+
+For historical reasons, there are multiple character encodings used
+for Japanese. The most common encodings are: SJIS, EUC, JIS, and UTF-8.
+Here is a (very) brief description of them:
+
+EUC
+ commonly used in UNIX environment
+ 8bit-8bit combo
+ always >=0x80
+
+SJIS
+ commonly used in Mac or PCs
+ similar to EUC
+ mostly 8bit-8bit (some 8bit-7bit)
+ mostly >=0x80
+ there are some halfwidth (size of ASCII) multibytes
+
+JIS
+ commonly used in 7bit environment (nntp and smtp)
+ starts with escaping char, \033 and a few more characters
+
+UTF-8
+ 16bit+ encoding
+ defines many languages existing in this world
+ see http://www.unicode.org/ for more detail
+
+Because of having all these character encodings, PHP needs to translate
+between these encodings on the fly. Also, the addition of the mb regex
+library allows you to handle mb strings without fear of getting mb char
+chopped in half.
+
+Since Japanese is not the only language with multiple encodings, we
+encourage other developers to modify our code to suit your needs. We
+definitely need people to work with Korean, Chinese (both traditional
+and simplified), and Russian. Let us know if you are interested in
+this project!
+
+
+
+==========================================
+ php3.ini setting
+==========================================
+
+The following init options will allow you to change the default settings.
+Define these settings in the global section of php3.ini.
+
+All keywords are case-insensitive.
+
+o Encoding naming
+
+ For each encoding, there are three names: standardized, alias, MIME
+
+ - UTF-8
+ standard: UTF-8
+ alias: N/A
+ mime: UTF-8
+
+ - ASCII
+ standard: ASCII
+ alias: N/A
+ mime: US-ASCII
+
+ - Japanese EUC
+ standard: EUC-JP
+ alias: EUC, EUC_JP, eucJP, x-euc-jp
+ mime: EUC-JP
+
+ - Shift JIS
+ standard: SJIS
+ alias: x-sjis, MS_Kanji
+ mime: Shift_JIS
+
+ - JIS
+ standard: JIS
+ alias: N/A
+ mime: ISO-2022-JP
+
+ - Quoted-Printable
+ standard: Quoted-Printable
+ alias: qprint
+ mime: N/A
+
+ - BASE64
+ standard: BASE64
+ alias: N/A
+ mime: N/A
+
+ - no conversion
+ standard: pass
+ alias: none
+ mime: N/A
+
+ - auto encoding detection
+ standard: auto
+ alias: unknown
+ mime: N/A
+
+ * N/A - Not Applicable
+
+o i18n.http_output - default http output encoding
+
+ i18n.http_output = EUC-JP|SJIS|JIS|UTF-8|pass
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+ pass: no conversion
+
+ The default is pass (internal encoding is used)
+ It can be re-configured on the fly using i18n_http_output().
+
+
+o i18n.internal_encoding - internal encoding
+
+ i18n.internal_encoding = EUC-JP|SJIS|UTF-8
+ EUC-JP : EUC
+ SJIS: SJIS
+ UTF-8: UTF-8
+
+ The default is EUC-JP.
+
+ PHP parser is designed based on using ISO-8859-1. For other
+ encodings, following conditions have to be satisfied in order
+ to use them:
+ - per byte encoding
+ - single byte character in range of 00h-7fh which is compatible
+ with ASCII
+ - multibyte without 00h-7fh
+ In case of Japanese, EUC-JP and UTF-8 are the only encodings that
+ meet these criteria.
+
+ If i18n.internal_encoding and i18n.http_output differs, conversion
+ takes place at the time of output. If you convert any data within
+ PHP scripts to URL encoding, BASE64 or Quoted-Printable, encoding
+ stays as defined in i18n.internal_encoding. Thus, if you would
+ prefer to encode in compliance with i18n.http_output, you need
+ to manually convert encoding.
+
+ ex. $str = urlencode( i18n_convert($str, i18n_http_output()) );
+
+ Encoding such as ISO-2022-** and HZ encoding which uses escape
+ sequences can not be used as internal encoding. If used, they
+ result in following errors:
+ - parser pukes funky error
+ - magic_quotes_*** breaks encoding (SJIS may have similar problem)
+ - string manipulation and regex will malfunction
+
+
+o i18n.script_encoding - script encoding
+
+ i18n.script_encoding = auto|EUC-JP|SJIS|JIS|UTF-8
+ auto: automatic
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+
+ The default is auto.
+ The script's encoding is converted to i18n.internal_encoding before
+ entering the script parser.
+
+ Be aware that auto detection may fail under some conditions.
+ For best auto detection, add a multibyte character at the beginning of
+ the script.
+
+
+o i18n.http_input - handling of http input (GET/POST/COOKIE)
+
+ i18n.http_input = pass|auto
+ auto: auto conversion
+ pass: no conversion
+
+ The default is auto.
+ If set to pass, no conversion will take place.
+ If set to auto, it will automatically detect the encoding. If
+ detection is successful, it will convert to the proper internal
+ encoding. If not, it will assume the input as defined in
+ i18n.http_input_default.
+
+o i18n.http_input_default - default http input encoding
+
+ i18n.http_input_default = pass|EUC-JP|SJIS|JIS|UTF-8
+ pass: no conversion
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+
+ The default is pass.
+ This option is only effective as long as i18n.http_input is set to
+ auto. If the auto detection fails, this encoding is used as an
+ assumption to convert the http input to the internal encoding.
+ If set to pass, no conversion will take place.
+
+o sample settings
+
+ 1) For most flexibility, we recommend using following example.
+ i18n.http_output = SJIS
+ i18n.internal_encoding = EUC-JP
+ i18n.script_encoding = auto
+ i18n.http_input = auto
+ i18n.http_input_default = SJIS
+
+ 2) To avoid unexpected encoding problems, try these:
+
+ i18n.http_output = pass
+ i18n.internal_encoding = EUC-JP
+ i18n.script_encoding = pass
+ i18n.http_input = pass
+ i18n.http_input_default = pass
+
+
+
+==========================================
+ PHP functions
+==========================================
+
+The following describes the additional PHP functions.
+
+All keywords are case-insensitive.
+
+o i18n_http_output(encoding)
+o encoding = i18n_http_output()
+
+ This will set the http output encoding. Any output following this
+ function will be controlled by this function. If no argument is given,
+ the current http output encode setting is returned.
+
+ encodings
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+ pass: no conversion
+
+ NONE is not allowed
+
+
+o encoding = i18n_internal_encoding()
+
+ Returns the current internal encoding as a string.
+
+ internal encoding
+ EUC-JP : EUC
+ SJIS: SJIS
+ UTF-8: UTF-8
+
+
+o encoding = i18n_http_input()
+
+ Returns http input encoding.
+
+ encodings
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+ pass: no conversion (only if i18n.http_input is set to pass)
+
+
+o string = i18n_convert(string, encoding)
+ string = i18n_convert(string, encoding, pre-conversion-encoding)
+
+ Returns converted string in desired encoding. If
+ pre-conversion-encoding is not defined, the given
+ string is assumed to be in internal encoding.
+
+ encoding
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+ pass: no conversion
+
+ pre-conversion-encoding
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+ pass: no conversion
+ auto: auto detection
+
+
+o encoding = i18n_discover_encoding(string)
+
+ Encoding of the given string is returned (as a string).
+
+ encoding
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+ ASCII: ASCII (only 09h, 0Ah, 0Dh, 20h-7Eh)
+ pass: unable to determine (text is too short to determine)
+ unknown: unknown or possible error
+
+
+o int = mbstrlen(string)
+o int = mbstrlen(string, encoding)
+
+ Returns character length of a given string. If no encoding is defined,
+ the encoding of string is assumed to be the internal encoding.
+
+ encoding
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+ auto: automatic
+
+
+o int = mbstrpos(string1, string2)
+o int = mbstrpos(string1, string2, start)
+o int = mbstrpos(string1, string2, start, encoding)
+
+ Same as strpos. If no encoding is defined, the encoding of string
+ is assumed to be the internal encoding.
+
+ encoding
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+
+
+o int = mbstrrpos(string1, string2)
+o int = mbstrrpos(string1, string2, encoding)
+
+ Same as strrpos. If no encoding is defined, the encoding of string
+ is assumed to be the internal encoding.
+
+ encoding
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+
+
+o string = mbsubstr(string, position)
+o string = mbsubstr(string, position, length)
+o string = mbsubstr(string, position, length, encoding)
+
+ Same as substr. If no encoding is defined, the encoding of string
+ is assumed to be the internal encoding.
+
+ encoding
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+
+
+o string = mbstrcut(string, position)
+o string = mbstrcut(string, position, length)
+o string = mbstrcut(string, position, length, encoding)
+
+ Same as subcut. If position is the 2nd byte of a mb character, it will cut
+ from the first byte of that character. It will cut the string without
+ chopping a single byte from a mb character. In other words, if you
+ set length to 5, you will only get two mb characters. If no encoding
+ is defined, the encoding of string is assumed to be the internal encoding.
+
+ encoding
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+
+
+o string = i18n_mime_header_encode(string)
+ MIME encode the string in the format of =?ISO-2022-JP?B?[string]?=.
+
+
+o string = i18n_mime_header_decode(string)
+ MIME decodes the string.
+
+
+o string = i18n_ja_jp_hantozen(string)
+o string = i18n_ja_jp_hantozen(string, option)
+o string = i18n_ja_jp_hantozen(string, option, encoding)
+
+ Conversion between full width character and halfwidth character.
+
+ option
+ The following options are allowed. The default is "KV".
+ Acronym: FW = fullwidth, HW = halfwidth
+
+ "r" : FW alphabet -> HW alphabet
+
+ "R" : HW alphabet -> FW alphabet
+
+ "n" : FW number -> HW number
+
+ "N" : HW number -> FW number
+
+ "a" : FW alpha numeric (21h-7Eh) -> HW alpha numeric
+
+ "A" : HW alpha numeric (21h-7Eh) -> FW alpha numeric
+
+ "k" : FW katakana -> HW katakana
+
+ "K" : HW katakana -> FW katakana
+
+ "h" : FW hiragana -> HW katakana
+
+ "H" : HW katakana -> FW hiragana
+
+ "c" : FW katakana -> FW hiragana
+
+ "C" : FW hiragana -> FW katakana
+
+ "V" : merge dakuon character. only works with "K" and "H" option
+
+ encoding
+ If no encoding is defined, the encoding of string is assumed to be
+ the internal encoding.
+ EUC-JP : EUC
+ SJIS: SJIS
+ JIS : JIS
+ UTF-8: UTF-8
+
+
+int = mbereg(regex_pattern, string, string)
+int = mberegi(regex_pattern, string, string)
+ mb version of ereg() and eregi()
+
+
+string = mbereg_replace(regex_pattern, string, string)
+string = mberegi_replace(regex_pattern, string, string)
+ mb version of ereg_replace() and eregi_replace()
+
+
+string_array = mbsplit(regex, string, limit)
+ mb version of split()
+
+
+
+==========================================
+ FAQ
+==========================================
+
+Here, we have gathered some commonly asked questions on PHP-jp mailing
+list.
+
+o To use Japanese in GET method
+
+If you need to assign Japanese text in GET method with argument, such as;
+xxxx.php?data=<Japanese text>, use urlencode function in PHP. If not,
+text may not be passed onto action php properly.
+
+ex: <a href="hoge.php?data=<? echo urlencode($data) ?>">Link</a>
+
+
+o When passing data via GET/POST/COOKIE, \ character sneaks in
+
+When using SJIS as internal encoding, or passed-on data includes '"\,
+PHP automatically inserts escaping character, \. Set magic_quotes_gpc
+in php3.ini from On to Off. An alternative work around to this problem
+is to use StripSlashes().
+
+If $quote_str is in SJIS and you would like to extract Japanese text,
+use ereg_replace as follows:
+
+ereg_replace(sprintf("([%c-%c%c-%c]\\\\)\\\\",0x81,0x9f,0xe0,0xfc),
+ "\\1",$quote_str);
+
+This will effectively extract Japanese text out of $quote_str.
+
+
+o Sometimes, encoding detection fails
+
+If i18n_http_input() returns 'pass', it's likely that PHP failed to
+detect whether it's SJIS or EUC. In such case, use <input type=hidden
+value="some Japanese text"> to properly detect the incoming text's
+encoding.
+
+
+
+==========================================
+ Japanese Manual
+==========================================
+Translated manual done by "PHP Japanese Manual Project" :
+
+http://www.php.net/manual/ja/manual.php
+
+Starting 3.0.18-i18n-ja, we have removed doc-jp from tarball package.
+
+
+==========================================
+ Change Logs
+==========================================
+
+o 2000-10-28, Rui Hirokawa <hirokawa@php.net>
+
+This patch is derived from php-3.0.15-i18n-ja as well as php-3.0.16 by
+Kuwamura applied to original php-3.0.18. It also includes following fixes:
+
+1) allows you to set charset in mail().
+2) fixed mbregex definitions to avoid conflicts with system regex
+3) php3.ini-dist now uses PASS for http_output instead of SJIS
+
+o 2000-11-24, Hironori Sato <satoh@yyplanet.com>
+
+Applied the above patch and added detection for gdImageStringTTF in configure.
+Following setups are known to work:
+
+gd-1.3-6, gd-devel-1.3-6, freetype-1.3.1-5, freetype-devel-1.3.1-5
+ ImageTTFText($im,$size,$angle,$x1,$y1,$color,"/path/to/font.ttf",
+ i18n_convert("日本語", "UTF-8"));
+ ImageGif($im);
+
+gd-1.7.3-1k1, gd-devel-1.7.3-1k1, freetype-1.3.1-5, freetype-devel-1.3.1-5
+ ImageTTFText($im,$size,$angle,$x1,$y1,$color,"/path/to/font.ttf","日本語");
+ ImagePng($im);
+ * i18n_internal_encoding = EUC or SJIS
+
+For any gd libraries before 1.6.2, you need to use i18n_convert. For
+gd-1.5.2/3, upgrade to anything above 1.7 to use ImageTTFText without
+using i18n_convert. As long as you have internal_encoding set to EUC or
+SJIS, ImageTTFText should work without mojibake. Again, make sure you
+have i18n_http_output("pass") before calling ImageGif, ImagePng, ImageJpeg!
+
+o 2000-12-09, Rui Hirokawa <hirokawa@php.net>
+
+Fixed mail() which was causing segmentation fault when header was null.
+
View
23 ext/mbstring/config.m4
@@ -0,0 +1,23 @@
+dnl $Id$
+dnl config.m4 for extension mbstring
+
+PHP_ARG_ENABLE(mbstring, whether to enable multibyte string support,
+[ --enable-mbstring Enable multibyte string support])
+
+if test "$PHP_MBSTRING" != "no"; then
+ AC_DEFINE(HAVE_MBSTRING,1,[ ])
+ PHP_EXTENSION(mbstring, $ext_shared)
+fi
+
+AC_MSG_CHECKING(whether to enable japanese encoding translation)
+AC_ARG_ENABLE(mbstr_enc_trans,
+[ --enable-mbstr-enc-trans Enable japanese encoding translation],[
+ if test "$enableval" = "yes" ; then
+ AC_DEFINE(MBSTR_ENC_TRANS, 1, [ ])
+ AC_MSG_RESULT(yes)
+ else
+ AC_MSG_RESULT(no)
+ fi
+],[
+ AC_MSG_RESULT(no)
+])
View
8,244 ext/mbstring/mbfilter.c
8,244 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
View
539 ext/mbstring/mbfilter.h
@@ -0,0 +1,539 @@
+/* charset=UTF-8 */
+
+/*
+ * "streamable kanji code filter and converter"
+ *
+ * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
+ *
+ * This software is released under the GNU Lesser General Public License.
+ * Please read the following detail of the licence (in japanese).
+ *
+ * ◆使用許諾条件◆
+ *
+ * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
+ * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
+ * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
+ * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
+ * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
+ * することはできません。
+ *
+ * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
+ * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
+ * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
+ * による許諾を得る必要があります。
+ *
+ * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
+ * ます。「GNU Lesser General Public License」とは、これまでLibrary General
+ * Public Licenseと呼ばれていたものです。
+ * http://www.gnu.org/ --- GNUウェブサイト
+ * http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
+ * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
+ *
+ * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
+ * はありません。
+ *
+ * ◆保証内容◆
+ *
+ * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
+ * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
+ * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
+ * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
+ * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
+ * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
+ * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
+ * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
+ * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
+ * 契約・規定に優先します。
+ *
+ * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
+ *
+ * 〒112-0004東京都文京区後楽1-1-13小野ビル7F
+ * 株式会社ハッピーサイズ
+ * Phone: 03-5803-2964, Fax: 03-5803-2965
+ * http://www.happysize.co.jp/ mailto:info@happysize.co.jp
+ *
+ * ◆著者◆
+ *
+ * 金本 茂 <sgk@happysize.co.jp>
+ *
+ * ◆履歴◆
+ *
+ * 1998/11/10 sgk implementation in C++
+ * 1999/4/25 sgk Cで書きなおし。
+ * 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
+ * 1999/6/?? Unicodeサポート。
+ * 1999/6/22 sgk ライセンスをLGPLに変更。
+ *
+ */
+
+/*
+ * Unicode support
+ *
+ * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
+ * All rights reserved.
+ *
+ */
+
+/*
+ *
+ * streamable kanji code filter and converter
+ * mbfl : Multi Byte FiLter Library
+ *
+ */
+
+/* $Id$ */
+
+
+#ifndef MBFL_MBFILTER_H
+#define MBFL_MBFILTER_H
+
+enum mbfl_no_language { /* numeric identifiers for languages; used as the no_language field type in this header */
+ mbfl_no_language_invalid = -1, /* explicit -1, so the enumerators that follow count up from 0 */
+ mbfl_no_language_uni, /* value 0; NOTE(review): presumably "universal"/Unicode -- confirm in mbfilter.c */
+ mbfl_no_language_min, /* marker positioned immediately before the first named language */
+ mbfl_no_language_catalan, /* ca */
+ mbfl_no_language_danish, /* da */
+ mbfl_no_language_german, /* de */
+ mbfl_no_language_english, /* en */
+ mbfl_no_language_estonian, /* et */
+ mbfl_no_language_greek, /* el */
+ mbfl_no_language_spanish, /* es */
+ mbfl_no_language_french, /* fr */
+ mbfl_no_language_italian, /* it */
+ mbfl_no_language_japanese, /* ja */
+ mbfl_no_language_korean, /* ko */
+ mbfl_no_language_dutch, /* nl */
+ mbfl_no_language_polish, /* pl */
+ mbfl_no_language_portuguese, /* pt */
+ mbfl_no_language_swedish, /* sv */
+ mbfl_no_language_chinese, /* zh */
+ mbfl_no_language_max /* marker positioned immediately after the last named language */
+};
+
+enum mbfl_no_encoding { /* numeric identifiers for encodings; used as the no_encoding/from/to field type in this header */
+ mbfl_no_encoding_invalid = -1, /* explicit -1, so the enumerators that follow count up from 0 */
+ mbfl_no_encoding_pass, /* value 0; the README describes "pass" as "no conversion" */
+ mbfl_no_encoding_auto, /* the README describes "auto" as automatic encoding detection */
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_byte2be, /* NOTE(review): byte2/byte4 + be/le presumably mean 2- or 4-byte units, big/little endian -- confirm */
+ mbfl_no_encoding_byte2le,
+ mbfl_no_encoding_byte4be,
+ mbfl_no_encoding_byte4le,
+ mbfl_no_encoding_base64,
+ mbfl_no_encoding_qprint, /* the README lists "qprint" as the alias of Quoted-Printable */
+ mbfl_no_encoding_7bit,
+ mbfl_no_encoding_8bit,
+ mbfl_no_encoding_charset_min, /* marker positioned immediately before the first entry of the charset range */
+ mbfl_no_encoding_ucs4,
+ mbfl_no_encoding_ucs4be,
+ mbfl_no_encoding_ucs4le,
+ mbfl_no_encoding_ucs2,
+ mbfl_no_encoding_ucs2be,
+ mbfl_no_encoding_ucs2le,
+ mbfl_no_encoding_utf32,
+ mbfl_no_encoding_utf32be,
+ mbfl_no_encoding_utf32le,
+ mbfl_no_encoding_utf16,
+ mbfl_no_encoding_utf16be,
+ mbfl_no_encoding_utf16le,
+ mbfl_no_encoding_utf8,
+ mbfl_no_encoding_utf7,
+ mbfl_no_encoding_utf7imap,
+ mbfl_no_encoding_ascii,
+ mbfl_no_encoding_euc_jp,
+ mbfl_no_encoding_sjis,
+ mbfl_no_encoding_eucjp_win,
+ mbfl_no_encoding_sjis_win,
+ mbfl_no_encoding_sjis_mac,
+ mbfl_no_encoding_jis,
+ mbfl_no_encoding_2022jp,
+ mbfl_no_encoding_8859_1,
+ mbfl_no_encoding_8859_2,
+ mbfl_no_encoding_8859_3,
+ mbfl_no_encoding_8859_4,
+ mbfl_no_encoding_8859_5,
+ mbfl_no_encoding_8859_6,
+ mbfl_no_encoding_8859_7,
+ mbfl_no_encoding_8859_8,
+ mbfl_no_encoding_8859_9,
+ mbfl_no_encoding_8859_10,
+ mbfl_no_encoding_8859_13,
+ mbfl_no_encoding_8859_14,
+ mbfl_no_encoding_8859_15,
+ mbfl_no_encoding_charset_max /* marker positioned immediately after the last entry of the charset range */
+};
+
+
+/*
+ * language
+ */
+typedef struct _mbfl_language { /* descriptor record for one language; returned by pointer from the mbfl_*2language() resolvers */
+ enum mbfl_no_language no_language; /* numeric identifier of this language */
+ const char *name;
+ const char *short_name; /* NOTE(review): presumably the two-letter code shown beside the enum entries -- confirm */
+ const char *(*aliases)[]; /* pointer to an array of unspecified size holding const char * entries */
+ enum mbfl_no_encoding mail_charset; /* NOTE(review): the three mail_* fields presumably pick the encodings used for mail -- confirm in mbfilter.c */
+ enum mbfl_no_encoding mail_header_encoding;
+ enum mbfl_no_encoding mail_body_encoding;
+} mbfl_language;
+
+
+/*
+ * encoding
+ */
+typedef struct _mbfl_encoding { /* descriptor record for one encoding; returned by pointer from the mbfl_*2encoding() resolvers */
+ enum mbfl_no_encoding no_encoding; /* numeric identifier of this encoding */
+ const char *name;
+ const char *mime_name; /* NOTE(review): presumably the "mime" name the README lists per encoding -- confirm */
+ const char *(*aliases)[]; /* pointer to an array of unspecified size holding const char * entries */
+ const unsigned char *mblen_table; /* NOTE(review): presumably a byte-length lookup table indexed by lead byte -- TODO confirm */
+ unsigned int flag; /* NOTE(review): presumably a combination of the MBFL_ENCTYPE_* bits -- confirm */
+} mbfl_encoding;
+
+
+#define MBFL_ENCTYPE_SBCS 0x00000001 /* MBFL_ENCTYPE_*: every value in this group is a distinct single bit, so they can be OR-ed together */
+#define MBFL_ENCTYPE_MBCS 0x00000002 /* NOTE(review): presumably SBCS = single-byte, MBCS = multi-byte character set -- confirm */
+#define MBFL_ENCTYPE_WCS2BE 0x00000010 /* NOTE(review): the 2/4 and BE/LE suffixes look like unit width in bytes and byte order -- confirm */
+#define MBFL_ENCTYPE_WCS2LE 0x00000020
+#define MBFL_ENCTYPE_MWC2BE 0x00000040
+#define MBFL_ENCTYPE_MWC2LE 0x00000080
+#define MBFL_ENCTYPE_WCS4BE 0x00000100
+#define MBFL_ENCTYPE_WCS4LE 0x00000200
+#define MBFL_ENCTYPE_MWC4BE 0x00000400
+#define MBFL_ENCTYPE_MWC4LE 0x00000800
+#define MBFL_ENCTYPE_SHFTCODE 0x00001000 /* NOTE(review): presumably marks encodings that use shift/escape sequences -- TODO confirm */
+
+/* wchar plane, special character */
+#define MBFL_WCSPLANE_MASK 0xffff /* selects the low 16 bits; the plane constants below differ only in the upper 16 bits */
+#define MBFL_WCSPLANE_UCS2MAX 0x00010000 /* same value as MBFL_WCSPLANE_SUPMIN */
+#define MBFL_WCSPLANE_SUPMIN 0x00010000
+#define MBFL_WCSPLANE_SUPMAX 0x00200000
+#define MBFL_WCSPLANE_JIS0208 0x70e10000 /* JIS HEX : 2121h - 7E7Eh */
+#define MBFL_WCSPLANE_JIS0212 0x70e20000 /* JIS HEX : 2121h - 7E7Eh */
+#define MBFL_WCSPLANE_WINCP932 0x70e30000 /* JIS HEX : 2121h - 9898h */
+#define MBFL_WCSPLANE_8859_1 0x70e40000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_2 0x70e50000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_3 0x70e60000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_4 0x70e70000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_5 0x70e80000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_6 0x70e90000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_7 0x70ea0000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_8 0x70eb0000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_9 0x70ec0000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_10 0x70ed0000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_13 0x70ee0000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_14 0x70ef0000 /* 00h - FFh */
+#define MBFL_WCSPLANE_8859_15 0x70f00000 /* 00h - FFh */
+#define MBFL_WCSPLANE_KSC5601 0x70f10000 /* 2121h - 7E7Eh */
+#define MBFL_WCSPLANE_GB2312 0x70f20000 /* 2121h - 7E7Eh */
+
+#define MBFL_WCSGROUP_MASK 0xffffff /* selects the low 24 bits */
+#define MBFL_WCSGROUP_UCS4MAX 0x70000000
+#define MBFL_WCSGROUP_WCHARMAX 0x78000000 /* same value as MBFL_WCSGROUP_THROUGH */
+#define MBFL_WCSGROUP_THROUGH 0x78000000 /* 000000h - FFFFFFh */
+
+
+/*
+ * string object
+ */
+typedef struct _mbfl_string { /* string object: a byte pointer and length tagged with language and encoding identifiers */
+ enum mbfl_no_language no_language; /* language identifier attached to the string */
+ enum mbfl_no_encoding no_encoding; /* encoding identifier attached to the string */
+ unsigned char *val; /* pointer to the string data; ownership is not visible from this header */
+ unsigned int len; /* NOTE(review): presumably the size of val in bytes, not characters -- confirm */
+} mbfl_string;
+
+void mbfl_string_init(mbfl_string *string);
+
+
+/*
+ * language resolver
+ */
+mbfl_language * mbfl_name2language(const char *name);
+mbfl_language * mbfl_no2language(enum mbfl_no_language no_language);
+enum mbfl_no_language mbfl_name2no_language(const char *name);
+
+
+/*
+ * encoding resolver
+ */
+mbfl_encoding * mbfl_name2encoding(const char *name);
+mbfl_encoding * mbfl_no2encoding(enum mbfl_no_encoding no_encoding);
+enum mbfl_no_encoding mbfl_name2no_encoding(const char *name);
+const char * mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding);
+const char * mbfl_no2preferred_mime_name(enum mbfl_no_encoding no_encoding);
+int mbfl_is_support_encoding(const char *name);
+
+
+/*
+ * memory output function
+ */
+#define MBFL_MEMORY_DEVICE_ALLOC_SIZE 64
+
+typedef struct _mbfl_memory_device { /* unsigned char buffer plus bookkeeping; operated on by the mbfl_memory_device_* functions */
+ unsigned char *buffer; /* storage, in unsigned char units */
+ int length; /* NOTE(review): presumably the current capacity of buffer -- confirm in mbfilter.c */
+ int pos; /* NOTE(review): presumably the next write offset into buffer -- confirm */
+ int allocsz; /* NOTE(review): presumably the growth increment; compare MBFL_MEMORY_DEVICE_ALLOC_SIZE -- confirm */
+} mbfl_memory_device;
+
+typedef struct _mbfl_wchar_device { /* same field layout as mbfl_memory_device, but buffer holds unsigned int units */
+ unsigned int *buffer; /* storage, in unsigned int units */
+ int length; /* NOTE(review): presumably the current capacity of buffer -- confirm in mbfilter.c */
+ int pos; /* NOTE(review): presumably the next write offset into buffer -- confirm */
+ int allocsz; /* NOTE(review): presumably the growth increment -- confirm */
+} mbfl_wchar_device;
+
+void mbfl_memory_device_init(mbfl_memory_device *device, int initsz, int allocsz);
+void mbfl_memory_device_realloc(mbfl_memory_device *device, int initsz, int allocsz);
+void mbfl_memory_device_clear(mbfl_memory_device *device);
+void mbfl_memory_device_reset(mbfl_memory_device *device);
+mbfl_string * mbfl_memory_device_result(mbfl_memory_device *device, mbfl_string *result);
+int mbfl_memory_device_output(int c, void *data);
+int mbfl_memory_device_output2(int c, void *data);
+int mbfl_memory_device_output4(int c, void *data);
+int mbfl_memory_device_strcat(mbfl_memory_device *device, const char *psrc);
+int mbfl_memory_device_strncat(mbfl_memory_device *device, const char *psrc, int len);
+int mbfl_memory_device_devcat(mbfl_memory_device *dest, mbfl_memory_device *src);
+
+void mbfl_wchar_device_init(mbfl_wchar_device *device);
+int mbfl_wchar_device_output(int c, void *data);
+
+
+/*
+ * convert filter
+ */
+#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE 0
+#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR 1
+#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG 2
+
+typedef struct _mbfl_convert_filter mbfl_convert_filter;
+
+/*
+ * State of one conversion filter instance.
+ * The first four members are per-filter callbacks (the same four appear in
+ * struct mbfl_convert_vtbl); output_function/flush_function/data are the
+ * downstream sink supplied by the caller of mbfl_convert_filter_new().
+ */
+struct _mbfl_convert_filter {
+ void (*filter_ctor)(mbfl_convert_filter *filter);
+ void (*filter_dtor)(mbfl_convert_filter *filter);
+ int (*filter_function)(int c, mbfl_convert_filter *filter);
+ int (*filter_flush)(mbfl_convert_filter *filter);
+ int (*output_function)(int c, void *data);
+ int (*flush_function)(void *data);
+ void *data; /* opaque pointer handed back to output_function/flush_function */
+ int status;
+ int cache;
+ mbfl_encoding *from;
+ mbfl_encoding *to;
+ int illegal_mode; /* NOTE(review): presumably one of MBFL_OUTPUTFILTER_ILLEGAL_MODE_* -- confirm */
+ int illegal_substchar;
+};
+
+/*
+ * Table entry describing one (from, to) encoding pair together with the
+ * callbacks that implement it.
+ */
+struct mbfl_convert_vtbl {
+ enum mbfl_no_encoding from;
+ enum mbfl_no_encoding to;
+ void (*filter_ctor)(mbfl_convert_filter *filter);
+ void (*filter_dtor)(mbfl_convert_filter *filter);
+ int (*filter_function)(int c, mbfl_convert_filter *filter);
+ int (*filter_flush)(mbfl_convert_filter *filter);
+};
+
+mbfl_convert_filter *
+mbfl_convert_filter_new(
+ enum mbfl_no_encoding from,
+ enum mbfl_no_encoding to,
+ int (*output_function)(int, void *),
+ int (*flush_function)(void *),
+ void *data);
+void mbfl_convert_filter_delete(mbfl_convert_filter *filter);
+int mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter);
+int mbfl_convert_filter_flush(mbfl_convert_filter *filter);
+void mbfl_convert_filter_reset(mbfl_convert_filter *filter, enum mbfl_no_encoding from, enum mbfl_no_encoding to);
+void mbfl_convert_filter_copy(mbfl_convert_filter *src, mbfl_convert_filter *dist);
+int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter);
+
+
+/*
+ * identify filter
+ */
+typedef struct _mbfl_identify_filter mbfl_identify_filter;
+
+/*
+ * State of one encoding-identification filter: three callbacks plus
+ * bookkeeping for the encoding being tested.
+ * NOTE(review): the exact meaning of status/flag/score is defined by the
+ * implementation in mbfilter.c -- confirm before relying on them.
+ */
+struct _mbfl_identify_filter {
+ void (*filter_ctor)(mbfl_identify_filter *filter);
+ void (*filter_dtor)(mbfl_identify_filter *filter);
+ int (*filter_function)(int c, mbfl_identify_filter *filter);
+ int status;
+ int flag;
+ int score;
+ mbfl_encoding *encoding;
+};
+
+/* Table entry binding an encoding number to its identify-filter callbacks. */
+struct mbfl_identify_vtbl {
+ enum mbfl_no_encoding encoding;
+ void (*filter_ctor)(mbfl_identify_filter *filter);
+ void (*filter_dtor)(mbfl_identify_filter *filter);
+ int (*filter_function)(int c, mbfl_identify_filter *filter);
+};
+
+mbfl_identify_filter * mbfl_identify_filter_new(enum mbfl_no_encoding encoding);
+void mbfl_identify_filter_delete(mbfl_identify_filter *filter);
+
+
+/*
+ * buffering converter
+ */
+typedef struct _mbfl_buffer_converter mbfl_buffer_converter;
+
+/*
+ * Converter that owns up to two convert filters and an embedded memory
+ * device, plus the source and destination encodings.
+ * NOTE(review): presumably filter1/filter2 form a chain whose output lands
+ * in `device` -- confirm in mbfilter.c.
+ */
+struct _mbfl_buffer_converter {
+ mbfl_convert_filter *filter1;
+ mbfl_convert_filter *filter2;
+ mbfl_memory_device device;
+ mbfl_encoding *from;
+ mbfl_encoding *to;
+};
+
+mbfl_buffer_converter * mbfl_buffer_converter_new(enum mbfl_no_encoding from, enum mbfl_no_encoding to, int buf_initsz);
+void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd);
+void mbfl_buffer_converter_reset(mbfl_buffer_converter *convd);
+int mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode);
+int mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar);
+int mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n);
+int mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string);
+mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result);
+mbfl_string * mbfl_buffer_converter_feed_getbuffer(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result);
+mbfl_string * mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result);
+
+
+/*
+ * encoding detector
+ */
+typedef struct _mbfl_encoding_detector mbfl_encoding_detector;
+
+/*
+ * Encoding detector: an array of identify filters and its element count.
+ */
+struct _mbfl_encoding_detector {
+	mbfl_identify_filter **filter_list;
+	int filter_list_size;
+};
+
+/*
+ * elist   - array of candidate encoding numbers
+ * elistsz - number of entries in elist
+ * NOTE(review): the caller in mbstring.c treats a NULL result from _new()
+ * as failure and stops feeding as soon as _feed() returns non-zero.
+ */
+mbfl_encoding_detector * mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz);
+void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd);
+int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string);
+enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd);
+
+
+/*
+ * encoding converter
+ */
+mbfl_string *
+mbfl_convert_encoding(mbfl_string *string, mbfl_string *result, enum mbfl_no_encoding toenc);
+
+
+/*
+ * identify encoding
+ */
+/*
+ * All three variants take the string to examine plus an array of candidate
+ * encoding numbers (elist) and its element count (elistsz); they differ only
+ * in how the result is reported (encoding descriptor, name, or number).
+ */
+mbfl_encoding *
+mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz);
+
+const char *
+mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz);
+
+enum mbfl_no_encoding
+mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz);
+
+/*
+ * strlen
+ */
+int
+mbfl_strlen(mbfl_string *string);
+
+/*
+ * strpos
+ */
+int
+mbfl_strpos(mbfl_string *haystack, mbfl_string *needle, int offset, int reverse);
+
+/*
+ * substr
+ */
+mbfl_string *
+mbfl_substr(mbfl_string *string, mbfl_string *result, int from, int length);
+
+/*
+ * strcut
+ */
+mbfl_string *
+mbfl_strcut(mbfl_string *string, mbfl_string *result, int from, int length);
+
+/*
+ * strwidth
+ */
+int
+mbfl_strwidth(mbfl_string *string);
+
+/*
+ * strimwidth
+ */
+mbfl_string *
+mbfl_strimwidth(mbfl_string *string, mbfl_string *marker, mbfl_string *result, int from, int width);
+
+/*
+ * MIME header encode
+ */
+struct mime_header_encoder_data; /* forward declaration */
+
+struct mime_header_encoder_data *
+mime_header_encoder_new(
+ enum mbfl_no_encoding incode,
+ enum mbfl_no_encoding outcode,
+ enum mbfl_no_encoding encoding);
+
+void
+mime_header_encoder_delete(struct mime_header_encoder_data *pe);
+
+int
+mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe);
+
+mbfl_string *
+mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result);
+
+mbfl_string *
+mbfl_mime_header_encode(
+ mbfl_string *string, mbfl_string *result,
+ enum mbfl_no_encoding outcode,
+ enum mbfl_no_encoding encoding,
+ const char *linefeed,
+ int indent);
+
+/*
+ * MIME header decode
+ */
+struct mime_header_decoder_data; /* forward declaration */
+
+struct mime_header_decoder_data *
+mime_header_decoder_new(enum mbfl_no_encoding outcode);
+
+void
+mime_header_decoder_delete(struct mime_header_decoder_data *pd);
+
+int
+mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd);
+
+mbfl_string *
+mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result);
+
+mbfl_string *
+mbfl_mime_header_decode(
+ mbfl_string *string,
+ mbfl_string *result,
+ enum mbfl_no_encoding outcode);
+
+
+/*
+ * convert HTML numeric entity
+ */
+mbfl_string *
+mbfl_html_numeric_entity(mbfl_string *string, mbfl_string *result, int *convmap, int mapsize, int type);
+
+
+/*
+ * conversion between halfwidth and fullwidth characters for Japanese
+ */
+mbfl_string *
+mbfl_ja_jp_hantozen(mbfl_string *string, mbfl_string *result, int mode);
+
+#endif /* MBFL_MBFILTER_H */
View
7,331 ext/mbstring/mbfilter_ja.c
7,331 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
View
95 ext/mbstring/mbfilter_ja.h
@@ -0,0 +1,95 @@
+/* charset=UTF-8 */
+
+/*
+ * "streamable kanji code filter and converter"
+ *
+ * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
+ *
+ * This software is released under the GNU Lesser General Public License.
+ * Please read the following detail of the licence (in japanese).
+ *
+ * ◆使用許諾条件◆
+ *
+ * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
+ * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
+ * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
+ * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
+ * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
+ * することはできません。
+ *
+ * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
+ * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
+ * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
+ * による許諾を得る必要があります。
+ *
+ * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
+ * ます。「GNU Lesser General Public License」とは、これまでLibrary General
+ * Public Licenseと呼ばれていたものです。
+ * http://www.gnu.org/ --- GNUウェブサイト
+ * http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
+ * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
+ *
+ * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
+ * はありません。
+ *
+ * ◆保証内容◆
+ *
+ * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
+ * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
+ * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
+ * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
+ * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
+ * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
+ * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
+ * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
+ * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
+ * 契約・規定に優先します。
+ *
+ * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
+ *
+ * 〒112-0004東京都文京区後楽1-1-13小野ビル7F
+ * 株式会社ハッピーサイズ
+ * Phone: 03-5803-2964, Fax: 03-5803-2965
+ * http://www.happysize.co.jp/ mailto:info@happysize.co.jp
+ *
+ * ◆著者◆
+ *
+ * 金本 茂 <sgk@happysize.co.jp>
+ *
+ * ◆履歴◆
+ *
+ * 1998/11/10 sgk implementation in C++
+ * 1999/4/25 sgk Cで書きなおし。
+ * 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
+ * 1999/6/?? Unicodeサポート。
+ * 1999/6/22 sgk ライセンスをLGPLに変更。
+ *
+ */
+
+/*
+ * Unicode support
+ *
+ * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
+ * All rights reserved.
+ *
+ */
+
+/* $Id$ */
+
+#ifndef MBFL_MBFILTER_JA_H
+#define MBFL_MBFILTER_JA_H
+
+int mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter);
+
+#endif /* MBFL_MBFILTER_JA_H */
View
2,457 ext/mbstring/mbstring.c
@@ -0,0 +1,2457 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP version 4.0 |
+ +----------------------------------------------------------------------+
+ | Copyright (c) 2001 The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 2.02 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available at through the world-wide-web at |
+ | http://www.php.net/license/2_02.txt. |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
+ +----------------------------------------------------------------------+
+ */
+
+/* $Id$ */
+
+/*
+ * PHP4 Multibyte String module "mbstring" (currently only for Japanese)
+ *
+ * History:
+ * 2000.5.19 Release php-4.0RC2_jstring-1.0
+ * 2001.4.1 Release php4_jstring-1.0.91
+ * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
+ * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
+ */
+
+/*
+ * PHP3 Internationalization support program.
+ *
+ * Copyright (c) 1999,2000 by the PHP3 internationalization team.
+ * All rights reserved.
+ *
+ * See README_PHP3-i18n-ja for more detail.
+ *
+ * Authors:
+ * Hironori Sato <satoh@jpnnet.com>
+ * Shigeru Kanemoto <sgk@happysize.co.jp>
+ * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
+ */
+
+
+#include "php.h"
+#include "php_ini.h"
+#include "php_config.h"
+#include "mbstring.h"
+#include "ext/standard/php_string.h"
+#include "ext/standard/php_mail.h"
+#include "ext/standard/url.h"
+#include "ext/standard/php_output.h"
+
+#include "php_variables.h"
+#include "php_globals.h"
+#include "rfc1867.h"
+#include "php_content_types.h"
+#include "SAPI.h"
+
+#if HAVE_MBSTRING
+
+/*
+ * Built-in detection order. It is used when no detect order is configured
+ * (see RINIT) and is what an "auto" entry expands to when an encoding list
+ * is parsed.
+ */
+static enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
+	mbfl_no_encoding_ascii, mbfl_no_encoding_jis, mbfl_no_encoding_utf8,
+	mbfl_no_encoding_euc_jp, mbfl_no_encoding_sjis
+};
+
+/* number of entries in php_mbstr_default_identify_list */
+static int php_mbstr_default_identify_list_size =
+	sizeof(php_mbstr_default_identify_list) / sizeof(php_mbstr_default_identify_list[0]);
+
+/*
+ * Argument-passing spec attached to mbstr_convert_variables in the function
+ * table below.
+ * NOTE(review): going by the BYREF_* names, the first two arguments are
+ * passed normally and the third and all following are forced by reference --
+ * confirm against the Zend headers.
+ */
+static unsigned char third_and_rest_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE_REST };
+
+SAPI_POST_HANDLER_FUNC(php_mbstr_post_handler);
+
+/*
+ * POST content-type table registered in MINIT when MBSTR_ENC_TRANS is
+ * defined: the default form content type is routed to
+ * php_mbstr_post_handler, multipart stays with rfc1867_post_handler.
+ * The all-NULL entry terminates the table.
+ */
+static sapi_post_entry mbstr_post_entries[] = {
+ { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mbstr_post_handler },
+ { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, sapi_read_standard_form_data, rfc1867_post_handler },
+ { NULL, 0, NULL }
+};
+
+/*
+ * Script-visible function table of the extension.
+ * PHP_FE entries are the primary mbstr_* functions; the PHP_FALIAS entries
+ * map additional names (mb*, i18n_*, jstr_*) onto the same implementations.
+ */
+function_entry mbstring_functions[] = {
+ /* primary names */
+ PHP_FE(mbstr_internal_encoding, NULL)
+ PHP_FE(mbstr_http_input, NULL)
+ PHP_FE(mbstr_http_output, NULL)
+ PHP_FE(mbstr_detect_order, NULL)
+ PHP_FE(mbstr_substitute_character, NULL)
+ PHP_FE(mbstr_gpc_handler, NULL)
+ PHP_FE(mbstr_output_handler, NULL)
+ PHP_FE(mbstr_preferred_mime_name, NULL)
+ PHP_FE(mbstr_strlen, NULL)
+ PHP_FE(mbstr_strpos, NULL)
+ PHP_FE(mbstr_strrpos, NULL)
+ PHP_FE(mbstr_substr, NULL)
+ PHP_FE(mbstr_strcut, NULL)
+ PHP_FE(mbstr_strwidth, NULL)
+ PHP_FE(mbstr_strimwidth, NULL)
+ PHP_FE(mbstr_convert_encoding, NULL)
+ PHP_FE(mbstr_detect_encoding, NULL)
+ PHP_FE(mbstr_convert_kana, NULL)
+ PHP_FE(mbstr_encode_mimeheader, NULL)
+ PHP_FE(mbstr_decode_mimeheader, NULL)
+ PHP_FE(mbstr_convert_variables, third_and_rest_force_ref)
+ PHP_FE(mbstr_encode_numericentity, NULL)
+ PHP_FE(mbstr_decode_numericentity, NULL)
+ PHP_FE(mbstr_send_mail, NULL)
+ /* short mb* aliases */
+ PHP_FALIAS(mbstrlen, mbstr_strlen, NULL)
+ PHP_FALIAS(mbstrpos, mbstr_strpos, NULL)
+ PHP_FALIAS(mbstrrpos, mbstr_strrpos, NULL)
+ PHP_FALIAS(mbsubstr, mbstr_substr, NULL)
+ PHP_FALIAS(mbstrcut, mbstr_strcut, NULL)
+ /* i18n_* and jstr_* aliases */
+ PHP_FALIAS(i18n_internal_encoding, mbstr_internal_encoding, NULL)
+ PHP_FALIAS(jstr_default_encoding, mbstr_internal_encoding, NULL)
+ PHP_FALIAS(i18n_http_input, mbstr_http_input, NULL)
+ PHP_FALIAS(i18n_http_output, mbstr_http_output, NULL)
+ PHP_FALIAS(i18n_convert, mbstr_convert_encoding, NULL)
+ PHP_FALIAS(i18n_discover_encoding, mbstr_detect_encoding, NULL)
+ PHP_FALIAS(i18n_mime_header_encode, mbstr_encode_mimeheader, NULL)
+ PHP_FALIAS(i18n_mime_header_decode, mbstr_decode_mimeheader, NULL)
+ PHP_FALIAS(i18n_ja_jp_hantozen, mbstr_convert_kana, NULL)
+ PHP_FALIAS(jstr_convert_hantozen, mbstr_convert_kana, NULL)
+ PHP_FALIAS(jstr_strlen, mbstr_strlen, NULL)
+ PHP_FALIAS(jstr_strpos, mbstr_strpos, NULL)
+ PHP_FALIAS(jstr_strrpos, mbstr_strrpos, NULL)
+ PHP_FALIAS(jstr_substr, mbstr_substr, NULL)
+ PHP_FALIAS(jstr_strcut, mbstr_strcut, NULL)
+ PHP_FALIAS(jstr_strwidth, mbstr_strwidth, NULL)
+ PHP_FALIAS(jstr_strimwidth, mbstr_strimwidth, NULL)
+ PHP_FALIAS(jstr_convert_encoding, mbstr_convert_encoding, NULL)
+ PHP_FALIAS(jstr_detect_encoding, mbstr_detect_encoding, NULL)
+ PHP_FALIAS(jstr_convert_kana, mbstr_convert_kana, NULL)
+ PHP_FALIAS(jstr_send_mail, mbstr_send_mail, NULL)
+ /* table terminator */
+ { NULL, NULL, NULL }
+};
+
+/*
+ * Module descriptor: name, function table, then the module/request
+ * startup and shutdown hooks and the phpinfo() hook defined in this file.
+ */
+zend_module_entry mbstring_module_entry = {
+ "mbstring",
+ mbstring_functions,
+ PHP_MINIT(mbstring),
+ PHP_MSHUTDOWN(mbstring),
+ PHP_RINIT(mbstring),
+ PHP_RSHUTDOWN(mbstring),
+ PHP_MINFO(mbstring),
+ STANDARD_MODULE_PROPERTIES
+};
+
+/* storage for the extension's globals, accessed through MBSTRG() */
+ZEND_DECLARE_MODULE_GLOBALS(mbstring)
+
+/* get_module() entry point, emitted only when COMPILE_DL_MBSTRING is defined */
+#ifdef COMPILE_DL_MBSTRING
+ZEND_GET_MODULE(mbstring)
+#endif
+
+
+/*
+ * Parse a comma-separated list of encoding names into a newly allocated
+ * array of encoding numbers.
+ *
+ * value, value_length - the list text; need not be NUL-terminated, a working
+ *                       copy is made and modified instead
+ * return_list         - receives the allocated array (only on success)
+ * return_size         - receives the number of entries stored (may be 0)
+ * persistent          - passed to pecalloc(); callers in this file release a
+ *                       persistent list with free() and a non-persistent one
+ *                       with efree()
+ *
+ * An entry that resolves to mbfl_no_encoding_auto is replaced by the whole
+ * php_mbstr_default_identify_list, at most once per call. Entries that
+ * resolve to mbfl_no_encoding_invalid are skipped without any diagnostic.
+ *
+ * Returns 1 on success, 0 if value is NULL/empty or allocation failed; on
+ * failure the output parameters are left untouched.
+ */
+static int
+php_mbstring_parse_encoding_list(const char *value, int value_length, int **return_list, int *return_size, int persistent)
+{
+ int n, l, size, bauto, *src, *list, *entry;
+ char *p, *p1, *p2, *endp, *tmpstr;
+ enum mbfl_no_encoding no_encoding;
+
+ list = NULL;
+ if (value == NULL || value_length <= 0) {
+ return 0;
+ } else {
+ /* copy the value string for work */
+ tmpstr = (char *)estrndup(value, value_length);
+ if (tmpstr == NULL) {
+ return 0;
+ }
+ /* count the number of listed encoding names */
+ endp = tmpstr + value_length;
+ n = 1;
+ p1 = tmpstr;
+ while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
+ p1 = p2 + 1;
+ n++;
+ }
+ /* upper bound: every name once, plus one expansion of the default list */
+ size = n + php_mbstr_default_identify_list_size;
+ /* make list */
+ list = (int *)pecalloc(size, sizeof(int), persistent);
+ if (list != NULL) {
+ entry = list;
+ n = 0;
+ bauto = 0;
+ p1 = tmpstr;
+ do {
+ /* p2 remembers whether a separator was found; p marks the token end */
+ p2 = p = php_memnstr(p1, ",", 1, endp);
+ if (p == NULL) {
+ p = endp;
+ }
+ /* terminate the token in place (overwrites the ',' or the copy's final NUL) */
+ *p = '\0';
+ /* trim spaces */
+ while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
+ p1++;
+ }
+ p--;
+ while (p > p1 && (*p == ' ' || *p == '\t')) {
+ *p = '\0';
+ p--;
+ }
+ /* convert to the encoding number and check encoding */
+ no_encoding = mbfl_name2no_encoding(p1);
+ if (no_encoding == mbfl_no_encoding_auto) {
+ /* expand to the default list, but only the first time it is seen */
+ if (!bauto) {
+ bauto = 1;
+ l = php_mbstr_default_identify_list_size;
+ src = php_mbstr_default_identify_list;
+ while (l > 0) {
+ *entry++ = *src++;
+ l--;
+ n++;
+ }
+ }
+ } else if (no_encoding != mbfl_no_encoding_invalid) {
+ *entry++ = no_encoding;
+ n++;
+ }
+ /* only meaningful when p2 != NULL; the loop condition stops otherwise */
+ p1 = p2 + 1;
+ } while (n < size && p2 != NULL);
+ *return_list = list;
+ *return_size = n;
+ }
+ efree(tmpstr);
+ }
+
+ if (list == NULL) {
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Build an array of encoding numbers from a PHP array of encoding names.
+ *
+ * Each element is converted to a string and resolved with
+ * mbfl_name2no_encoding(). An element resolving to mbfl_no_encoding_auto is
+ * replaced by the default identify list (first occurrence only); elements
+ * resolving to mbfl_no_encoding_invalid are dropped.
+ *
+ * Returns 1 with *return_list / *return_size filled in, or 0 (outputs
+ * untouched) when `array` is not an array or the allocation fails.
+ * `persistent` is handed to pecalloc().
+ */
+static int
+php_mbstring_parse_encoding_array(zval *array, int **return_list, int *return_size, int persistent)
+{
+	HashTable *names;
+	zval **elem;
+	enum mbfl_no_encoding enc;
+	int remaining, capacity, count, k, auto_seen;
+	int *out, *defaults;
+
+	if (Z_TYPE_P(array) != IS_ARRAY) {
+		return 0;
+	}
+
+	names = array->value.ht;
+	zend_hash_internal_pointer_reset(names);
+	remaining = zend_hash_num_elements(names);
+
+	/* room for every element plus one expansion of the default list */
+	capacity = remaining + php_mbstr_default_identify_list_size;
+	out = (int *)pecalloc(capacity, sizeof(int), persistent);
+	if (out == NULL) {
+		return 0;
+	}
+
+	count = 0;
+	auto_seen = 0;
+	for (; remaining > 0; remaining--) {
+		if (zend_hash_get_current_data(names, (void **) &elem) == FAILURE) {
+			break;
+		}
+		convert_to_string_ex(elem);
+		enc = mbfl_name2no_encoding(Z_STRVAL_PP(elem));
+		if (enc == mbfl_no_encoding_auto) {
+			if (!auto_seen) {
+				auto_seen = 1;
+				defaults = php_mbstr_default_identify_list;
+				for (k = 0; k < php_mbstr_default_identify_list_size; k++) {
+					out[count++] = defaults[k];
+				}
+			}
+		} else if (enc != mbfl_no_encoding_invalid) {
+			out[count++] = enc;
+		}
+		zend_hash_move_forward(names);
+	}
+
+	*return_list = out;
+	*return_size = count;
+	return 1;
+}
+
+
+/*
+ * php.ini handler for mbstring.detect_order: parses the new value into a
+ * persistent encoding list and installs it, releasing any previous list.
+ * Fails (leaving the old list in place) when the value cannot be parsed.
+ */
+static PHP_INI_MH(OnUpdate_mbstring_detect_order)
+{
+	int *parsed, parsed_size;
+	MBSTRLS_FETCH();
+
+	if (!php_mbstring_parse_encoding_list(new_value, new_value_length, &parsed, &parsed_size, 1)) {
+		return FAILURE;
+	}
+
+	if (MBSTRG(detect_order_list) != NULL) {
+		free(MBSTRG(detect_order_list));
+	}
+	MBSTRG(detect_order_list) = parsed;
+	MBSTRG(detect_order_list_size) = parsed_size;
+
+	return SUCCESS;
+}
+
+/*
+ * php.ini handler for mbstring.http_input: parses the new value into a
+ * persistent encoding list and installs it, releasing any previous list.
+ * Fails (leaving the old list in place) when the value cannot be parsed.
+ */
+static PHP_INI_MH(OnUpdate_mbstring_http_input)
+{
+	int *parsed, parsed_size;
+	MBSTRLS_FETCH();
+
+	if (!php_mbstring_parse_encoding_list(new_value, new_value_length, &parsed, &parsed_size, 1)) {
+		return FAILURE;
+	}
+
+	if (MBSTRG(http_input_list) != NULL) {
+		free(MBSTRG(http_input_list));
+	}
+	MBSTRG(http_input_list) = parsed;
+	MBSTRG(http_input_list_size) = parsed_size;
+
+	return SUCCESS;
+}
+
+/*
+ * php.ini handler for mbstring.http_output.
+ * A recognised encoding name updates both the configured and the current
+ * output encoding. An unrecognised non-empty value is rejected; a NULL or
+ * empty value is accepted and changes nothing.
+ */
+static PHP_INI_MH(OnUpdate_mbstring_http_output)
+{
+	enum mbfl_no_encoding enc;
+	MBSTRLS_FETCH();
+
+	enc = mbfl_name2no_encoding(new_value);
+	if (enc == mbfl_no_encoding_invalid) {
+		if (new_value != NULL && new_value_length > 0) {
+			return FAILURE;
+		}
+		return SUCCESS;
+	}
+
+	MBSTRG(http_output_encoding) = enc;
+	MBSTRG(current_http_output_encoding) = enc;
+	return SUCCESS;
+}
+
+/*
+ * php.ini handler for mbstring.internal_encoding.
+ * A recognised encoding name updates both the configured and the current
+ * internal encoding. An unrecognised non-empty value is rejected; a NULL or
+ * empty value is accepted and changes nothing.
+ */
+static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
+{
+	enum mbfl_no_encoding enc;
+	MBSTRLS_FETCH();
+
+	enc = mbfl_name2no_encoding(new_value);
+	if (enc == mbfl_no_encoding_invalid) {
+		if (new_value != NULL && new_value_length > 0) {
+			return FAILURE;
+		}
+		return SUCCESS;
+	}
+
+	MBSTRG(internal_encoding) = enc;
+	MBSTRG(current_internal_encoding) = enc;
+	return SUCCESS;
+}
+
+/*
+ * php.ini handler for mbstring.substitute_character.
+ * "none" and "long" (case-insensitive) select the corresponding illegal
+ * character mode; any other value selects character mode with the value
+ * parsed by zend_atoi() as the substitute character. A NULL value is
+ * ignored. Always succeeds.
+ */
+static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
+{
+	MBSTRLS_FETCH();
+
+	if (new_value == NULL) {
+		return SUCCESS;
+	}
+
+	if (strcasecmp("none", new_value) == 0) {
+		MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
+		return SUCCESS;
+	}
+	if (strcasecmp("long", new_value) == 0) {
+		MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
+		return SUCCESS;
+	}
+
+	MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
+	MBSTRG(filter_illegal_substchar) = zend_atoi(new_value, new_value_length);
+	return SUCCESS;
+}
+
+/*
+ * php.ini directive registration.
+ * Every directive has no built-in default value (NULL), is registered with
+ * PHP_INI_ALL, and is applied through its own OnUpdate_* handler above.
+ */
+PHP_INI_BEGIN()
+ PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
+ PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
+ PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
+ PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
+ PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
+PHP_INI_END()
+
+
+/*
+ * Module globals constructor: puts every field into its built-in default
+ * state before the php.ini handlers run.
+ */
+static void
+php_mbstring_init_globals(zend_mbstring_globals *pglobals)
+{
+	/* language / internal encoding: Japanese, EUC-JP */
+	pglobals->language = mbfl_no_language_japanese;
+	pglobals->internal_encoding = mbfl_no_encoding_euc_jp;
+	pglobals->current_language = mbfl_no_language_japanese;
+	pglobals->current_internal_encoding = mbfl_no_encoding_euc_jp;
+
+	/* no output encoding selected */
+	pglobals->http_output_encoding = mbfl_no_encoding_invalid;
+	pglobals->current_http_output_encoding = mbfl_no_encoding_invalid;
+
+	/* no input encoding identified yet */
+	pglobals->http_input_identify = mbfl_no_encoding_invalid;
+	pglobals->http_input_identify_get = mbfl_no_encoding_invalid;
+	pglobals->http_input_identify_post = mbfl_no_encoding_invalid;
+	pglobals->http_input_identify_cookie = mbfl_no_encoding_invalid;
+
+	/* all encoding lists start out empty */
+	pglobals->http_input_list = NULL;
+	pglobals->http_input_list_size = 0;
+	pglobals->detect_order_list = NULL;
+	pglobals->detect_order_list_size = 0;
+	pglobals->current_detect_order_list = NULL;
+	pglobals->current_detect_order_list_size = 0;
+
+	/* illegal characters are replaced by '?' */
+	pglobals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
+	pglobals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
+	pglobals->filter_illegal_substchar = 0x3f;
+	pglobals->current_filter_illegal_substchar = 0x3f;
+
+	/* no output converter allocated */
+	pglobals->outconv = NULL;
+}
+
+/*
+ * Module startup: initialise the globals, register the php.ini directives
+ * and, when MBSTR_ENC_TRANS is defined, re-register the POST content-type
+ * handlers from mbstr_post_entries.
+ */
+PHP_MINIT_FUNCTION(mbstring)
+{
+ ZEND_INIT_MODULE_GLOBALS(mbstring, php_mbstring_init_globals, NULL);
+ REGISTER_INI_ENTRIES();
+
+#if defined(MBSTR_ENC_TRANS)
+ /* NOTE(review): presumably drops the existing handler for the same content type before installing ours -- confirm in SAPI.c */
+ sapi_unregister_post_entry(mbstr_post_entries);
+ sapi_register_post_entries(mbstr_post_entries);
+#endif
+
+ return SUCCESS;
+}
+
+
+/*
+ * Module shutdown: unregister the php.ini directives and release the
+ * persistent encoding lists installed by the ini handlers.
+ * The pointers and sizes are reset after freeing so that no dangling
+ * pointer or stale count is left in the globals (the request shutdown
+ * handler does the same for its per-request list).
+ */
+PHP_MSHUTDOWN_FUNCTION(mbstring)
+{
+	MBSTRLS_FETCH();
+	UNREGISTER_INI_ENTRIES();
+
+	if (MBSTRG(http_input_list)) {
+		free(MBSTRG(http_input_list));
+		MBSTRG(http_input_list) = NULL;
+		MBSTRG(http_input_list_size) = 0;
+	}
+	if (MBSTRG(detect_order_list)) {
+		free(MBSTRG(detect_order_list));
+		MBSTRG(detect_order_list) = NULL;
+		MBSTRG(detect_order_list_size) = 0;
+	}
+
+	return SUCCESS;
+}
+
+
+/*
+ * Request startup: reset the per-request ("current") settings to the
+ * configured ones, forget any previously identified input encodings, and
+ * give the request its own copy of the detect order.
+ */
+PHP_RINIT_FUNCTION(mbstring)
+{
+	int count, idx, *from, *copy;
+	MBSTRLS_FETCH();
+
+	/* nothing identified yet for this request */
+	MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
+	MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
+	MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
+	MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
+
+	/* current settings start from the configured ones */
+	MBSTRG(current_language) = MBSTRG(language);
+	MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
+	MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
+	MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
+	MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
+
+	/* use the configured detect order if it is non-empty, else the built-in one */
+	from = php_mbstr_default_identify_list;
+	count = php_mbstr_default_identify_list_size;
+	if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size) > 0) {
+		from = MBSTRG(detect_order_list);
+		count = MBSTRG(detect_order_list_size);
+	}
+
+	copy = (int *)emalloc(count*sizeof(int));
+	if (copy != NULL) {
+		MBSTRG(current_detect_order_list) = copy;
+		MBSTRG(current_detect_order_list_size) = count;
+		for (idx = 0; idx < count; idx++) {
+			copy[idx] = from[idx];
+		}
+	}
+
+	return SUCCESS;
+}
+
+
+/*
+ * Request shutdown: release the per-request detect order and the output
+ * converter, if either exists, and clear the corresponding globals.
+ */
+PHP_RSHUTDOWN_FUNCTION(mbstring)
+{
+	MBSTRLS_FETCH();
+
+	if (MBSTRG(outconv)) {
+		mbfl_buffer_converter_delete(MBSTRG(outconv));
+		MBSTRG(outconv) = NULL;
+	}
+
+	if (MBSTRG(current_detect_order_list)) {
+		efree(MBSTRG(current_detect_order_list));
+		MBSTRG(current_detect_order_list_size) = 0;
+		MBSTRG(current_detect_order_list) = NULL;
+	}
+
+	return SUCCESS;
+}
+
+
+/*
+ * phpinfo() section: a two-column table announcing the extension (plus the
+ * input translation row when MBSTR_ENC_TRANS is defined), followed by the
+ * extension's php.ini entries.
+ */
+PHP_MINFO_FUNCTION(mbstring)
+{
+ php_info_print_table_start();
+ php_info_print_table_header(2, "Multibyte (Japanese) Support", "enabled");
+#if defined(MBSTR_ENC_TRANS)
+ php_info_print_table_row(2, "http input encoding translation", "enabled");
+#endif
+ php_info_print_table_end();
+
+ DISPLAY_INI_ENTRIES();
+}
+
+
+
+/* {{{ proto string mbstr_internal_encoding([string encoding])
+   Without an argument, returns the name of the current internal encoding
+   (FALSE if it has no name). With an argument, sets the current internal
+   encoding and returns TRUE, or warns and returns FALSE for an unknown name. */
+PHP_FUNCTION(mbstr_internal_encoding)
+{
+	pval **encoding_arg;
+	char *current_name;
+	enum mbfl_no_encoding requested;
+	int argc = ZEND_NUM_ARGS();
+	MBSTRLS_FETCH();
+
+	/* getter */
+	if (argc == 0) {
+		current_name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
+		if (current_name == NULL) {
+			RETURN_FALSE;
+		}
+		RETURN_STRING(current_name, 1);
+	}
+
+	if (argc != 1 || zend_get_parameters_ex(1, &encoding_arg) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	/* setter */
+	convert_to_string_ex(encoding_arg);
+	requested = mbfl_name2no_encoding(Z_STRVAL_PP(encoding_arg));
+	if (requested == mbfl_no_encoding_invalid) {
+		php_error(E_WARNING, "unknown encoding \"%s\"", Z_STRVAL_PP(encoding_arg));
+		RETURN_FALSE;
+	}
+	MBSTRG(current_internal_encoding) = requested;
+	RETURN_TRUE;
+}
+/* }}} */
+
+
+/* {{{ proto mixed mbstr_http_input([string type])
+   Returns the name of the identified input encoding, or FALSE if it has no
+   name. Only the first character of type is examined, case-insensitively:
+   'G' = GET, 'P' = POST, 'C' = cookie, 'I' = return the configured
+   http_input list as an array of names; anything else (or no argument)
+   selects the overall identified input encoding. */
+PHP_FUNCTION(mbstr_http_input)
+{
+	pval **arg1;
+	int result, retname, n, *entry;
+	char *name;
+	MBSTRLS_FETCH();
+
+	retname = 1;
+	/* defined value even on the 'I' path, which never reads it */
+	result = mbfl_no_encoding_invalid;
+	if (ZEND_NUM_ARGS() == 0) {
+		result = MBSTRG(http_input_identify);
+	} else if (ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg1) != FAILURE) {
+		convert_to_string_ex(arg1);
+		switch (*(Z_STRVAL_PP(arg1))) {
+		case 'G':
+		case 'g':
+			result = MBSTRG(http_input_identify_get);
+			break;
+		case 'P':
+		case 'p':
+			result = MBSTRG(http_input_identify_post);
+			break;
+		case 'C':
+		case 'c':
+			result = MBSTRG(http_input_identify_cookie);
+			break;
+		case 'I':
+		case 'i':
+			/* list mode: the return value is an array, not a single name */
+			if (array_init(return_value) == FAILURE) {
+				RETURN_FALSE;
+			}
+			entry = MBSTRG(http_input_list);
+			n = MBSTRG(http_input_list_size);
+			while (n > 0) {
+				name = (char *)mbfl_no_encoding2name(*entry);
+				if (name) {
+					add_next_index_string(return_value, name, 1);
+				}
+				entry++;
+				n--;
+			}
+			retname = 0;
+			break;
+		default:
+			result = MBSTRG(http_input_identify);
+			break;
+		}
+	} else {
+		WRONG_PARAM_COUNT;
+	}
+
+	if (retname) {
+		name = (char *)mbfl_no_encoding2name(result);
+		if (name != NULL) {
+			RETVAL_STRING(name, 1);
+		} else {
+			RETVAL_FALSE;
+		}
+	}
+}
+/* }}} */
+
+
+/* {{{ proto string mbstr_http_output([string encoding])
+   Without an argument, returns the name of the current output encoding
+   (FALSE if it has no name). With an argument, sets the current output
+   encoding and returns TRUE, or warns and returns FALSE for an unknown name. */
+PHP_FUNCTION(mbstr_http_output)
+{
+	pval **encoding_arg;
+	char *current_name;
+	enum mbfl_no_encoding requested;
+	int argc = ZEND_NUM_ARGS();
+	MBSTRLS_FETCH();
+
+	/* getter */
+	if (argc == 0) {
+		current_name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
+		if (current_name == NULL) {
+			RETURN_FALSE;
+		}
+		RETURN_STRING(current_name, 1);
+	}
+
+	if (argc != 1 || zend_get_parameters_ex(1, &encoding_arg) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	/* setter */
+	convert_to_string_ex(encoding_arg);
+	requested = mbfl_name2no_encoding(Z_STRVAL_PP(encoding_arg));
+	if (requested == mbfl_no_encoding_invalid) {
+		php_error(E_WARNING, "unknown encoding \"%s\"", Z_STRVAL_PP(encoding_arg));
+		RETURN_FALSE;
+	}
+	MBSTRG(current_http_output_encoding) = requested;
+	RETURN_TRUE;
+}
+/* }}} */
+
+
+/* {{{ proto array mbstr_detect_order([mixed encoding-list])
+   Without an argument, returns the current detect order as an array of
+   encoding names. With an argument (an array of names or a comma-separated
+   string), replaces the current detect order and returns TRUE, or returns
+   FALSE if no list could be built. */
+PHP_FUNCTION(mbstr_detect_order)
+{
+	pval **order_arg;
+	int remaining, new_size, *new_list, *cursor;
+	char *enc_name;
+	int argc = ZEND_NUM_ARGS();
+	MBSTRLS_FETCH();
+
+	/* getter */
+	if (argc == 0) {
+		if (array_init(return_value) == FAILURE) {
+			RETURN_FALSE;
+		}
+		cursor = MBSTRG(current_detect_order_list);
+		for (remaining = MBSTRG(current_detect_order_list_size); remaining > 0; remaining--, cursor++) {
+			enc_name = (char *)mbfl_no_encoding2name(*cursor);
+			if (enc_name) {
+				add_next_index_string(return_value, enc_name, 1);
+			}
+		}
+		return;
+	}
+
+	if (argc != 1 || zend_get_parameters_ex(1, &order_arg) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	/* setter: build a request-lifetime list from the argument */
+	new_list = NULL;
+	new_size = 0;
+	if (Z_TYPE_PP(order_arg) == IS_ARRAY) {
+		php_mbstring_parse_encoding_array(*order_arg, &new_list, &new_size, 0);
+	} else {
+		convert_to_string_ex(order_arg);
+		php_mbstring_parse_encoding_list(Z_STRVAL_PP(order_arg), Z_STRLEN_PP(order_arg), &new_list, &new_size, 0);
+	}
+
+	if (new_list == NULL) {
+		RETURN_FALSE;
+	}
+
+	if (MBSTRG(current_detect_order_list)) {
+		efree(MBSTRG(current_detect_order_list));
+	}
+	MBSTRG(current_detect_order_list) = new_list;
+	MBSTRG(current_detect_order_list_size) = new_size;
+	RETURN_TRUE;
+}
+/* }}} */
+
+
+/* {{{ proto mixed mbstr_substitute_character([mixed substchar])
+   Without an argument, returns the current setting: the string "none" or
+   "long", or the substitute character as an integer.
+   With an argument, sets it and returns TRUE: the strings "none" / "long"
+   (case-insensitive) select the corresponding mode, any other value is
+   converted to an integer and used as the substitute character. */
+PHP_FUNCTION(mbstr_substitute_character)
+{
+	pval **arg1;
+	MBSTRLS_FETCH();
+
+	if (ZEND_NUM_ARGS() == 0) {
+		if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
+			RETVAL_STRING("none", 1);
+		} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
+			RETVAL_STRING("long", 1);
+		} else {
+			RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
+		}
+	} else if (ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg1) != FAILURE) {
+		if (Z_TYPE_PP(arg1) == IS_STRING && strcasecmp("none", Z_STRVAL_PP(arg1)) == 0) {
+			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
+		} else if (Z_TYPE_PP(arg1) == IS_STRING && strcasecmp("long", Z_STRVAL_PP(arg1)) == 0) {
+			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
+		} else {
+			/* any other string, and every non-string value, is a character code */
+			convert_to_long_ex(arg1);
+			MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
+			MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
+		}
+		/* report success like the other setters in this file do */
+		RETVAL_TRUE;
+	} else {
+		WRONG_PARAM_COUNT;
+	}
+}
+/* }}} */
+
+
+/* {{{ proto string mbstr_preferred_mime_name(string encoding)
+   Returns the preferred MIME name (charset) of the given encoding.
+   Warns and returns FALSE when the encoding is unknown or has no MIME name. */
+PHP_FUNCTION(mbstr_preferred_mime_name)
+{
+	pval **encoding_arg;
+	enum mbfl_no_encoding enc;
+	const char *mime_name;
+	MBSTRLS_FETCH();
+
+	if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &encoding_arg) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	convert_to_string_ex(encoding_arg);
+	enc = mbfl_name2no_encoding(Z_STRVAL_PP(encoding_arg));
+	if (enc == mbfl_no_encoding_invalid) {
+		php_error(E_WARNING, "unknown encoding \"%s\"", Z_STRVAL_PP(encoding_arg));
+		RETURN_FALSE;
+	}
+
+	mime_name = mbfl_no2preferred_mime_name(enc);
+	if (mime_name == NULL || *mime_name == '\0') {
+		php_error(E_WARNING, "no name for \"%s\"", Z_STRVAL_PP(encoding_arg));
+		RETURN_FALSE;
+	}
+
+	RETURN_STRING((char *)mime_name, 1);
+}
+/* }}} */
+
+
+/*
+ * Parse a raw request string into variables and register them in an array.
+ *
+ *   arg        array zval that receives the variables
+ *   res        NUL-terminated input; it is tokenized and URL-decoded in place.
+ *              NULL or empty input is accepted and registers nothing.
+ *   separator  set of characters, each of which separates two variables
+ *
+ * Names are registered as decoded. Values are additionally converted from the
+ * HTTP input encoding (a single configured encoding, or one auto-detected
+ * from the decoded data when several candidates are configured) to the
+ * current internal encoding. When no converter is available the decoded
+ * value is registered unchanged.
+ */
+static void php_mbstr_encoding_handler(zval *arg, char *res, char *separator)
+{
+	char *var, *val;
+	char *strtok_buf = NULL, **val_list;
+	zval *array_ptr = (zval *) arg;
+	int n, num, *len_list, *elist, elistsz;
+	enum mbfl_no_encoding from_encoding, to_encoding;
+	mbfl_string string, result, *ret;
+	mbfl_encoding_detector *identd;
+	mbfl_buffer_converter *convd;
+	MBSTRLS_FETCH();
+	ELS_FETCH();
+	PLS_FETCH();
+
+	/* nothing to parse, e.g. a request that carries no data */
+	if (res == NULL || *res == '\0') {
+		return;
+	}
+
+	mbfl_string_init(&string);
+	mbfl_string_init(&result);
+	string.no_language = MBSTRG(current_language);
+	string.no_encoding = MBSTRG(current_internal_encoding);
+
+	/* count the variables contained in the query; every character of
+	   "separator" ends a variable, matching the strtok-style splitting
+	   done by php_strtok_r() below */
+	num = 1;
+	for (var = res; *var != '\0'; var++) {
+		if (strchr(separator, *var) != NULL) {
+			num++;
+		}
+	}
+	num *= 2;	/* one slot for the name, one for the value */
+	val_list = (char **)ecalloc(num, sizeof(char *));
+	len_list = (int *)ecalloc(num, sizeof(int));
+
+	/* split and decode the query: even slots hold names, odd slots values */
+	n = 0;
+	strtok_buf = NULL;
+	var = php_strtok_r(res, separator, &strtok_buf);
+
+	while (var && n < num) {
+		val = strchr(var, '=');
+		if (val) { /* have a value */
+			*val++ = '\0';
+			val_list[n] = var;
+			len_list[n] = php_url_decode(var, strlen(var));
+			n++;
+			val_list[n] = val;
+			len_list[n] = php_url_decode(val, strlen(val));
+		} else {
+			val_list[n] = var;
+			len_list[n] = php_url_decode(var, strlen(var));
+			n++;
+			/* no '=': use an empty value instead of a NULL pointer so the
+			   detector, converter and registrar never see NULL */
+			val_list[n] = "";
+			len_list[n] = 0;
+		}
+		n++;
+		var = php_strtok_r(NULL, separator, &strtok_buf);
+	}
+	num = n;
+
+	/* initialize converter */
+	to_encoding = MBSTRG(current_internal_encoding);
+	elist = MBSTRG(http_input_list);
+	elistsz = MBSTRG(http_input_list_size);
+	if (elistsz <= 0) {
+		from_encoding = mbfl_no_encoding_pass;
+	} else if (elistsz == 1) {
+		from_encoding = *elist;
+	} else {
+		/* auto detect: feed names and values until the detector asks to stop */
+		from_encoding = mbfl_no_encoding_invalid;
+		identd = mbfl_encoding_detector_new(elist, elistsz);
+		if (identd) {
+			n = 0;
+			while (n < num) {
+				string.val = val_list[n];
+				string.len = len_list[n];
+				if (mbfl_encoding_detector_feed(identd, &string)) {
+					break;
+				}
+				n++;
+			}
+			from_encoding = mbfl_encoding_detector_judge(identd);
+			mbfl_encoding_detector_delete(identd);
+		}
+		if (from_encoding == mbfl_no_encoding_invalid) {
+			from_encoding = mbfl_no_encoding_pass;
+		}
+	}
+	convd = NULL;
+	if (from_encoding != mbfl_no_encoding_pass) {
+		convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
+		if (convd != NULL) {
+			mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
+			mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+		} else {
+			php_error(E_WARNING, "Unable to create converter in php_mbstr_encoding_handler()");
+		}
+	}
+
+	/* convert encoding: only the values (odd slots) go through the converter */
+	string.no_encoding = from_encoding;
+	n = 0;
+	while (n < num) {
+		string.val = val_list[n+1];
+		string.len = len_list[n+1];
+		ret = NULL;
+		if (convd) {
+			ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
+		}
+		if (ret != NULL) {
+			php_register_variable_safe(val_list[n], ret->val, ret->len, array_ptr ELS_CC PLS_CC);
+		} else {
+			/* no converter or conversion failed: register the decoded bytes */
+			php_register_variable_safe(val_list[n], val_list[n+1], len_list[n+1], array_ptr ELS_CC PLS_CC);
+		}
+		n+=2;
+	}
+	if (convd != NULL) {
+		mbfl_buffer_converter_delete(convd);
+	}
+	if (val_list != NULL) {
+		efree((void *)val_list);
+	}
+	if (len_list != NULL) {
+		efree((void *)len_list);
+	}
+}
+
+/* SAPI POST handler: hands the raw POST data of the current request to
+   php_mbstr_encoding_handler() with "&" as the variable separator.
+   "arg" is not declared here; presumably it is supplied by the
+   SAPI_POST_HANDLER_FUNC signature. */
+SAPI_POST_HANDLER_FUNC(php_mbstr_post_handler)
+{
+	char *post_body = SG(request_info).post_data;
+
+	php_mbstr_encoding_handler(arg, post_body, "&");
+}
+
+/*
+ * HTTP input processing.
+ *
+ *   arg        one of PARSE_POST, PARSE_GET, PARSE_COOKIE or PARSE_STRING
+ *   str        input for PARSE_STRING; it is efree()'d here after parsing
+ *   destArray  target array for any arg other than POST/GET/COOKIE
+ *
+ * For POST, GET and COOKIE a fresh array is allocated and stored in the
+ * matching PG(http_globals) slot. POST is then delegated to
+ * sapi_handle_post(); every other source is parsed by
+ * php_mbstr_encoding_handler().
+ */
+void mbstr_treat_data(int arg, char *str, zval* destArray ELS_DC PLS_DC SLS_DC)
+{
+	char *buffer = NULL, *delimiters = NULL;
+	const char *raw = NULL;
+	pval *target;
+	int owns_buffer = 0;
+
+	/* pick the array that receives the parsed variables */
+	if (arg == PARSE_POST || arg == PARSE_GET || arg == PARSE_COOKIE) {
+		ALLOC_ZVAL(target);
+		array_init(target);
+		INIT_PZVAL(target);
+		if (arg == PARSE_POST) {
+			PG(http_globals)[TRACK_VARS_POST] = target;
+		} else if (arg == PARSE_GET) {
+			PG(http_globals)[TRACK_VARS_GET] = target;
+		} else {
+			PG(http_globals)[TRACK_VARS_COOKIE] = target;
+		}
+	} else {
+		target = destArray;
+	}
+
+	/* POST data is handled by the SAPI layer */
+	if (arg == PARSE_POST) {
+		sapi_handle_post(target SLS_CC);
+		return;
+	}
+
+	/* locate the data to parse */
+	switch (arg) {
+		case PARSE_GET:		/* GET data */
+			raw = SG(request_info).query_string;
+			break;
+		case PARSE_COOKIE:	/* Cookie data */
+			raw = SG(request_info).cookie_data;
+			break;
+		case PARSE_STRING:	/* String data */
+			buffer = str;
+			owns_buffer = 1;
+			break;
+	}
+	if (raw && *raw) {
+		/* request data is parsed destructively, so work on a copy */
+		buffer = (char *) estrdup(raw);
+		owns_buffer = 1;
+	}
+	if (!buffer) {
+		return;
+	}
+
+	/* only GET, COOKIE and STRING can reach this point */
+	if (arg == PARSE_COOKIE) {
+		delimiters = ";\0";
+	} else {
+		delimiters = (char *) estrdup(PG(arg_separator).input);
+	}
+
+	php_mbstr_encoding_handler(target, buffer, delimiters);
+
+	/* the cookie separator is a literal; the others were duplicated above */
+	if (arg != PARSE_COOKIE) {
+		efree(delimiters);
+	}
+	if (owns_buffer) {
+		efree(buffer);
+	}
+}
+
+/* {{{ proto array mbstr_gpc_handler(string query)
+   Split a "&"-separated query string, URL-decode it and return a flat array of
+   alternating names and values. Every element is converted from the HTTP input
+   encoding (configured or auto-detected) to the internal encoding; when no
+   converter is available the decoded bytes are returned unchanged. */
+PHP_FUNCTION(mbstr_gpc_handler)
+{
+	pval **arg_str;
+	char *query, *var, *val, *strtok_buf, **val_list;
+	int n, num, *len_list, *elist, elistsz;
+	enum mbfl_no_encoding from_encoding, to_encoding;
+	mbfl_string string, result, *ret;
+	mbfl_encoding_detector *identd;
+	mbfl_buffer_converter *convd;
+	MBSTRLS_FETCH();
+
+	if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg_str) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+	if (array_init(return_value) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	convert_to_string_ex(arg_str);
+	/* Tokenizing and URL-decoding write into the buffer. Work on a private
+	   copy so the string owned by the caller's variable is left intact. */
+	query = estrndup(Z_STRVAL_PP(arg_str), Z_STRLEN_PP(arg_str));
+
+	mbfl_string_init(&string);
+	mbfl_string_init(&result);
+	string.no_language = MBSTRG(current_language);
+	string.no_encoding = MBSTRG(current_internal_encoding);
+
+	/* count the variables contained in the query */
+	num = 1;
+	var = query;
+	n = Z_STRLEN_PP(arg_str);
+	while (n > 0) {
+		if (*var == '&') {
+			num++;
+		}
+		var++;
+		n--;
+	}
+	num *= 2;	/* one slot for the name, one for the value */
+	val_list = (char **)ecalloc(num, sizeof(char *));
+	len_list = (int *)ecalloc(num, sizeof(int));
+
+	/* split and decode the query: even slots hold names, odd slots values */
+	n = 0;
+	strtok_buf = NULL;
+	var = php_strtok_r(query, "&", &strtok_buf);
+	while (var && n < num) {
+		val = strchr(var, '=');
+		if (val) { /* have a value */
+			*val++ = '\0';
+			val_list[n] = var;
+			len_list[n] = php_url_decode(var, strlen(var));
+			n++;
+			val_list[n] = val;
+			len_list[n] = php_url_decode(val, strlen(val));
+		} else {
+			val_list[n] = var;
+			len_list[n] = php_url_decode(var, strlen(var));
+			n++;
+			/* no '=': use an empty value instead of a NULL pointer */
+			val_list[n] = "";
+			len_list[n] = 0;
+		}
+		n++;
+		var = php_strtok_r(NULL, "&", &strtok_buf);
+	}
+	num = n;
+
+	/* initialize converter */
+	to_encoding = MBSTRG(current_internal_encoding);
+	elist = MBSTRG(http_input_list);
+	elistsz = MBSTRG(http_input_list_size);
+	if (elistsz <= 0) {
+		from_encoding = mbfl_no_encoding_pass;
+	} else if (elistsz == 1) {
+		from_encoding = *elist;
+	} else {
+		/* auto detect: feed names and values until the detector asks to stop */
+		from_encoding = mbfl_no_encoding_invalid;
+		identd = mbfl_encoding_detector_new(elist, elistsz);
+		if (identd != NULL) {
+			n = 0;
+			while (n < num) {
+				string.val = val_list[n];
+				string.len = len_list[n];
+				if (mbfl_encoding_detector_feed(identd, &string)) {
+					break;
+				}
+				n++;
+			}
+			from_encoding = mbfl_encoding_detector_judge(identd);
+			mbfl_encoding_detector_delete(identd);
+		}
+		if (from_encoding == mbfl_no_encoding_invalid) {
+			from_encoding = mbfl_no_encoding_pass;
+		}
+	}
+	convd = NULL;
+	if (from_encoding != mbfl_no_encoding_pass) {
+		convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
+		if (convd != NULL) {
+			mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
+			mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+		} else {
+			php_error(E_WARNING, "Unable to create converter in mbstr_gpc_handler()");
+		}
+	}
+
+	/* convert encoding: names and values alike */
+	string.no_encoding = from_encoding;
+	n = 0;
+	while (n < num) {
+		string.val = val_list[n];
+		string.len = len_list[n];
+		ret = NULL;
+		if (convd) {
+			ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
+		}
+		if (ret != NULL) {
+			/* the converter result is handed over to the array without copying */
+			add_next_index_stringl(return_value, ret->val, ret->len, 0);
+		} else {
+			/* duplicated, so the working copy can be released below */
+			add_next_index_stringl(return_value, val_list[n], len_list[n], 1);
+		}
+		n++;
+	}
+	if (convd != NULL) {
+		mbfl_buffer_converter_delete(convd);
+	}
+	if (val_list != NULL) {
+		efree((void *)val_list);
+	}
+	if (len_list != NULL) {
+		efree((void *)len_list);
+	}
+	efree(query);
+}
+/* }}} */
+
+
+
+/* {{{ proto string mbstr_output_handler(string contents, int status)
+   Output-buffer callback: returns contents converted from the internal
+   encoding to the http_output encoding. The contents are returned unchanged
+   when the default content type is not being sent, when the output encoding
+   is "pass", or when no converter could be created. The converter is kept in
+   MBSTRG(outconv) between calls and released on the call whose status has
+   PHP_OUTPUT_HANDLER_END set. */
+PHP_FUNCTION(mbstr_output_handler)
+{
+	pval **arg_string, **arg_status;
+	mbfl_string string, result, *ret;
+	int last_call;
+	SLS_FETCH();
+	MBSTRLS_FETCH();
+
+	if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &arg_string, &arg_status) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	convert_to_string_ex(arg_string);
+	convert_to_long_ex(arg_status);
+	last_call = (Z_LVAL_PP(arg_status) & PHP_OUTPUT_HANDLER_END) != 0;
+
+	ret = NULL;
+	/* create the converter lazily on the first call that needs it */
+	if (SG(sapi_headers).send_default_content_type &&
+		MBSTRG(current_http_output_encoding) != mbfl_no_encoding_pass &&
+		MBSTRG(outconv) == NULL) {
+		MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), MBSTRG(current_http_output_encoding), 0);
+	}
+	if (SG(sapi_headers).send_default_content_type && MBSTRG(outconv) != NULL) {
+		mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
+		mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
+		mbfl_string_init(&string);
+		mbfl_string_init(&result);
+		string.no_language = MBSTRG(current_language);
+		string.no_encoding = MBSTRG(current_internal_encoding);
+		string.val = Z_STRVAL_PP(arg_string);
+		string.len = Z_STRLEN_PP(arg_string);
+		if (last_call) {
+			ret = mbfl_buffer_converter_feed_result(MBSTRG(outconv), &string, &result);
+		} else {
+			ret = mbfl_buffer_converter_feed_getbuffer(MBSTRG(outconv), &string, &result);
+		}
+	}
+
+	if (ret != NULL) {
+		RETVAL_STRINGL(ret->val, ret->len, 0);		/* the string is already strdup()'ed */
+	} else {
+		/* no conversion: return a copy of the input */
+		zval_dtor(return_value);
+		*return_value = **arg_string;
+		zval_copy_ctor(return_value);
+	}
+	/* release the converter after the final chunk; it may never have been
+	   created, so guard the delete like the other call sites in this file */
+	if (last_call && MBSTRG(outconv) != NULL) {
+		mbfl_buffer_converter_delete(MBSTRG(outconv));
+		MBSTRG(outconv) = NULL;
+	}
+}
+/* }}} */
+
+
+
+/* {{{ proto int mbstr_strlen(string str, [string encoding])
+ Get character numbers of a string */
+PHP_FUNCTION(mbstr_strlen)
+{
+ pval **arg1, **arg2;
+ int n;
+ mbfl_string string;
+ MBSTRLS_FETCH();
+
+ n = ZEND_NUM_ARGS();
+ if ((n == 1 && zend_get_parameters_ex(1, &arg1) == FAILURE) ||
+ (n == 2 && zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) ||
+ n < 1 || n > 2) {
+ WRONG_PARAM_COUNT;
+ }
+ convert_to_string_ex(arg1);
+ mbfl_string_init(&string);
+ string.no_language = MBSTRG(current_language);
+ string.no_encoding = MBSTRG(current_internal_encoding);
+ string.val = Z_STRVAL_PP(arg1);
+ string.len = Z_STRLEN_PP(arg1);
+
+ if (n == 2) {