Skip to content

Commit

Permalink
* grapheme cluster implementation reverted. [ruby-dev:36375]
Browse files Browse the repository at this point in the history
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19417 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
  • Loading branch information
akr committed Sep 18, 2008
1 parent 22987dc commit 081c802
Show file tree
Hide file tree
Showing 25 changed files with 234 additions and 892 deletions.
4 changes: 4 additions & 0 deletions ChangeLog
@@ -1,3 +1,7 @@
+Thu Sep 18 21:37:14 2008 Tanaka Akira <akr@fsij.org>
+
+* grapheme cluster implementation reverted. [ruby-dev:36375]
+
 Thu Sep 18 20:50:36 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>

 * lib/rake.rb (Rake::Application#standard_exception_handling):
Expand Down
4 changes: 2 additions & 2 deletions enc/big5.c
Expand Up @@ -108,9 +108,9 @@ big5_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
 }

 static OnigCodePoint
-big5_mbc_to_code(const UChar* p, const UChar* end, int *precise_ret, OnigEncoding enc)
+big5_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
 {
-return onigenc_mbn_mbc_to_code(enc, p, end, precise_ret);
+return onigenc_mbn_mbc_to_code(enc, p, end);
 }

 static int
Expand Down
4 changes: 2 additions & 2 deletions enc/cp949.c
Expand Up @@ -130,9 +130,9 @@ cp949_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
 }

 static OnigCodePoint
-cp949_mbc_to_code(const UChar* p, const UChar* end, int *precise_ret, OnigEncoding enc)
+cp949_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
 {
-return onigenc_mbn_mbc_to_code(enc, p, end, precise_ret);
+return onigenc_mbn_mbc_to_code(enc, p, end);
 }

 static int
Expand Down
16 changes: 14 additions & 2 deletions enc/emacs_mule.c
Expand Up @@ -223,9 +223,21 @@ mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
 }

 static OnigCodePoint
-mbc_to_code(const UChar* p, const UChar* end, int *precise_ret, OnigEncoding enc)
+mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
 {
-return onigenc_mbn_mbc_to_code(enc, p, end, precise_ret);
+int c, i, len;
+OnigCodePoint n;
+
+len = enclen(enc, p, end);
+n = (OnigCodePoint )*p++;
+if (len == 1) return n;
+
+for (i = 1; i < len; i++) {
+if (p >= end) break;
+c = *p++;
+n <<= 8; n += c;
+}
+return n;
 }

 static int
Expand Down
16 changes: 14 additions & 2 deletions enc/euc_jp.c
Expand Up @@ -133,9 +133,21 @@ mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
 }

 static OnigCodePoint
-mbc_to_code(const UChar* p, const UChar* end, int *precise_ret, OnigEncoding enc)
+mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
 {
-return onigenc_mbn_mbc_to_code(enc, p, end, precise_ret);
+int c, i, len;
+OnigCodePoint n;
+
+len = enclen(enc, p, end);
+n = (OnigCodePoint )*p++;
+if (len == 1) return n;
+
+for (i = 1; i < len; i++) {
+if (p >= end) break;
+c = *p++;
+n <<= 8; n += c;
+}
+return n;
 }

 static int
Expand Down
4 changes: 2 additions & 2 deletions enc/euc_kr.c
Expand Up @@ -108,9 +108,9 @@ euckr_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
 }

 static OnigCodePoint
-euckr_mbc_to_code(const UChar* p, const UChar* end, int *precise_ret, OnigEncoding enc)
+euckr_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
 {
-return onigenc_mbn_mbc_to_code(enc, p, end, precise_ret);
+return onigenc_mbn_mbc_to_code(enc, p, end);
 }

 static int
Expand Down
4 changes: 2 additions & 2 deletions enc/euc_tw.c
Expand Up @@ -150,9 +150,9 @@ euctw_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
 }

 static OnigCodePoint
-euctw_mbc_to_code(const UChar* p, const UChar* end, int *precise_ret, OnigEncoding enc ARG_UNUSED)
+euctw_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
 {
-return onigenc_mbn_mbc_to_code(enc, p, end, precise_ret);
+return onigenc_mbn_mbc_to_code(enc, p, end);
 }

 static int
Expand Down
16 changes: 14 additions & 2 deletions enc/gb18030.c
Expand Up @@ -164,9 +164,21 @@ gb18030_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
 }

 static OnigCodePoint
-gb18030_mbc_to_code(const UChar* p, const UChar* end, int *precise_ret, OnigEncoding enc)
+gb18030_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
 {
-return onigenc_mbn_mbc_to_code(enc, p, end, precise_ret);
+int c, i, len;
+OnigCodePoint n;
+
+len = enclen(enc, p, end);
+n = (OnigCodePoint )(*p++);
+if (len == 1) return n;
+
+for (i = 1; i < len; i++) {
+if (p >= end) break;
+c = *p++;
+n <<= 8; n += c;
+}
+return n;
 }

 static int
Expand Down
4 changes: 2 additions & 2 deletions enc/gbk.c
Expand Up @@ -130,9 +130,9 @@ gbk_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
 }

 static OnigCodePoint
-gbk_mbc_to_code(const UChar* p, const UChar* end, int *precise_ret, OnigEncoding enc)
+gbk_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
 {
-return onigenc_mbn_mbc_to_code(enc, p, end, precise_ret);
+return onigenc_mbn_mbc_to_code(enc, p, end);
 }

 static int
Expand Down
17 changes: 15 additions & 2 deletions enc/shift_jis.c
Expand Up @@ -145,9 +145,22 @@ code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
 }

 static OnigCodePoint
-mbc_to_code(const UChar* p, const UChar* end, int *precise_ret, OnigEncoding enc)
+mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
 {
-return onigenc_mbn_mbc_to_code(enc, p, end, precise_ret);
+int c, i, len;
+OnigCodePoint n;
+
+len = enclen(enc, p, end);
+c = *p++;
+n = c;
+if (len == 1) return n;
+
+for (i = 1; i < len; i++) {
+if (p >= end) break;
+c = *p++;
+n <<= 8; n += c;
+}
+return n;
 }

 static int
Expand Down
20 changes: 10 additions & 10 deletions enc/unicode.c
Expand Up @@ -10966,13 +10966,13 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc,
 {
 CodePointList3 *to;
 OnigCodePoint code;
-int i, len, rlen, r;
+int i, len, rlen;
 const UChar *p = *pp;

 if (CaseFoldInited == 0) init_case_fold_table();

-code = ONIGENC_MBC_PRECISE_CODEPOINT(enc, p, end, &r);
-len = ONIGENC_MBCLEN_CHARFOUND_LEN(r);
+code = ONIGENC_MBC_TO_CODE(enc, p, end);
+len = enclen(enc, p, end);
 *pp += len;

 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
Expand Down Expand Up @@ -11160,7 +11160,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
 OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
 OnigCaseFoldCodeItem items[])
 {
-int n, i, j, k, len, r;
+int n, i, j, k, len;
 OnigCodePoint code, codes[3];
 CodePointList3 *to, *z3;
 CodePointList2 *z2;
Expand All @@ -11169,8 +11169,8 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,

 n = 0;

-code = ONIGENC_MBC_PRECISE_CODEPOINT(enc, p, end, &r);
-len = ONIGENC_MBCLEN_CHARFOUND_LEN(r);
+code = ONIGENC_MBC_TO_CODE(enc, p, end);
+len = enclen(enc, p, end);

 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
Expand Down Expand Up @@ -11311,15 +11311,15 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
 int clen;

 codes[0] = code;
-code = ONIGENC_MBC_PRECISE_CODEPOINT(enc, p, end, &r);
+code = ONIGENC_MBC_TO_CODE(enc, p, end);
 if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
 && to->n == 1) {
 codes[1] = to->code[0];
 }
 else
 codes[1] = code;

-clen = ONIGENC_MBCLEN_CHARFOUND_LEN(r);
+clen = enclen(enc, p, end);
 len += clen;
 if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) {
 for (i = 0; i < z2->n; i++) {
Expand All @@ -11332,15 +11332,15 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,

 p += clen;
 if (p < end) {
-code = ONIGENC_MBC_PRECISE_CODEPOINT(enc, p, end, &r);
+code = ONIGENC_MBC_TO_CODE(enc, p, end);
 if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
 && to->n == 1) {
 codes[2] = to->code[0];
 }
 else
 codes[2] = code;

-clen = ONIGENC_MBCLEN_CHARFOUND_LEN(r);
+clen = enclen(enc, p, end);
 len += clen;
 if (onig_st_lookup(Unfold3Table, (st_data_t )codes,
 (void* )&z2) != 0) {
Expand Down
6 changes: 0 additions & 6 deletions enc/utf_16be.c
Expand Up @@ -103,15 +103,9 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end,

 static OnigCodePoint
 utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
-int *precise_ret,
 OnigEncoding enc)
 {
 OnigCodePoint code;
-int ret;
-
-ret = utf16be_mbc_enc_len(p, end, enc);
-if (precise_ret)
-*precise_ret = ret;

 if (UTF16_IS_SURROGATE_FIRST(*p)) {
 code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
Expand Down
6 changes: 0 additions & 6 deletions enc/utf_16le.c
Expand Up @@ -95,17 +95,11 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end,

 static OnigCodePoint
 utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
-int *precise_ret,
 OnigEncoding enc ARG_UNUSED)
 {
 OnigCodePoint code;
 UChar c0 = *p;
 UChar c1 = *(p+1);
-int ret;
-
-ret = utf16le_mbc_enc_len(p, end, enc);
-if (precise_ret)
-*precise_ret = ret;

 if (UTF16_IS_SURROGATE_FIRST(c1)) {
 code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16)
Expand Down
3 changes: 0 additions & 3 deletions enc/utf_32be.c
Expand Up @@ -61,11 +61,8 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end,

 static OnigCodePoint
 utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
-int *precise_ret,
 OnigEncoding enc ARG_UNUSED)
 {
-if (precise_ret)
-*precise_ret = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
 return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
 }

Expand Down
3 changes: 0 additions & 3 deletions enc/utf_32le.c
Expand Up @@ -61,11 +61,8 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end,

 static OnigCodePoint
 utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
-int *precise_ret,
 OnigEncoding enc ARG_UNUSED)
 {
-if (precise_ret)
-*precise_ret = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
 return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
 }

Expand Down

0 comments on commit 081c802

Please sign in to comment.