Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
* include/ruby/oniguruma.h (OnigEncodingTypeST): add end argument for
  left_adjust_char_head.
  (ONIGENC_LEFT_ADJUST_CHAR_HEAD): add end argument.
  (onigenc_get_left_adjust_char_head): ditto.

* include/ruby/encoding.h (rb_enc_left_char_head): add end argument.

* regenc.h (onigenc_single_byte_left_adjust_char_head): ditto.

* regenc.c (onigenc_get_right_adjust_char_head): follow the interface
  change.
  (onigenc_get_right_adjust_char_head_with_prev): ditto.
  (onigenc_get_prev_char_head): ditto.
  (onigenc_step_back): ditto.
  (onigenc_get_left_adjust_char_head): ditto.
  (onigenc_single_byte_left_adjust_char_head): ditto.

* re.c: ditto.

* string.c: ditto.

* io.c: ditto.

* regexec.c: ditto.

* enc/euc_jp.c: ditto.

* enc/cp949.c: ditto.

* enc/shift_jis.c: ditto.

* enc/gbk.c: ditto.

* enc/big5.c: ditto.

* enc/euc_tw.c: ditto.

* enc/euc_kr.c: ditto.

* enc/emacs_mule.c: ditto.

* enc/gb18030.c: ditto.

* enc/utf_8.c: ditto.

* enc/utf_16le.c: ditto.

* enc/utf_16be.c: ditto.

* enc/utf_32le.c: ditto.

* enc/utf_32be.c: ditto.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19334 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
  • Loading branch information
akr committed Sep 13, 2008
1 parent 10ff0f2 commit 1941660
Show file tree
Hide file tree
Showing 23 changed files with 110 additions and 52 deletions.
55 changes: 55 additions & 0 deletions ChangeLog
@@ -1,3 +1,58 @@
Sun Sep 14 04:15:16 2008 Tanaka Akira <akr@fsij.org>

* include/ruby/oniguruma.h (OnigEncodingTypeST): add end argument for
left_adjust_char_head.
(ONIGENC_LEFT_ADJUST_CHAR_HEAD): add end argument.
(onigenc_get_left_adjust_char_head): ditto.

* include/ruby/encoding.h (rb_enc_left_char_head): add end argument.

* regenc.h (onigenc_single_byte_left_adjust_char_head): ditto.

* regenc.c (onigenc_get_right_adjust_char_head): follow the interface
change.
(onigenc_get_right_adjust_char_head_with_prev): ditto.
(onigenc_get_prev_char_head): ditto.
(onigenc_step_back): ditto.
(onigenc_get_left_adjust_char_head): ditto.
(onigenc_single_byte_left_adjust_char_head): ditto.

* re.c: ditto.

* string.c: ditto.

* io.c: ditto.

* regexec.c: ditto.

* enc/euc_jp.c: ditto.

* enc/cp949.c: ditto.

* enc/shift_jis.c: ditto.

* enc/gbk.c: ditto.

* enc/big5.c: ditto.

* enc/euc_tw.c: ditto.

* enc/euc_kr.c: ditto.

* enc/emacs_mule.c: ditto.

* enc/gb18030.c: ditto.

* enc/utf_8.c: ditto.

* enc/utf_16le.c: ditto.

* enc/utf_16be.c: ditto.

* enc/utf_32le.c: ditto.

* enc/utf_32be.c: ditto.

Sun Sep 14 03:43:27 2008 Tanaka Akira <akr@fsij.org>

* include/ruby/oniguruma.h (ONIGENC_STEP_BACK): add end argument.
Expand Down
4 changes: 2 additions & 2 deletions enc/big5.c
Expand Up @@ -165,7 +165,7 @@ static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
#define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)]

static UChar*
big5_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
big5_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
{
const UChar *p;
int len;
Expand All @@ -181,7 +181,7 @@ big5_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
}
}
}
len = enclen(enc, p, s);
len = enclen(enc, p, end);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
Expand Down
4 changes: 2 additions & 2 deletions enc/cp949.c
Expand Up @@ -165,7 +165,7 @@ cp949_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
}

static UChar*
cp949_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
cp949_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
{
const UChar *p;
int len;
Expand All @@ -181,7 +181,7 @@ cp949_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc
}
}
}
len = enclen(enc, p, s);
len = enclen(enc, p, end);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
Expand Down
2 changes: 1 addition & 1 deletion enc/emacs_mule.c
Expand Up @@ -293,7 +293,7 @@ mbc_case_fold(OnigCaseFoldType flag,
}

static UChar*
left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
{
const UChar *p;

Expand Down
4 changes: 2 additions & 2 deletions enc/euc_jp.c
Expand Up @@ -222,7 +222,7 @@ mbc_case_fold(OnigCaseFoldType flag,
}

static UChar*
left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
{
/* In this encoding
mb-trail bytes doesn't mix with single bytes.
Expand All @@ -234,7 +234,7 @@ left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
p = s;

while (!eucjp_islead(*p) && p > start) p--;
len = enclen(enc, p, s);
len = enclen(enc, p, end);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
Expand Down
4 changes: 2 additions & 2 deletions enc/euc_kr.c
Expand Up @@ -145,7 +145,7 @@ euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
#define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff)

static UChar*
euckr_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
euckr_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
{
/* Assumed in this encoding,
mb-trail bytes don't mix with single bytes.
Expand All @@ -157,7 +157,7 @@ euckr_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc
p = s;

while (!euckr_islead(*p) && p > start) p--;
len = enclen(enc, p, s);
len = enclen(enc, p, end);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
Expand Down
4 changes: 2 additions & 2 deletions enc/euc_tw.c
Expand Up @@ -187,7 +187,7 @@ euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
#define euctw_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)

static UChar*
euctw_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
euctw_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
{
/* Assumed in this encoding,
mb-trail bytes don't mix with single bytes.
Expand All @@ -199,7 +199,7 @@ euctw_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc
p = s;

while (!euctw_islead(*p) && p > start) p--;
len = enclen(enc, p, s);
len = enclen(enc, p, end);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
Expand Down
2 changes: 1 addition & 1 deletion enc/gb18030.c
Expand Up @@ -245,7 +245,7 @@ enum state {
};

static UChar*
gb18030_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
gb18030_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
{
const UChar *p;
enum state state = S_START;
Expand Down
4 changes: 2 additions & 2 deletions enc/gbk.c
Expand Up @@ -165,7 +165,7 @@ gbk_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
}

static UChar*
gbk_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
gbk_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
{
const UChar *p;
int len;
Expand All @@ -181,7 +181,7 @@ gbk_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
}
}
}
len = enclen(enc, p, s);
len = enclen(enc, p, end);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
Expand Down
4 changes: 2 additions & 2 deletions enc/shift_jis.c
Expand Up @@ -229,7 +229,7 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
#endif

static UChar*
left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
{
const UChar *p;
int len;
Expand All @@ -245,7 +245,7 @@ left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
}
}
}
len = enclen(enc, p, s);
len = enclen(enc, p, end);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
Expand Down
2 changes: 1 addition & 1 deletion enc/utf_16be.c
Expand Up @@ -212,7 +212,7 @@ utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* e
#endif

static UChar*
utf16be_left_adjust_char_head(const UChar* start, const UChar* s,
utf16be_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end,
OnigEncoding enc ARG_UNUSED)
{
if (s <= start) return (UChar* )s;
Expand Down
2 changes: 1 addition & 1 deletion enc/utf_16le.c
Expand Up @@ -204,7 +204,7 @@ utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
#endif

static UChar*
utf16le_left_adjust_char_head(const UChar* start, const UChar* s,
utf16le_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end,
OnigEncoding enc ARG_UNUSED)
{
if (s <= start) return (UChar* )s;
Expand Down
2 changes: 1 addition & 1 deletion enc/utf_32be.c
Expand Up @@ -152,7 +152,7 @@ utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* e
#endif

static UChar*
utf32be_left_adjust_char_head(const UChar* start, const UChar* s,
utf32be_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end,
OnigEncoding enc ARG_UNUSED)
{
int rem;
Expand Down
2 changes: 1 addition & 1 deletion enc/utf_32le.c
Expand Up @@ -152,7 +152,7 @@ utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* e
#endif

static UChar*
utf32le_left_adjust_char_head(const UChar* start, const UChar* s,
utf32le_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end,
OnigEncoding enc ARG_UNUSED)
{
int rem;
Expand Down
2 changes: 1 addition & 1 deletion enc/utf_8.c
Expand Up @@ -405,7 +405,7 @@ get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,


static UChar*
left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc ARG_UNUSED)
left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc ARG_UNUSED)
{
const UChar *p;

Expand Down
2 changes: 1 addition & 1 deletion include/ruby/encoding.h
Expand Up @@ -132,7 +132,7 @@ int rb_enc_codelen(int code, rb_encoding *enc);
/* start, ptr, end, encoding -> prev_char */
#define rb_enc_prev_char(s,p,e,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))
/* start, ptr, end, encoding -> next_char */
#define rb_enc_left_char_head(s,p,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)(s),(UChar*)(p))
#define rb_enc_left_char_head(s,p,e,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))
#define rb_enc_right_char_head(s,p,e,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))

/* ptr, ptr, encoding -> newline_or_not */
Expand Down
8 changes: 4 additions & 4 deletions include/ruby/oniguruma.h
Expand Up @@ -164,7 +164,7 @@ typedef struct OnigEncodingTypeST {
int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end);
int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, struct OnigEncodingTypeST* enc);
int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc);
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, struct OnigEncodingTypeST* enc);
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
int ruby_encoding_index;
} OnigEncodingType;
Expand Down Expand Up @@ -219,8 +219,8 @@ ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
(enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc)
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
(enc)->is_allowed_reverse_match(s,end,enc)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
(enc)->left_adjust_char_head(start, s, enc)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \
(enc)->left_adjust_char_head(start, s, end, enc)
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
(enc)->apply_all_case_fold(case_fold_flag,f,arg,enc)
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
Expand Down Expand Up @@ -307,7 +307,7 @@ OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, co
ONIG_EXTERN
OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
ONIG_EXTERN
OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
ONIG_EXTERN
OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
ONIG_EXTERN
Expand Down
9 changes: 5 additions & 4 deletions io.c
Expand Up @@ -2177,21 +2177,22 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
else
enc = io_input_encoding(fptr);
while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
const char *s, *p, *pp;
const char *s, *p, *pp, *e;

if (c == newline) {
if (RSTRING_LEN(str) < rslen) continue;
s = RSTRING_PTR(str);
p = s + RSTRING_LEN(str) - rslen;
pp = rb_enc_left_char_head(s, p, enc);
e = s + RSTRING_LEN(str);
p = e - rslen;
pp = rb_enc_left_char_head(s, p, e, enc);
if (pp != p) continue;
if (!rspara) rscheck(rsptr, rslen, rs);
if (memcmp(p, rsptr, rslen) == 0) break;
}
if (limit == 0) {
s = RSTRING_PTR(str);
p = s + RSTRING_LEN(str);
pp = rb_enc_left_char_head(s, p-1, enc);
pp = rb_enc_left_char_head(s, p-1, p, enc);
if (extra_limit &&
MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(pp, p, enc))) {
/* relax the limit while incomplete character.
Expand Down
2 changes: 1 addition & 1 deletion re.c
Expand Up @@ -1267,7 +1267,7 @@ rb_reg_adjust_startpos(VALUE re, VALUE str, int pos, int reverse)
p = onigenc_get_right_adjust_char_head(enc, string, string + pos, string + RSTRING_LEN(str));
}
else {
p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, string, string + pos);
p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, string, string + pos, string + RSTRING_LEN(str));
}
return p - string;
}
Expand Down
13 changes: 7 additions & 6 deletions regenc.c
Expand Up @@ -64,7 +64,7 @@ onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEnc
extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
{
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
if (p < s) {
p += enclen(enc, p, end);
}
Expand All @@ -75,7 +75,7 @@ extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
{
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);

if (p < s) {
if (prev) *prev = (const UChar* )p;
Expand All @@ -93,7 +93,7 @@ onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s,
if (s <= start)
return (UChar* )NULL;

return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
}

extern UChar*
Expand All @@ -103,7 +103,7 @@ onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UC
if (s <= start)
return (UChar* )NULL;

s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
}
return (UChar* )s;
}
Expand Down Expand Up @@ -369,9 +369,9 @@ onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
}

extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
{
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
}

const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
Expand Down Expand Up @@ -637,6 +637,7 @@ onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc

extern UChar*
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
const UChar* end,
OnigEncoding enc ARG_UNUSED)
{
return (UChar* )s;
Expand Down
2 changes: 1 addition & 1 deletion regenc.h
Expand Up @@ -125,7 +125,7 @@ ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p, const UChar*
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end, OnigEncoding enc));
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf, OnigEncoding enc));
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s, OnigEncoding enc));
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc));
ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end, OnigEncoding enc));
ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end, OnigEncoding enc));
ONIG_EXTERN int onigenc_ascii_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc));
Expand Down

0 comments on commit 1941660

Please sign in to comment.