From 9dcdffb8bf8a3654fd78bf1a58b30c8e13888a7a Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 9 Jul 2023 18:45:51 +0900 Subject: [PATCH] Make the string index functions closer to symmetric So that irregular parts may be more noticeable. --- string.c | 84 +++++++++++++++++++++----------------------------------- 1 file changed, 32 insertions(+), 52 deletions(-) diff --git a/string.c b/string.c index c34a1808793aef..af0668e4b25dc7 100644 --- a/string.c +++ b/string.c @@ -3809,6 +3809,7 @@ strseq_core(const char *str_ptr, const char *str_ptr_end, long str_len, /* found index in byte */ #define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0) +#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1) static long rb_strseq_index(VALUE str, VALUE sub, long offset, int in_byte) @@ -3866,33 +3867,24 @@ rb_str_index_m(int argc, VALUE *argv, VALUE str) long pos; if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) { + long slen = str_strlen(str, enc); /* str's enc */ pos = NUM2LONG(initpos); - } - else { - pos = 0; - } - if (pos < 0) { - pos += str_strlen(str, NULL); - if (pos < 0) { + if (pos < 0 ? (pos += slen) < 0 : pos > slen) { if (RB_TYPE_P(sub, T_REGEXP)) { rb_backref_set(Qnil); } return Qnil; } } + else { + pos = 0; + } if (RB_TYPE_P(sub, T_REGEXP)) { - if (pos > str_strlen(str, NULL)) { - rb_backref_set(Qnil); - return Qnil; - } pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos, enc, single_byte_optimizable(str)); - if (rb_reg_search(sub, str, pos, 0) < 0) { - return Qnil; - } - else { + if (rb_reg_search(sub, str, pos, 0) >= 0) { VALUE match = rb_backref_get(); struct re_registers *regs = RMATCH_REGS(match); pos = rb_str_sublen(str, BEG(0)); @@ -3902,11 +3894,12 @@ rb_str_index_m(int argc, VALUE *argv, VALUE str) else { StringValue(sub); pos = rb_str_index(str, sub, pos); - pos = rb_str_sublen(str, pos); + if (pos >= 0) { + pos = rb_str_sublen(str, pos); + return LONG2NUM(pos); + } } - - if (pos == -1) return Qnil; - return LONG2NUM(pos); + return Qnil; } /* Ensure that the given pos is a valid character boundary. @@ -3977,10 +3970,7 @@ rb_str_byteindex_m(int argc, VALUE *argv, VALUE str) if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) { long slen = RSTRING_LEN(str); pos = NUM2LONG(initpos); - if (pos < 0) { - pos += slen; - } - if (pos < 0 || pos > slen) { + if (pos < 0 ? (pos += slen) < 0 : pos > slen) { if (RB_TYPE_P(sub, T_REGEXP)) { rb_backref_set(Qnil); } @@ -3994,10 +3984,7 @@ rb_str_byteindex_m(int argc, VALUE *argv, VALUE str) str_ensure_byte_pos(str, pos); if (RB_TYPE_P(sub, T_REGEXP)) { - if (rb_reg_search(sub, str, pos, 0) < 0) { - return Qnil; - } - else { + if (rb_reg_search(sub, str, pos, 0) >= 0) { VALUE match = rb_backref_get(); struct re_registers *regs = RMATCH_REGS(match); pos = BEG(0); @@ -4006,11 +3993,10 @@ rb_str_byteindex_m(int argc, VALUE *argv, VALUE str) } else { StringValue(sub); - pos = rb_strseq_index(str, sub, pos, 1); + pos = rb_str_byteindex(str, sub, pos); + if (pos >= 0) return LONG2NUM(pos); } - - if (pos == -1) return Qnil; - return LONG2NUM(pos); + return Qnil; } #ifdef HAVE_MEMRCHR @@ -4163,20 +4149,17 @@ static VALUE rb_str_rindex_m(int argc, VALUE *argv, VALUE str) { VALUE sub; - VALUE vpos; + VALUE initpos; rb_encoding *enc = STR_ENC_GET(str); long pos, len = str_strlen(str, enc); /* str's enc */ - if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) { - pos = NUM2LONG(vpos); - if (pos < 0) { - pos += len; - if (pos < 0) { - if (RB_TYPE_P(sub, T_REGEXP)) { - rb_backref_set(Qnil); - } - return Qnil; + if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) { + pos = NUM2LONG(initpos); + if (pos < 0 && (pos += len) < 0) { + if (RB_TYPE_P(sub, T_REGEXP)) { + rb_backref_set(Qnil); } + return Qnil; } if (pos > len) pos = len; } @@ -4302,19 +4285,16 @@ static VALUE rb_str_byterindex_m(int argc, VALUE *argv, VALUE str) { VALUE sub; - VALUE vpos; + VALUE initpos; long pos, len = RSTRING_LEN(str); - if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) { - pos = NUM2LONG(vpos); - if (pos < 0) { - pos += len; - if (pos < 0) { - if (RB_TYPE_P(sub, T_REGEXP)) { - rb_backref_set(Qnil); - } - return Qnil; + if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) { + pos = NUM2LONG(initpos); + if (pos < 0 && (pos += len) < 0) { + if (RB_TYPE_P(sub, T_REGEXP)) { + rb_backref_set(Qnil); } + return Qnil; } if (pos > len) pos = len; } @@ -5570,7 +5550,7 @@ static long rb_pat_search(VALUE pat, VALUE str, long pos, int set_backref_str) { if (BUILTIN_TYPE(pat) == T_STRING) { - pos = rb_strseq_index(str, pat, pos, 1); + pos = rb_str_byteindex(str, pat, pos); if (set_backref_str) { if (pos >= 0) { str = rb_str_new_frozen_String(str);