diff --git a/benchmark/regexp_dup.yml b/benchmark/regexp_dup.yml
new file mode 100644
index 00000000000000..52f89991cdf439
--- /dev/null
+++ b/benchmark/regexp_dup.yml
@@ -0,0 +1,6 @@
+prelude: |
+  str = "a" * 1000
+  re = Regexp.new(str)
+
+benchmark:
+  dup: re.dup
diff --git a/benchmark/regexp_new.yml b/benchmark/regexp_new.yml
new file mode 100644
index 00000000000000..bc9ab3ca21e966
--- /dev/null
+++ b/benchmark/regexp_new.yml
@@ -0,0 +1,7 @@
+prelude: |
+  str = "a" * 1000
+  re = Regexp.new(str)
+
+benchmark:
+  string: Regexp.new(str)
+  regexp: Regexp.new(re)
diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h
index 0a5400c3a5f07e..d233336316ee19 100644
--- a/include/ruby/onigmo.h
+++ b/include/ruby/onigmo.h
@@ -844,6 +844,8 @@ void onig_free(OnigRegex);
 ONIG_EXTERN
 void onig_free_body(OnigRegex);
 ONIG_EXTERN
+int onig_reg_copy(OnigRegex* reg, OnigRegex orig_reg);
+ONIG_EXTERN
 OnigPosition onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), void* callback_arg);
 ONIG_EXTERN
 OnigPosition onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option);
diff --git a/re.c b/re.c
index f6abf46131302d..5fc005552f68c6 100644
--- a/re.c
+++ b/re.c
@@ -3213,6 +3213,18 @@ rb_reg_preprocess_dregexp(VALUE ary, int options)
     return result;
 }
 
+/* Common precondition for initializing obj as a Regexp: obj must pass
+ * rb_check_frozen(), and must not already hold a compiled pattern,
+ * otherwise TypeError is raised. */
+static void
+rb_reg_initialize_check(VALUE obj)
+{
+    rb_check_frozen(obj);
+    if (RREGEXP_PTR(obj)) {
+        rb_raise(rb_eTypeError, "already initialized regexp");
+    }
+}
+
 static int
 rb_reg_initialize(VALUE obj, const char *s, long len, rb_encoding *enc,
                   int options, onig_errmsg_buffer err,
@@ -3223,10 +3235,7 @@ rb_reg_initialize(VALUE obj, const char *s, long len, rb_encoding *enc,
     rb_encoding *fixed_enc = 0;
     rb_encoding *a_enc = rb_ascii8bit_encoding();
 
-    rb_check_frozen(obj);
-    if (re->ptr)
-        rb_raise(rb_eTypeError, "already initialized regexp");
-    re->ptr = 0;
+    rb_reg_initialize_check(obj);
 
     if (rb_enc_dummy_p(enc)) {
         errcpy(err, "can't make regexp with dummy encoding");
@@ -3862,6 +3871,35 @@ set_timeout(rb_hrtime_t *hrt, VALUE timeout)
     double2hrtime(hrt, timeout_d);
 }
 
+/*
+ * Initializes copy as a duplicate of the compiled Regexp orig without
+ * recompiling its source: the regex_t is duplicated with onig_reg_copy(),
+ * and the source string, timelimit and encoding are taken from orig.
+ * Raises RegexpError if onig_reg_copy() reports an error.
+ *
+ * NOTE(review): the rb_reg_init_str() call this replaces in
+ * rb_reg_init_copy() passed rb_reg_options(re) through initialization;
+ * confirm that no option-derived state on the object needs to be carried
+ * over here as well.
+ */
+static VALUE
+reg_copy(VALUE copy, VALUE orig)
+{
+    int r;
+    regex_t *re;
+
+    rb_reg_initialize_check(copy);
+    if ((r = onig_reg_copy(&re, RREGEXP_PTR(orig))) != 0) {
+        /* ONIGERR_MEMORY only */
+        rb_raise(rb_eRegexpError, "%s", onig_error_code_to_format(r));
+    }
+    RREGEXP_PTR(copy) = re;
+    RB_OBJ_WRITE(copy, &RREGEXP(copy)->src, RREGEXP(orig)->src);
+    RREGEXP_PTR(copy)->timelimit = RREGEXP_PTR(orig)->timelimit;
+    rb_enc_copy(copy, orig);
+    return copy;
+}
+
 struct reg_init_args {
     VALUE str;
     VALUE timeout;
@@ -3931,9 +3969,14 @@ static VALUE
 rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
 {
     struct reg_init_args args;
+    VALUE re = reg_extract_args(argc, argv, &args);
 
-    reg_extract_args(argc, argv, &args);
-    reg_init_args(self, args.str, args.enc, args.flags);
+    if (NIL_P(re)) {
+        reg_init_args(self, args.str, args.enc, args.flags);
+    }
+    else {
+        reg_copy(self, re);
+    }
 
     set_timeout(&RREGEXP_PTR(self)->timelimit, args.timeout);
 
@@ -4356,7 +4399,7 @@ rb_reg_init_copy(VALUE copy, VALUE re)
 {
     if (!OBJ_INIT_COPY(copy, re)) return copy;
     rb_reg_check(re);
-    return rb_reg_init_str(copy, RREGEXP_SRC(re), rb_reg_options(re));
+    return reg_copy(copy, re);
 }
 
 VALUE
diff --git a/regcomp.c b/regcomp.c
index be85d85f93d476..b4dbddfa01ff10 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -5671,6 +5671,99 @@ onig_free(regex_t* reg)
   }
 }
 
+/* Returns a newly allocated copy of the size bytes at ptr, or NULL when
+ * xmalloc() returns NULL. */
+static void*
+dup_copy(const void *ptr, size_t size)
+{
+  void *newptr = xmalloc(size);
+  if (IS_NOT_NULL(newptr)) {
+    memcpy(newptr, ptr, size);
+  }
+  return newptr;
+}
+
+/*
+ * Duplicates the compiled regex oreg into a newly allocated regex_t
+ * stored in *nreg.  The struct is copied by assignment, then the members
+ * exact, int_map, int_map_backward, p, repeat_range, name_table and chain
+ * are each duplicated when they are non-NULL.
+ *
+ * Returns 0 on success, or ONIGERR_MEMORY if any allocation fails, in
+ * which case everything duplicated so far is released.  *nreg is set to
+ * NULL on failure and when oreg is NULL.
+ */
+extern int
+onig_reg_copy(regex_t** nreg, regex_t* oreg)
+{
+  if (IS_NOT_NULL(oreg)) {
+    regex_t *reg = *nreg = (regex_t* )xmalloc(sizeof(regex_t));
+    if (IS_NULL(reg)) return ONIGERR_MEMORY;
+
+    *reg = *oreg;
+
+#   define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
+
+    if (IS_NOT_NULL(reg->exact)) {
+      size_t exact_size = reg->exact_end - reg->exact;
+      if (COPY_FAILED(exact, exact_size))
+        goto err;
+      (reg)->exact_end = (reg)->exact + exact_size;
+    }
+
+    if (IS_NOT_NULL(reg->int_map)) {
+      if (COPY_FAILED(int_map, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
+        goto err_int_map;
+    }
+    if (IS_NOT_NULL(reg->int_map_backward)) {
+      if (COPY_FAILED(int_map_backward, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
+        goto err_int_map_backward;
+    }
+    if (IS_NOT_NULL(reg->p)) {
+      if (COPY_FAILED(p, reg->alloc))
+        goto err_p;
+    }
+    if (IS_NOT_NULL(reg->repeat_range)) {
+      if (COPY_FAILED(repeat_range, reg->repeat_range_alloc * sizeof(OnigRepeatRange)))
+        goto err_repeat_range;
+    }
+    if (IS_NOT_NULL(reg->name_table)) {
+      /* NOTE(review): st_copy() duplicates the table itself; confirm
+       * whether the entries it stores are owned by the table and would
+       * need duplicating too, so both regexes can be freed safely. */
+      if (IS_NULL(reg->name_table = st_copy(reg->name_table)))
+        goto err_name_table;
+    }
+    if (IS_NOT_NULL(reg->chain)) {
+      if (onig_reg_copy(&reg->chain, reg->chain))
+        goto err_chain;
+    }
+    return 0;
+#   undef COPY_FAILED
+
+    /* Each label releases what was duplicated before the failing step. */
+  err_chain:
+    if (IS_NOT_NULL(reg->name_table))
+      onig_st_free_table(reg->name_table);
+  err_name_table:
+    xfree(reg->repeat_range);
+  err_repeat_range:
+    xfree(reg->p);
+  err_p:
+    xfree(reg->int_map_backward);
+  err_int_map_backward:
+    xfree(reg->int_map);
+  err_int_map:
+    xfree(reg->exact);
+  err:
+    xfree(reg);
+    *nreg = NULL;
+    return ONIGERR_MEMORY;
+  }
+  *nreg = NULL;
+  return 0;
+}
+
 #ifdef RUBY
 size_t
 onig_memsize(const regex_t *reg)