Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function rb_reg_onig_match #8123

Merged
merged 1 commit into from
Jul 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ext/strscan/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
if RUBY_ENGINE == 'ruby'
$INCFLAGS << " -I$(top_srcdir)" if $extmk
have_func("onig_region_memsize", "ruby.h")
have_func("rb_reg_onig_match", "ruby.h")
create_makefile 'strscan'
else
File.write('Makefile', dummy_makefile("").join)
Expand Down
107 changes: 68 additions & 39 deletions ext/strscan/strscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,68 @@ adjust_register_position(struct strscanner *p, long position)
}
}

/*
 * Fallback definition of rb_reg_onig_match, which is available in Ruby 3.3
 * and later. It is compiled only when HAVE_RB_REG_ONIG_MATCH is not defined.
 *
 * Prepares `re` for use against `str`, invokes the `match` callback with the
 * prepared pattern buffer, then releases or installs that buffer.
 *
 * re    - the Regexp object to run.
 * str   - the String handed to rb_reg_prepare_re and to `match`.
 * match - callback performing the actual matching; receives the prepared
 *         pattern buffer, `str`, `regs` and `args`.
 * args  - opaque pointer forwarded to `match` untouched.
 * regs  - registers forwarded to `match`.
 *
 * Returns whatever `match` returned when that value is non-negative or is
 * ONIG_MISMATCH. Any other negative value raises ScanError.
 */
#ifndef HAVE_RB_REG_ONIG_MATCH
static OnigPosition
rb_reg_onig_match(VALUE re, VALUE str,
                  OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
                  void *args, struct re_registers *regs)
{
    regex_t *prepared = rb_reg_prepare_re(re, str);

    /* True when rb_reg_prepare_re handed back the buffer already stored in
     * `re` rather than a different one. Decided once, before the callback. */
    const bool uses_own_buffer = (prepared == RREGEXP_PTR(re));

    /* Hold a use count on the regexp's own buffer while the callback runs. */
    if (uses_own_buffer) {
        RREGEXP(re)->usecnt++;
    }

    const OnigPosition position = match(prepared, str, regs, args);

    if (uses_own_buffer) {
        RREGEXP(re)->usecnt--;
    }
    else if (RREGEXP(re)->usecnt) {
        /* The stored buffer still has a nonzero use count, so leave it in
         * place and discard the separate buffer we were given. */
        onig_free(prepared);
    }
    else {
        /* Nothing holds the stored buffer: replace it with the new one. */
        onig_free(RREGEXP_PTR(re));
        RREGEXP_PTR(re) = prepared;
    }

    /* ONIG_MISMATCH is the only negative result passed back to the caller. */
    if (position < 0 && position != ONIG_MISMATCH) {
        rb_raise(ScanError, "regexp buffer overflow");
    }

    return position;
}
#endif

/*
 * Match callback for rb_reg_onig_match that calls onig_match().
 *
 * reg      - pattern buffer to match with.
 * str      - not referenced here; the subject comes from the scanner.
 * regs     - registers passed through to onig_match().
 * args_ptr - the `struct strscanner *` being scanned, passed as `void *`.
 *
 * Returns the value produced by onig_match().
 */
static OnigPosition
strscan_match(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
{
    struct strscanner *scanner = args_ptr;

    UChar *subject = match_target(scanner);
    UChar *subject_end = (UChar *)(CURPTR(scanner) + S_RESTLEN(scanner));
    UChar *match_at = (UChar *)CURPTR(scanner);

    return onig_match(reg, subject, subject_end, match_at, regs, ONIG_OPTION_NONE);
}

/*
 * Match callback for rb_reg_onig_match that calls onig_search().
 *
 * reg      - pattern buffer to search with.
 * str      - not referenced here; the subject comes from the scanner.
 * regs     - registers passed through to onig_search().
 * args_ptr - the `struct strscanner *` being scanned, passed as `void *`.
 *
 * Returns the value produced by onig_search().
 */
static OnigPosition
strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
{
    struct strscanner *scanner = args_ptr;

    UChar *subject = match_target(scanner);
    UChar *subject_end = (UChar *)(CURPTR(scanner) + S_RESTLEN(scanner));
    UChar *search_start = (UChar *)CURPTR(scanner);
    /* The search range ends at the same address as the subject itself. */
    UChar *search_range = (UChar *)(CURPTR(scanner) + S_RESTLEN(scanner));

    return onig_search(reg, subject, subject_end, search_start, search_range,
                       regs, ONIG_OPTION_NONE);
}

static VALUE
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
{
Expand All @@ -560,47 +622,14 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
}

if (RB_TYPE_P(pattern, T_REGEXP)) {
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
regex_t *re;
long ret;
int tmpreg;

p->regex = pattern;
re = rb_reg_prepare_re(pattern, p->str);
tmpreg = re != RREGEXP_PTR(pattern);
if (!tmpreg) RREGEXP(pattern)->usecnt++;

if (headonly) {
ret = onig_match(re,
match_target(p),
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
(UChar* )CURPTR(p),
&(p->regs),
ONIG_OPTION_NONE);
}
else {
ret = onig_search(re,
match_target(p),
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
(UChar* )CURPTR(p),
(UChar* )(CURPTR(p) + S_RESTLEN(p)),
&(p->regs),
ONIG_OPTION_NONE);
}
if (!tmpreg) RREGEXP(pattern)->usecnt--;
if (tmpreg) {
if (RREGEXP(pattern)->usecnt) {
onig_free(re);
}
else {
onig_free(RREGEXP_PTR(pattern));
RREGEXP_PTR(pattern) = re;
}
}
OnigPosition ret = rb_reg_onig_match(pattern,
p->str,
headonly ? strscan_match : strscan_search,
(void *)p,
&(p->regs));

if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
if (ret < 0) {
/* not matched */
if (ret == ONIG_MISMATCH) {
return Qnil;
}
}
Expand Down
18 changes: 11 additions & 7 deletions include/ruby/re.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <stdio.h>

#include "ruby/onigmo.h"
#include "ruby/regex.h"
#include "ruby/internal/core/rmatch.h"
#include "ruby/internal/dllexport.h"
Expand Down Expand Up @@ -105,25 +106,28 @@ long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir);
VALUE rb_reg_quote(VALUE str);

/**
* Exercises various checks and preprocesses so that the given regular
* expression can be applied to the given string. The preprocess here includes
* (but not limited to) for instance encoding conversion.
* Runs a regular expression match using function `match`. Performs preparation,
* error handling, and memory cleanup.
*
* @param[in] re Target regular expression.
* @param[in] str What `re` is about to run on.
* @param[in] match The function to run to match `str` against `re`.
* @param[in] args Pointer to arguments to pass into `match`.
* @param[out] regs Registers on a successful match.
* @exception rb_eArgError `re` does not fit for `str`.
* @exception rb_eEncCompatError `re` and `str` are incompatible.
* @exception rb_eRegexpError `re` is malformed.
* @return A preprocessed pattern buffer ready to be applied to `str`.
* @note The return value is managed by our GC. Don't free.
* @return Match position on a successful match, `ONIG_MISMATCH` otherwise.
*
* @internal
*
* The return type, `regex_t *`, is defined in `<ruby/onigmo.h>`, _and_
* The type `regex_t *` is defined in `<ruby/onigmo.h>`, _and_
* _conflicts_ with POSIX's `<regex.h>`. We can no longer save the situation
* at this point. Just don't mix the two.
*/
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that you changed strscan to stop using this, but this is still a C API exposed by this public header. Are you sure existing gems don't use this?

Copy link
Member

@k0kubun k0kubun Jul 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, strscan 3.0.7 has not been released yet, so I cannot even run railsbench with Ruby master anymore (unless I modify Gemfile.lock to use 3.0.7, which isn't good until released). Let me re-expose the API for now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done 9721972

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked through gem-codesearch and found that the only gems that use it are abandoned (match_at and mmapscanner).

I forgot that previous versions of strscan installed in a Gemfile still need this function. Thanks for fixing it!

OnigPosition rb_reg_onig_match(VALUE re, VALUE str,
OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
void *args, struct re_registers *regs);

/**
* Duplicates a match data. This is roughly the same as `onig_region_copy()`,
Expand Down
Loading
Loading