Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions ext/strscan/lib/strscan/strscan.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# frozen_string_literal: true

class StringScanner
  # call-seq:
  #   scan_integer(base: 10)
  #
  # Scans an optionally signed integer at the current position and returns
  # it as an Integer, or returns nil when there is no integer to scan.
  #
  # With `base: 10` (the default) this is equivalent to calling `#scan` with
  # a `[+-]?\d+` pattern.
  #
  # With `base: 16` this is equivalent to calling `#scan` with a
  # `[+-]?(0x)?[0-9a-fA-F]+` pattern.
  #
  # Any other `base` raises ArgumentError.
  #
  # The scanned string must be encoded with an ASCII compatible encoding,
  # otherwise Encoding::CompatibilityError will be raised.
  def scan_integer(base: 10)
    # `===` is sent to the literal, exactly as a `case`/`when` would do.
    return scan_base10_integer if 10 === base
    return scan_base16_integer if 16 === base

    raise ArgumentError, "Unsupported integer base: #{base.inspect}, expected 10 or 16"
  end
end
129 changes: 103 additions & 26 deletions ext/strscan/strscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
extern size_t onig_region_memsize(const struct re_registers *regs);
#endif

#include <ctype.h>
#include <stdbool.h>

#define STRSCAN_VERSION "3.1.1.dev"
Expand All @@ -33,6 +32,8 @@ static VALUE StringScanner;
static VALUE ScanError;
static ID id_byteslice;

static int usascii_encindex, utf8_encindex, binary_encindex;

struct strscanner
{
/* multi-purpose flags */
Expand Down Expand Up @@ -116,7 +117,7 @@ static VALUE strscan_get_byte _((VALUE self));
static VALUE strscan_getbyte _((VALUE self));
static VALUE strscan_peek _((VALUE self, VALUE len));
static VALUE strscan_peep _((VALUE self, VALUE len));
static VALUE strscan_scan_integer _((VALUE self));
static VALUE strscan_scan_base10_integer _((VALUE self));
static VALUE strscan_unscan _((VALUE self));
static VALUE strscan_bol_p _((VALUE self));
static VALUE strscan_eos_p _((VALUE self));
Expand Down Expand Up @@ -684,6 +685,14 @@ strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_pt
ONIG_OPTION_NONE);
}

/*
 * Check the encodings of two strings against each other.
 *
 * When both strings report the same value from RB_ENCODING_GET() nothing
 * more is done; only differing encodings are handed to rb_enc_check().
 */
static void
strscan_enc_check(VALUE str1, VALUE str2)
{
    if (RB_ENCODING_GET(str1) == RB_ENCODING_GET(str2)) {
        return;
    }

    rb_enc_check(str1, str2);
}

static VALUE
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
{
Expand Down Expand Up @@ -711,18 +720,21 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
}
else {
StringValue(pattern);
rb_encoding *enc = rb_enc_check(p->str, pattern);
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
strscan_enc_check(p->str, pattern);
return Qnil;
}

if (headonly) {
strscan_enc_check(p->str, pattern);

if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
return Qnil;
}
set_registers(p, RSTRING_LEN(pattern));
}
else {
rb_encoding *enc = rb_enc_check(p->str, pattern);
long pos = rb_memsearch(RSTRING_PTR(pattern), RSTRING_LEN(pattern),
CURPTR(p), S_RESTLEN(p), enc);
if (pos == -1) {
Expand Down Expand Up @@ -1268,27 +1280,50 @@ strscan_peep(VALUE self, VALUE vlen)
return strscan_peek(self, vlen);
}

/*
* call-seq:
* scan_integer
*
* Equivalent to #scan with a [+-]?\d+ pattern, and returns an Integer or nil.
*
* The scanned string must be encoded with an ASCII compatible encoding, otherwise
* Encoding::CompatibilityError will be raised.
*/
static VALUE
strscan_scan_integer(VALUE self)
strscan_parse_integer(struct strscanner *p, int base, long len)
{
char *ptr, *buffer;
long len = 0;
VALUE buffer_v, integer;

char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);

MEMCPY(buffer, CURPTR(p), char, len);
buffer[len] = '\0';
integer = rb_cstr2inum(buffer, base);
RB_ALLOCV_END(buffer_v);
p->curr += len;
return integer;
}

/*
 * Tell whether the encoding index read with ENCODING_GET_INLINED() is one
 * of the three cached indexes: utf8_encindex, binary_encindex or
 * usascii_encindex.
 *
 * These three are tested because the overwhelming majority of strings use
 * one of them.
 */
static inline bool
strscan_ascii_compat_fastpath(VALUE str)
{
    const int idx = ENCODING_GET_INLINED(str);

    if (idx == utf8_encindex) {
        return true;
    }
    if (idx == binary_encindex) {
        return true;
    }
    return idx == usascii_encindex;
}

/*
 * Require an ASCII compatible string.
 *
 * The general rb_must_asciicompat() check is only called when the cheap
 * strscan_ascii_compat_fastpath() test does not already accept the string,
 * which is expected to be the rare case.
 */
static inline void
strscan_must_ascii_compat(VALUE str)
{
    if (!RB_LIKELY(strscan_ascii_compat_fastpath(str))) {
        rb_must_asciicompat(str);
    }
}

static VALUE
strscan_scan_base10_integer(VALUE self)
{
char *ptr;
long len = 0;
struct strscanner *p;

GET_SCANNER(self, p);
CLEAR_MATCH_STATUS(p);

rb_must_asciicompat(p->str);
strscan_must_ascii_compat(p->str);

ptr = CURPTR(p);

Expand All @@ -1302,25 +1337,60 @@ strscan_scan_integer(VALUE self)
len++;
}

if (!isdigit(ptr[len])) {
if (!rb_isdigit(ptr[len])) {
return Qnil;
}

MATCHED(p);
p->prev = p->curr;

while (len < remaining_len && isdigit(ptr[len])) {
while (len < remaining_len && rb_isdigit(ptr[len])) {
len++;
}

buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
return strscan_parse_integer(p, 10, len);
}

MEMCPY(buffer, CURPTR(p), char, len);
buffer[len] = '\0';
integer = rb_cstr2inum(buffer, 10);
RB_ALLOCV_END(buffer_v);
p->curr += len;
return integer;
/*
 * Scan a hexadecimal integer at the current scan position.
 *
 * Accepts an optional leading '+' or '-', an optional "0x" prefix and one
 * or more hexadecimal digits, mirroring the documented
 * [+-]?(0x)?[0-9a-fA-F]+ pattern. The "0x" prefix is consumed only when a
 * hexadecimal digit follows it; otherwise just the leading "0" is scanned,
 * which is what the pattern matches once its optional group is skipped.
 *
 * Returns the result of strscan_parse_integer() for the scanned bytes, or
 * Qnil when there is no hexadecimal digit to scan. The string is first
 * checked with strscan_must_ascii_compat().
 */
static VALUE
strscan_scan_base16_integer(VALUE self)
{
    char *ptr;
    long len = 0;
    struct strscanner *p;

    GET_SCANNER(self, p);
    CLEAR_MATCH_STATUS(p);

    strscan_must_ascii_compat(p->str);

    ptr = CURPTR(p);

    long remaining_len = S_RESTLEN(p);

    if (remaining_len <= 0) {
        return Qnil;
    }

    /* Optional sign. */
    if (ptr[len] == '-' || ptr[len] == '+') {
        len++;
    }

    /*
     * Optional "0x" prefix. It is skipped only when a hex digit comes right
     * after it, so that inputs such as "0x" or "0xZ" still scan the "0"
     * instead of failing.
     */
    if ((remaining_len >= (len + 3)) && ptr[len] == '0' && ptr[len + 1] == 'x' &&
        rb_isxdigit(ptr[len + 2])) {
        len += 2;
    }

    /* At least one hex digit is required (a lone sign is not a match). */
    if (len >= remaining_len || !rb_isxdigit(ptr[len])) {
        return Qnil;
    }

    MATCHED(p);
    p->prev = p->curr;

    while (len < remaining_len && rb_isxdigit(ptr[len])) {
        len++;
    }

    return strscan_parse_integer(p, 16, len);
}

/*
Expand Down Expand Up @@ -2212,6 +2282,10 @@ Init_strscan(void)

id_byteslice = rb_intern("byteslice");

usascii_encindex = rb_usascii_encindex();
utf8_encindex = rb_utf8_encindex();
binary_encindex = rb_ascii8bit_encindex();

StringScanner = rb_define_class("StringScanner", rb_cObject);
ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
if (!rb_const_defined(rb_cObject, id_scanerr)) {
Expand Down Expand Up @@ -2261,7 +2335,8 @@ Init_strscan(void)
rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
rb_define_method(StringScanner, "peep", strscan_peep, 1);

rb_define_method(StringScanner, "scan_integer", strscan_scan_integer, 0);
rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0);
rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0);

rb_define_method(StringScanner, "unscan", strscan_unscan, 0);

Expand Down Expand Up @@ -2290,4 +2365,6 @@ Init_strscan(void)
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);

rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);

rb_require("strscan/strscan");
}
9 changes: 6 additions & 3 deletions ext/strscan/strscan.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,17 @@ Gem::Specification.new do |s|
files = [
"COPYING",
"LICENSE.txt",
"lib/strscan/strscan.rb"
]

s.require_paths = %w{lib}

if RUBY_ENGINE == "jruby"
s.require_paths = %w{ext/jruby/lib lib}
files << "ext/jruby/lib/strscan.rb"
files << "lib/strscan.jar"
files << "ext/jruby/lib/strscan.rb"
s.require_paths += %w{ext/jruby/lib}
s.platform = "java"
else
s.require_paths = %w{lib}
files << "ext/strscan/extconf.rb"
files << "ext/strscan/strscan.c"
s.rdoc_options << "-idoc"
Expand Down
2 changes: 1 addition & 1 deletion test/strscan/test_ractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

class TestStringScannerRactor < Test::Unit::TestCase
def setup
omit "Ractor not defined" unless defined? Ractor
omit("Ractor not defined") unless defined? Ractor
end

def test_ractor
Expand Down
Loading
Loading