Skip to content

Commit

Permalink
Added #charpos for multibyte string position.
Browse files Browse the repository at this point in the history
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37916 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
  • Loading branch information
ryan committed Nov 28, 2012
1 parent db9fe59 commit 0700a91
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 1 deletion.
5 changes: 5 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
Wed Nov 28 09:15:51 2012 Ryan Davis <ryand-ruby@zenspider.com>

* ext/strscan/strscan.c: Added #charpos for multibyte string position.
* test/strscan/test_stringscanner.rb: ditto

Wed Nov 28 09:00:34 2012 Aaron Patterson <aaron@tenderlovemaking.com>

* ext/fiddle/fiddle.c: adding alignment constants for compatibility
Expand Down
32 changes: 31 additions & 1 deletion ext/strscan/strscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

/* The StringScanner class object; assigned in Init_strscan(). */
static VALUE StringScanner;
/* Scanner error class; defined as StringScanner::Error in Init_strscan(). */
static VALUE ScanError;
/* Interned ID of the "byteslice" method name; set once in Init_strscan(). */
static ID id_byteslice;

struct strscanner
{
Expand Down Expand Up @@ -371,7 +372,7 @@ strscan_concat(VALUE self, VALUE str)
* value is zero. In the 'terminated' position (i.e. the string is exhausted),
* this value is the bytesize of the string.
*
* In short, it's a 0-based index into the string.
* In short, it's a 0-based index into bytes of the string.
*
* s = StringScanner.new('test string')
* s.pos # -> 0
Expand All @@ -389,6 +390,32 @@ strscan_get_pos(VALUE self)
return INT2FIX(p->curr);
}

/*
 * Returns the position of the scan pointer counted in characters rather
 * than bytes.  In the 'reset' position this value is zero.  In the
 * 'terminated' position (i.e. the string is exhausted), this value is the
 * size of the string.
 *
 * In short, it's a 0-based index into the characters of the string.
 *
 *   s = StringScanner.new("abcädeföghi")
 *   s.charpos           # -> 0
 *   s.scan_until(/ä/)   # -> "abcä"
 *   s.pos               # -> 5
 *   s.charpos           # -> 4
 */
static VALUE
strscan_get_charpos(VALUE self)
{
    struct strscanner *p;
    VALUE byte_offset;
    VALUE consumed;

    GET_SCANNER(self, p);

    /* Take the bytes from the start of the string up to the scan pointer... */
    byte_offset = INT2NUM(p->curr);
    consumed = rb_funcall(p->str, id_byteslice, 2, INT2FIX(0), byte_offset);

    /* ...and report how long that prefix is, as given by rb_str_length(). */
    return rb_str_length(consumed);
}

/*
* call-seq: pos=(n)
*
Expand Down Expand Up @@ -1262,6 +1289,8 @@ Init_strscan()
ID id_scanerr = rb_intern("ScanError");
VALUE tmp;

id_byteslice = rb_intern("byteslice");

StringScanner = rb_define_class("StringScanner", rb_cObject);
ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
if (!rb_const_defined(rb_cObject, id_scanerr)) {
Expand All @@ -1287,6 +1316,7 @@ Init_strscan()
rb_define_method(StringScanner, "<<", strscan_concat, 1);
rb_define_method(StringScanner, "pos", strscan_get_pos, 0);
rb_define_method(StringScanner, "pos=", strscan_set_pos, 1);
rb_define_method(StringScanner, "charpos", strscan_get_charpos, 0);
rb_define_method(StringScanner, "pointer", strscan_get_pos, 0);
rb_define_method(StringScanner, "pointer=", strscan_set_pos, 1);

Expand Down
12 changes: 12 additions & 0 deletions test/strscan/test_stringscanner.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
#
# test/strscan/test_stringscanner.rb
#
Expand Down Expand Up @@ -199,6 +200,17 @@ def test_pos
assert_equal 11, s.pos
end

def test_pos_unicode
  # The subject string mixes ASCII letters with the non-ASCII "ä" and "ö";
  # charpos is checked at the start, after each scan, and at the end.
  scanner = StringScanner.new("abcädeföghi")
  assert_equal(0, scanner.charpos)

  # Consume through the first non-ASCII character.
  assert_equal("abcä", scanner.scan_until(/ä/))
  assert_equal(4, scanner.charpos)

  # Consume through the second non-ASCII character.
  assert_equal("defö", scanner.scan_until(/ö/))
  assert_equal(8, scanner.charpos)

  # At the terminated position charpos equals the 11-character length.
  scanner.terminate
  assert_equal(11, scanner.charpos)
end

def test_concat
s = StringScanner.new('a')
s.scan(/a/)
Expand Down

0 comments on commit 0700a91

Please sign in to comment.