Permalink
Browse files

Added #charpos for multibyte string position.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37916 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
  • Loading branch information...
1 parent db9fe59 commit 0700a9113f94a82d786271bc44140691d47f6bac ryan committed Nov 28, 2012
Showing with 48 additions and 1 deletion.
  1. +5 −0 ChangeLog
  2. +31 −1 ext/strscan/strscan.c
  3. +12 −0 test/strscan/test_stringscanner.rb
View
@@ -1,3 +1,8 @@
+Wed Nov 28 09:15:51 2012 Ryan Davis <ryand-ruby@zenspider.com>
+
+ * ext/strscan/strscan.c: Added #charpos for multibyte string position.
+ * test/strscan/test_stringscanner.rb: ditto
+
Wed Nov 28 09:00:34 2012 Aaron Patterson <aaron@tenderlovemaking.com>
* ext/fiddle/fiddle.c: adding alignment constants for compatibility
View
@@ -21,6 +21,7 @@
static VALUE StringScanner;
static VALUE ScanError;
+static ID id_byteslice;
struct strscanner
{
@@ -371,7 +372,7 @@ strscan_concat(VALUE self, VALUE str)
* value is zero. In the 'terminated' position (i.e. the string is exhausted),
* this value is the bytesize of the string.
*
- * In short, it's a 0-based index into the string.
+ * In short, it's a 0-based index into the bytes of the string.
*
* s = StringScanner.new('test string')
* s.pos # -> 0
@@ -390,6 +391,32 @@ strscan_get_pos(VALUE self)
}
/*
+ * Returns the character position of the scan pointer. In the 'reset' position, this
+ * value is zero. In the 'terminated' position (i.e. the string is exhausted),
+ * this value is the length of the string in characters.
+ *
+ * In short, it's a 0-based index into the characters of the string
+ * (whereas #pos is an index into its bytes).
+ *
+ * s = StringScanner.new("abcädeföghi")
+ * s.charpos # -> 0
+ * s.scan_until(/ä/) # -> "abcä"
+ * s.pos # -> 5
+ * s.charpos # -> 4
+ */
+static VALUE
+strscan_get_charpos(VALUE self)
+{
+ struct strscanner *p;
+ VALUE substr;
+
+ GET_SCANNER(self, p);
+
+ /*
+  * Slice off the bytes that precede the scan pointer; the character
+  * length of that prefix is the character position.
+  *
+  * The byte offset is converted with LONG2NUM rather than INT2NUM:
+  * INT2NUM takes an int, so an offset above INT_MAX would be narrowed
+  * on platforms where long is wider than int.
+  */
+ substr = rb_funcall(p->str, id_byteslice, 2, INT2FIX(0), LONG2NUM(p->curr));
+
+ return rb_str_length(substr);
+}
+
+/*
* call-seq: pos=(n)
*
* Set the byte position of the scan pointer.
@@ -1262,6 +1289,8 @@ Init_strscan()
ID id_scanerr = rb_intern("ScanError");
VALUE tmp;
+ id_byteslice = rb_intern("byteslice");
+
StringScanner = rb_define_class("StringScanner", rb_cObject);
ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
if (!rb_const_defined(rb_cObject, id_scanerr)) {
@@ -1287,6 +1316,7 @@ Init_strscan()
rb_define_method(StringScanner, "<<", strscan_concat, 1);
rb_define_method(StringScanner, "pos", strscan_get_pos, 0);
rb_define_method(StringScanner, "pos=", strscan_set_pos, 1);
+ rb_define_method(StringScanner, "charpos", strscan_get_charpos, 0);
rb_define_method(StringScanner, "pointer", strscan_get_pos, 0);
rb_define_method(StringScanner, "pointer=", strscan_set_pos, 1);
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
#
# test/strscan/test_stringscanner.rb
#
@@ -199,6 +200,17 @@ def test_pos
assert_equal 11, s.pos
end
+ def test_pos_unicode
+ s = StringScanner.new("abcädeföghi")
+ assert_equal 0, s.charpos
+ assert_equal "abcä", s.scan_until(/ä/)
+ assert_equal 4, s.charpos
+ assert_equal "defö", s.scan_until(/ö/)
+ assert_equal 8, s.charpos
+ s.terminate
+ assert_equal 11, s.charpos
+ end
+
def test_concat
s = StringScanner.new('a')
s.scan(/a/)

0 comments on commit 0700a91

Please sign in to comment.