Skip to content
Permalink
Browse files

Implemented CAPI rb_enc_codepoint_len.

  • Loading branch information
Federico Ravasio & Dario Bertini authored and razielgn committed Jun 3, 2013
1 parent b3579cf commit 08bff7ddda6f0b85da0145e4d369bb1af90ffc1c
@@ -429,5 +429,46 @@
end
end

# Specs for the rb_enc_codepoint_len wrapper exposed by the @s fixture.
# The wrapper returns a [codepoint, length] pair for the first character of
# the given string and raises ArgumentError for empty or malformed input.
describe "rb_enc_codepoint_len" do
  it "raises ArgumentError if an empty string is given" do
    lambda { @s.rb_enc_codepoint_len("") }.should raise_error(ArgumentError)
  end

  it "raises ArgumentError if an invalid byte sequence is given" do
    # Invalid sequence identifier
    malformed = "\xa0\xa1"
    lambda { @s.rb_enc_codepoint_len(malformed) }.should raise_error(ArgumentError)
  end

  it "returns codepoint 0x24 and length 1 for character '$'" do
    code, size = @s.rb_enc_codepoint_len("$")

    code.should == 0x24
    size.should == 1
  end

  it "returns codepoint 0xA2 and length 2 for character '¢'" do
    code, size = @s.rb_enc_codepoint_len("¢")

    code.should == 0xA2
    size.should == 2
  end

  it "returns codepoint 0x20AC and length 3 for character '€'" do
    code, size = @s.rb_enc_codepoint_len("€")

    code.should == 0x20AC
    size.should == 3
  end

  it "returns codepoint 0x24B62 and length 4 for character '𤭢'" do
    code, size = @s.rb_enc_codepoint_len("𤭢")

    code.should == 0x24B62
    size.should == 4
  end
end
end
end
@@ -257,6 +257,18 @@ static VALUE encoding_spec_rb_enc_nth(VALUE self, VALUE str, VALUE index) {
}
#endif

#ifdef HAVE_RB_ENC_CODEPOINT_LEN
/*
 * Spec helper wrapping rb_enc_codepoint_len() so it can be called from Ruby.
 *
 * Passes the whole byte range of `str` and the encoding returned by
 * rb_enc_get(str) to rb_enc_codepoint_len(), then returns a two-element
 * Array: [codepoint, length]. Any exception raised by
 * rb_enc_codepoint_len() propagates to the caller.
 */
static VALUE encoding_spec_rb_enc_codepoint_len(VALUE self, VALUE str) {
  char* start = RSTRING_PTR(str);
  char* end = start + RSTRING_LEN(str);

  int len;
  unsigned int codepoint = rb_enc_codepoint_len(start, end, &len, rb_enc_get(str));

  /* Box each value with the macro matching its C type: the codepoint is an
   * unsigned int, so LONG2NUM could yield a negative Integer for large code
   * values on platforms where long is 32 bits wide. */
  return rb_ary_new3(2, UINT2NUM(codepoint), INT2NUM(len));
}
#endif

void Init_encoding_spec() {
VALUE cls;
cls = rb_define_class("CApiEncodingSpecs", rb_cObject);
@@ -401,6 +413,10 @@ void Init_encoding_spec() {
#ifdef HAVE_RB_ENC_NTH
rb_define_method(cls, "rb_enc_nth", encoding_spec_rb_enc_nth, 2);
#endif

#ifdef HAVE_RB_ENC_CODEPOINT_LEN
rb_define_method(cls, "rb_enc_codepoint_len", encoding_spec_rb_enc_codepoint_len, 1);
#endif
}

#ifdef __cplusplus
@@ -191,6 +191,7 @@
#define HAVE_RB_ENCDB_ALIAS 1
#define HAVE_RB_ENC_ASSOCIATE 1
#define HAVE_RB_ENC_ASSOCIATE_INDEX 1
#define HAVE_RB_ENC_CODEPOINT_LEN 1
#define HAVE_RB_ENC_COMPATIBLE 1
#define HAVE_RB_ENC_COPY 1
#define HAVE_RB_ENC_FIND 1
@@ -89,6 +89,8 @@ rb_encoding* rb_to_encoding(VALUE obj);

VALUE rb_enc_from_encoding(rb_encoding *enc);

/*
 * Decodes the character at the start of the range [p, e) using encoding enc
 * and returns its codepoint. When len_p is non-NULL, the length of that
 * character is stored in *len_p.
 *
 * Raises ArgumentError when the range is empty (e <= p) or when it does not
 * begin with a valid character in enc.
 */
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc);

int rb_define_dummy_encoding(const char *);
int rb_enc_find_index(const char *name);
int rb_enc_to_index(rb_encoding* enc);
@@ -398,4 +398,22 @@ extern "C" {

Transcoding::define(env->state(), (OnigTranscodingType*)trans);
}

/*
 * Returns the codepoint of the character that starts at p, reading no
 * further than e, interpreted in encoding enc. If len_p is non-NULL the
 * length of that character is written through it.
 *
 * Raises ArgumentError ("empty string") when e <= p, and ArgumentError
 * ("invalid byte sequence in <encoding name>") when no complete valid
 * character is found at p.
 */
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc) {
  if(p >= e) {
    rb_raise(rb_eArgError, "empty string");
  }

  const int mbclen = rb_enc_precise_mbclen(p, e, enc);

  // Reject anything that is not a fully formed character up front so the
  // success path below is straight-line code ending in the return.
  if(!ONIGENC_MBCLEN_CHARFOUND_P(mbclen)) {
    rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
  }

  // The length out-parameter is optional.
  if(len_p) {
    *len_p = ONIGENC_MBCLEN_CHARFOUND_LEN(mbclen);
  }

  return ONIGENC_MBC_TO_CODE(enc, (UChar*)p, (UChar*)e);
}
}

0 comments on commit 08bff7d

Please sign in to comment.
You can’t perform that action at this time.