Skip to content

Commit 74bdae9

Browse files
committed
mruby-encoding: add Poorman's Encoding gem
The differences from CRuby's Encoding: - Encoding is a module instead of a class - each Encoding (e.g. `Encoding::UTF_8`) is a string instead of an Encoding object - only `UTF-8` and `ASCII-8BIT` (and its alias `BINARY`) are supported. Using this gem automatically turns on `MRB_UTF8_STRING` support.
1 parent e45ea53 commit 74bdae9

4 files changed

Lines changed: 198 additions & 0 deletions

File tree

mrbgems/mruby-encoding/mrbgem.rake

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Gem specification for mruby-encoding ("Poorman's Encoding").
MRuby::Gem::Specification.new('mruby-encoding') do |gem|
  gem.license = 'MIT'
  gem.author  = 'mruby developers'
  gem.summary = "Poorman's Encoding for mruby"

  # Preprocessor definitions appended to the build that includes this gem:
  # one advertises the gem's presence, the other turns on UTF-8 strings.
  gem.build.defines << 'HAVE_MRUBY_ENCODING_GEM'
  gem.build.defines << 'MRB_UTF8_STRING'

  # Needed by the tests only (they exercise Integer#chr).
  gem.add_test_dependency 'mruby-string-ext'
end
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#include <mruby.h>
2+
#include <mruby/string.h>
3+
#include <mruby/variable.h>
4+
#include <mruby/internal.h>
5+
#include <mruby/presym.h>
6+
7+
/* Names of the encodings this gem recognizes. */
#define ENC_ASCII_8BIT "ASCII-8BIT"
#define ENC_BINARY     "BINARY"
#define ENC_UTF8       "UTF-8"

/*
 * Compares the mruby String value `enc` against the string literal
 * `enc_lit` using casecmp_p(). Pasting "" after `enc_lit` only compiles
 * when it is a string literal; sizeof then counts the terminating NUL,
 * hence the -1.
 */
#define ENC_COMP_P(enc, enc_lit) \
  casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit "") - 1)
13+
14+
static mrb_bool
15+
casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2)
16+
{
17+
if (len1 != len2) return FALSE;
18+
19+
const char *e1 = s1 + len1;
20+
const char *e2 = s2 + len2;
21+
while (s1 < e1 && s2 < e2) {
22+
if (*s1 != *s2 && TOUPPER(*s1) != TOUPPER(*s2)) return FALSE;
23+
s1++;
24+
s2++;
25+
}
26+
return TRUE;
27+
}
28+
29+
/*
30+
* call-seq:
31+
* string.valid_encoding? -> true or false
32+
*
33+
* Returns true for a string which is encoded correctly.
34+
*
35+
*/
36+
static mrb_value
37+
str_valid_enc_p(mrb_state *mrb, mrb_value str)
38+
{
39+
#define utf8_islead(c) ((unsigned char)((c)&0xc0) != 0x80)
40+
41+
struct RString *s = mrb_str_ptr(str);
42+
if (RSTR_SINGLE_BYTE_P(s)) return mrb_true_value();
43+
if (RSTR_BINARY_P(s)) return mrb_true_value();
44+
45+
mrb_int byte_len = RSTR_LEN(s);
46+
mrb_int utf8_len = 0;
47+
const char *p = RSTR_PTR(s);
48+
const char *e = p + byte_len;
49+
while (p < e) {
50+
mrb_int len = mrb_utf8len(p, e);
51+
52+
if (len == 1 && (*p & 0x80)) return mrb_false_value();
53+
p += len;
54+
utf8_len++;
55+
}
56+
if (byte_len == utf8_len) RSTR_SET_SINGLE_BYTE_FLAG(s);
57+
return mrb_true_value();
58+
}
59+
60+
static mrb_value
61+
str_b(mrb_state *mrb, mrb_value self)
62+
{
63+
mrb_value str = mrb_str_dup(mrb, self);
64+
mrb_str_ptr(str)->flags |= MRB_STR_BINARY;
65+
return str;
66+
}
67+
68+
static mrb_value
69+
get_encoding(mrb_state *mrb, mrb_sym enc)
70+
{
71+
struct RClass *e = mrb_module_get_id(mrb, MRB_SYM(Encoding));
72+
return mrb_const_get(mrb, mrb_obj_value(e), enc);
73+
}
74+
75+
static mrb_value
76+
str_encoding(mrb_state *mrb, mrb_value self)
77+
{
78+
struct RString *s = mrb_str_ptr(self);
79+
if (RSTR_BINARY_P(s)) {
80+
return get_encoding(mrb, MRB_SYM(BINARY));
81+
}
82+
return get_encoding(mrb, MRB_SYM(UTF_8));
83+
}
84+
85+
static mrb_value
86+
str_force_encoding(mrb_state *mrb, mrb_value self)
87+
{
88+
mrb_value enc;
89+
90+
mrb_get_args(mrb, "S", &enc);
91+
92+
struct RString *s = mrb_str_ptr(self);
93+
if (ENC_COMP_P(enc, ENC_ASCII_8BIT) ||
94+
ENC_COMP_P(enc, ENC_BINARY)) {
95+
s->flags |= MRB_STR_BINARY;
96+
}
97+
else if (ENC_COMP_P(enc, ENC_UTF8)) {
98+
s->flags &= ~MRB_STR_BINARY;
99+
}
100+
else {
101+
mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %v", enc);
102+
}
103+
return self;
104+
}
105+
106+
void
107+
mrb_mruby_encoding_gem_init(mrb_state* mrb)
108+
{
109+
struct RClass *s = mrb->string_class;
110+
111+
mrb_define_method_id(mrb, s, MRB_SYM(b), str_b, MRB_ARGS_NONE());
112+
mrb_define_method_id(mrb, s, MRB_SYM_Q(valid_encoding), str_valid_enc_p, MRB_ARGS_NONE());
113+
mrb_define_method_id(mrb, s, MRB_SYM(encoding), str_encoding, MRB_ARGS_NONE());
114+
mrb_define_method_id(mrb, s, MRB_SYM(force_encoding), str_force_encoding, MRB_ARGS_REQ(1));
115+
116+
/* Poorman's Encoding
117+
*
118+
* Encoding - module instead of class
119+
* encodings - supports only UTF-8 and ASCII-8BIT (and its alias BINARY)
120+
* each Encoding - encoding name string instead of Encoding object
121+
*
122+
*/
123+
struct RClass *e = mrb_define_module_id(mrb, MRB_SYM(Encoding));
124+
mrb_value b = mrb_str_new_lit_frozen(mrb, ENC_ASCII_8BIT);
125+
mrb_define_const_id(mrb, e, MRB_SYM(ASCII_8BIT), b);
126+
mrb_define_const_id(mrb, e, MRB_SYM(BINARY), b);
127+
mrb_define_const_id(mrb, e, MRB_SYM(UTF_8), mrb_str_new_lit_frozen(mrb, ENC_UTF8));
128+
}
129+
130+
void
131+
mrb_mruby_encoding_gem_final(mrb_state* mrb)
132+
{
133+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Integer#chr with and without an explicit encoding name.
assert('Integer#chr') do
  # No encoding argument.
  assert_equal("A", 65.chr)
  assert_equal("B", 0x42.chr)
  assert_equal("\xab", 171.chr)
  assert_raise(RangeError) { -1.chr }
  assert_raise(RangeError) { 256.chr }

  # Binary encoding names are accepted in any letter case.
  assert_equal("A", 65.chr("ASCII-8BIT"))
  assert_equal("B", 0x42.chr("BINARY"))
  assert_equal("\xab", 171.chr("ascii-8bit"))
  assert_raise(RangeError) { -1.chr("binary") }
  assert_raise(RangeError) { 256.chr("Ascii-8bit") }

  # Unknown encoding name, too many arguments, non-String encoding.
  assert_raise(ArgumentError) { 65.chr("ASCII") }
  assert_raise(ArgumentError) { 65.chr("ASCII-8BIT", 2) }
  assert_raise(TypeError) { 65.chr(:BINARY) }

  if __ENCODING__ == "ASCII-8BIT"
    assert_raise(ArgumentError) { 65.chr("UTF-8") }
  else
    assert_equal("A", 65.chr("UTF-8"))
    assert_equal("B", 0x42.chr("UTF-8"))
    assert_equal("«", 171.chr("utf-8"))
    assert_equal("あ", 12354.chr("Utf-8"))
    assert_raise(RangeError) { -1.chr("utf-8") }
    # One past the last Unicode code point. The call must go straight to
    # chr("UTF-8"): a bare 0x110000.chr already raises RangeError by itself,
    # which would hide the UTF-8 range check.
    assert_raise(RangeError) { 0x110000.chr("UTF-8") }
  end
end
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
##
2+
# String(Ext) Test
3+
4+
# True when __ENCODING__ reports "UTF-8"; selects the UTF-8 branches below.
UTF8STRING = (__ENCODING__ == "UTF-8")

# String#valid_encoding? for plain ASCII, well-formed and malformed input.
assert('String#valid_encoding?') do
  assert_true("hello".valid_encoding?)
  if UTF8STRING
    assert_true("あ".valid_encoding?)
    assert_false("\xfe".valid_encoding?)
    assert_false("あ\xfe".valid_encoding?)
    # the same bytes are valid once the string is tagged as binary
    assert_true("あ\xfe".b.valid_encoding?)
  else
    assert_true("\xfe".valid_encoding?)
  end
end
17+
18+
# String#encoding, String#b and String#force_encoding.
assert('String#encoding') do
  if UTF8STRING
    a = "あ"
    assert_equal Encoding::UTF_8, a.encoding

    # String#b tags a copy and leaves the receiver alone.
    assert_equal Encoding::BINARY, a.b.encoding
    assert_equal Encoding::UTF_8, a.encoding

    # force_encoding returns the receiver and retags it in place.
    assert_equal a, a.force_encoding(Encoding::BINARY)
    assert_equal Encoding::BINARY, a.encoding

    # Forcing the same encoding again changes nothing.
    assert_equal a, a.force_encoding(Encoding::BINARY)
    assert_equal Encoding::BINARY, a.encoding

    # Round trip back to UTF-8.
    assert_equal a, a.force_encoding(Encoding::UTF_8)
    assert_equal Encoding::UTF_8, a.encoding

    # Unsupported encoding names are rejected.
    assert_raise(ArgumentError) { a.force_encoding("ASCII") }
  else
    a = "hello"
    assert_equal Encoding::BINARY, a.encoding
  end
end

0 commit comments

Comments
 (0)