Skip to content

Commit eabe2d9

Browse files
committed
mruby-string-ext: add String#valid_encoding? method
1 parent bef7eea commit eabe2d9

File tree

1 file changed

+43
-0
lines changed

1 file changed

+43
-0
lines changed

mrbgems/mruby-string-ext/src/string.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,48 @@ str_uminus(mrb_state *mrb, mrb_value str)
13181318
return mrb_obj_freeze(mrb, mrb_str_dup(mrb, str));
13191319
}
13201320

1321+
/*
1322+
* call-seq:
1323+
* string.valid_encoding? -> true or false
1324+
*
1325+
* Returns true for a string which is encoded correctly.
1326+
*
1327+
*/
1328+
static mrb_value
1329+
str_valid_enc_p(mrb_state *mrb, mrb_value str)
1330+
{
1331+
#ifdef MRB_UTF8_STRING
1332+
#define utf8_islead(c) ((unsigned char)((c)&0xc0) != 0x80)
1333+
1334+
struct RString *s = mrb_str_ptr(str);
1335+
if (RSTR_ASCII_P(s)) return mrb_true_value();
1336+
1337+
mrb_int byte_len = RSTR_LEN(s);
1338+
mrb_int utf8_len = 0;
1339+
const char *p = RSTR_PTR(s);
1340+
const char *e = p + byte_len;
1341+
while (p < e) {
1342+
mrb_int len = mrb_utf8len_table[(unsigned char)p[0] >> 3];
1343+
if (len == 0 || len > e - p)
1344+
return mrb_false_value();
1345+
switch (len) {
1346+
case 4:
1347+
if (utf8_islead(p[3])) return mrb_false_value();
1348+
case 3:
1349+
if (utf8_islead(p[2])) return mrb_false_value();
1350+
case 2:
1351+
if (utf8_islead(p[1])) return mrb_false_value();
1352+
default:
1353+
break;
1354+
}
1355+
p += len;
1356+
utf8_len++;
1357+
}
1358+
if (byte_len == utf8_len) RSTR_SET_ASCII_FLAG(s);
1359+
#endif
1360+
return mrb_true_value();
1361+
}
1362+
13211363
void
13221364
mrb_mruby_string_ext_gem_init(mrb_state* mrb)
13231365
{
@@ -1355,6 +1397,7 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb)
13551397
mrb_define_method(mrb, s, "casecmp?", str_casecmp_p, MRB_ARGS_REQ(1));
13561398
mrb_define_method(mrb, s, "+@", str_uplus, MRB_ARGS_REQ(1));
13571399
mrb_define_method(mrb, s, "-@", str_uminus, MRB_ARGS_REQ(1));
1400+
mrb_define_method(mrb, s, "valid_encoding?", str_valid_enc_p, MRB_ARGS_NONE());
13581401

13591402
mrb_define_method(mrb, s, "__lines", str_lines, MRB_ARGS_NONE());
13601403
mrb_define_method(mrb, s, "__codepoints", str_codepoints, MRB_ARGS_NONE());

0 commit comments

Comments
 (0)