@@ -1318,6 +1318,48 @@ str_uminus(mrb_state *mrb, mrb_value str)
13181318 return mrb_obj_freeze (mrb , mrb_str_dup (mrb , str ));
13191319}
13201320
1321+ /*
1322+ * call-seq:
1323+ * string.valid_encoding? -> true or false
1324+ *
1325+ * Returns true for a string which is encoded correctly.
1326+ *
1327+ */
1328+ static mrb_value
1329+ str_valid_enc_p (mrb_state * mrb , mrb_value str )
1330+ {
1331+ #ifdef MRB_UTF8_STRING
1332+ #define utf8_islead (c ) ((unsigned char)((c)&0xc0) != 0x80)
1333+
1334+ struct RString * s = mrb_str_ptr (str );
1335+ if (RSTR_ASCII_P (s )) return mrb_true_value ();
1336+
1337+ mrb_int byte_len = RSTR_LEN (s );
1338+ mrb_int utf8_len = 0 ;
1339+ const char * p = RSTR_PTR (s );
1340+ const char * e = p + byte_len ;
1341+ while (p < e ) {
1342+ mrb_int len = mrb_utf8len_table [(unsigned char )p [0 ] >> 3 ];
1343+ if (len == 0 || len > e - p )
1344+ return mrb_false_value ();
1345+ switch (len ) {
1346+ case 4 :
1347+ if (utf8_islead (p [3 ])) return mrb_false_value ();
1348+ case 3 :
1349+ if (utf8_islead (p [2 ])) return mrb_false_value ();
1350+ case 2 :
1351+ if (utf8_islead (p [1 ])) return mrb_false_value ();
1352+ default :
1353+ break ;
1354+ }
1355+ p += len ;
1356+ utf8_len ++ ;
1357+ }
1358+ if (byte_len == utf8_len ) RSTR_SET_ASCII_FLAG (s );
1359+ #endif
1360+ return mrb_true_value ();
1361+ }
1362+
13211363void
13221364mrb_mruby_string_ext_gem_init (mrb_state * mrb )
13231365{
@@ -1355,6 +1397,7 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb)
13551397 mrb_define_method (mrb , s , "casecmp?" , str_casecmp_p , MRB_ARGS_REQ (1 ));
13561398 mrb_define_method (mrb , s , "+@" , str_uplus , MRB_ARGS_REQ (1 ));
13571399 mrb_define_method (mrb , s , "-@" , str_uminus , MRB_ARGS_REQ (1 ));
1400+ mrb_define_method (mrb , s , "valid_encoding?" , str_valid_enc_p , MRB_ARGS_NONE ());
13581401
13591402 mrb_define_method (mrb , s , "__lines" , str_lines , MRB_ARGS_NONE ());
13601403 mrb_define_method (mrb , s , "__codepoints" , str_codepoints , MRB_ARGS_NONE ());
0 commit comments