|
| 1 | +#include <mruby.h> |
| 2 | +#include <mruby/string.h> |
| 3 | +#include <mruby/variable.h> |
| 4 | +#include <mruby/internal.h> |
| 5 | +#include <mruby/presym.h> |
| 6 | + |
/* Encoding names recognized by this gem. */
#define ENC_ASCII_8BIT "ASCII-8BIT"
#define ENC_BINARY "BINARY"
#define ENC_UTF8 "UTF-8"

/*
 * Compares the contents of the mruby string value `enc` with the string
 * literal `enc_lit` through casecmp_p().
 *
 * `enc_lit""` only compiles when `enc_lit` is a string literal, so the
 * `sizeof(...)-1` length (the literal without its terminating NUL) can never
 * be taken from a pointer by mistake.
 *
 * Note: `enc` is evaluated twice; pass an expression without side effects.
 */
#define ENC_COMP_P(enc, enc_lit) \
  casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1)
| 13 | + |
| 14 | +static mrb_bool |
| 15 | +casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2) |
| 16 | +{ |
| 17 | + if (len1 != len2) return FALSE; |
| 18 | + |
| 19 | + const char *e1 = s1 + len1; |
| 20 | + const char *e2 = s2 + len2; |
| 21 | + while (s1 < e1 && s2 < e2) { |
| 22 | + if (*s1 != *s2 && TOUPPER(*s1) != TOUPPER(*s2)) return FALSE; |
| 23 | + s1++; |
| 24 | + s2++; |
| 25 | + } |
| 26 | + return TRUE; |
| 27 | +} |
| 28 | + |
| 29 | +/* |
| 30 | + * call-seq: |
| 31 | + * string.valid_encoding? -> true or false |
| 32 | + * |
| 33 | + * Returns true for a string which is encoded correctly. |
| 34 | + * |
| 35 | + */ |
| 36 | +static mrb_value |
| 37 | +str_valid_enc_p(mrb_state *mrb, mrb_value str) |
| 38 | +{ |
| 39 | +#define utf8_islead(c) ((unsigned char)((c)&0xc0) != 0x80) |
| 40 | + |
| 41 | + struct RString *s = mrb_str_ptr(str); |
| 42 | + if (RSTR_SINGLE_BYTE_P(s)) return mrb_true_value(); |
| 43 | + if (RSTR_BINARY_P(s)) return mrb_true_value(); |
| 44 | + |
| 45 | + mrb_int byte_len = RSTR_LEN(s); |
| 46 | + mrb_int utf8_len = 0; |
| 47 | + const char *p = RSTR_PTR(s); |
| 48 | + const char *e = p + byte_len; |
| 49 | + while (p < e) { |
| 50 | + mrb_int len = mrb_utf8len(p, e); |
| 51 | + |
| 52 | + if (len == 1 && (*p & 0x80)) return mrb_false_value(); |
| 53 | + p += len; |
| 54 | + utf8_len++; |
| 55 | + } |
| 56 | + if (byte_len == utf8_len) RSTR_SET_SINGLE_BYTE_FLAG(s); |
| 57 | + return mrb_true_value(); |
| 58 | +} |
| 59 | + |
| 60 | +static mrb_value |
| 61 | +str_b(mrb_state *mrb, mrb_value self) |
| 62 | +{ |
| 63 | + mrb_value str = mrb_str_dup(mrb, self); |
| 64 | + mrb_str_ptr(str)->flags |= MRB_STR_BINARY; |
| 65 | + return str; |
| 66 | +} |
| 67 | + |
| 68 | +static mrb_value |
| 69 | +get_encoding(mrb_state *mrb, mrb_sym enc) |
| 70 | +{ |
| 71 | + struct RClass *e = mrb_module_get_id(mrb, MRB_SYM(Encoding)); |
| 72 | + return mrb_const_get(mrb, mrb_obj_value(e), enc); |
| 73 | +} |
| 74 | + |
| 75 | +static mrb_value |
| 76 | +str_encoding(mrb_state *mrb, mrb_value self) |
| 77 | +{ |
| 78 | + struct RString *s = mrb_str_ptr(self); |
| 79 | + if (RSTR_BINARY_P(s)) { |
| 80 | + return get_encoding(mrb, MRB_SYM(BINARY)); |
| 81 | + } |
| 82 | + return get_encoding(mrb, MRB_SYM(UTF_8)); |
| 83 | +} |
| 84 | + |
| 85 | +static mrb_value |
| 86 | +str_force_encoding(mrb_state *mrb, mrb_value self) |
| 87 | +{ |
| 88 | + mrb_value enc; |
| 89 | + |
| 90 | + mrb_get_args(mrb, "S", &enc); |
| 91 | + |
| 92 | + struct RString *s = mrb_str_ptr(self); |
| 93 | + if (ENC_COMP_P(enc, ENC_ASCII_8BIT) || |
| 94 | + ENC_COMP_P(enc, ENC_BINARY)) { |
| 95 | + s->flags |= MRB_STR_BINARY; |
| 96 | + } |
| 97 | + else if (ENC_COMP_P(enc, ENC_UTF8)) { |
| 98 | + s->flags &= ~MRB_STR_BINARY; |
| 99 | + } |
| 100 | + else { |
| 101 | + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %v", enc); |
| 102 | + } |
| 103 | + return self; |
| 104 | +} |
| 105 | + |
| 106 | +void |
| 107 | +mrb_mruby_encoding_gem_init(mrb_state* mrb) |
| 108 | +{ |
| 109 | + struct RClass *s = mrb->string_class; |
| 110 | + |
| 111 | + mrb_define_method_id(mrb, s, MRB_SYM(b), str_b, MRB_ARGS_NONE()); |
| 112 | + mrb_define_method_id(mrb, s, MRB_SYM_Q(valid_encoding), str_valid_enc_p, MRB_ARGS_NONE()); |
| 113 | + mrb_define_method_id(mrb, s, MRB_SYM(encoding), str_encoding, MRB_ARGS_NONE()); |
| 114 | + mrb_define_method_id(mrb, s, MRB_SYM(force_encoding), str_force_encoding, MRB_ARGS_REQ(1)); |
| 115 | + |
| 116 | + /* Poorman's Encoding |
| 117 | + * |
| 118 | + * Encoding - module instead of class |
| 119 | + * encodings - supports only UTF-8 and ASCII-8BIT (and its alias BINARY) |
| 120 | + * each Encoding - encoding name string instead of Encoding object |
| 121 | + * |
| 122 | + */ |
| 123 | + struct RClass *e = mrb_define_module_id(mrb, MRB_SYM(Encoding)); |
| 124 | + mrb_value b = mrb_str_new_lit_frozen(mrb, ENC_ASCII_8BIT); |
| 125 | + mrb_define_const_id(mrb, e, MRB_SYM(ASCII_8BIT), b); |
| 126 | + mrb_define_const_id(mrb, e, MRB_SYM(BINARY), b); |
| 127 | + mrb_define_const_id(mrb, e, MRB_SYM(UTF_8), mrb_str_new_lit_frozen(mrb, ENC_UTF8)); |
| 128 | +} |
| 129 | + |
| 130 | +void |
| 131 | +mrb_mruby_encoding_gem_final(mrb_state* mrb) |
| 132 | +{ |
| 133 | +} |
0 commit comments