Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

253 lines (202 sloc) 6.956 kB
#include "oniguruma.h" // Must be first.
#include "builtin/regexp.hpp"
#include "builtin/class.hpp"
#include "builtin/integer.hpp"
#include "builtin/lookuptable.hpp"
#include "builtin/string.hpp"
#include "builtin/symbol.hpp"
#include "builtin/tuple.hpp"
#include "vm.hpp"
#include "vm/object_utils.hpp"
#include "objectmemory.hpp"
// Regexp option flags, aliased from the corresponding Oniguruma option bits.
#define OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
#define OPTION_EXTENDED ONIG_OPTION_EXTEND
#define OPTION_MULTILINE ONIG_OPTION_MULTILINE
// Union of the three flags above; used to isolate them from the packed
// options integer.
#define OPTION_MASK (OPTION_IGNORECASE|OPTION_EXTENDED|OPTION_MULTILINE)
// KCODE values travel in the same options integer as the flags above and are
// translated to and from Oniguruma encodings by get_enc_from_kcode() and
// get_kcode_from_enc().
#define KCODE_ASCII 0
#define KCODE_NONE 16
#define KCODE_EUC 32
#define KCODE_SJIS 48
#define KCODE_UTF8 64
// Extracts the KCODE portion of the packed options integer. KCODE_SJIS (48)
// already contains the KCODE_NONE (16) bit, so the mask covers KCODE_NONE too.
#define KCODE_MASK (KCODE_EUC|KCODE_SJIS|KCODE_UTF8)
namespace rubinius {
// Frees the Oniguruma regex held by +regexp+ and resets its onig_data pointer
// to NULL so the freed regex is not referenced again through this object.
void Regexp::Info::cleanup(OBJECT regexp) {
  Regexp* target = as<Regexp>(regexp);
  onig_free(target->onig_data);
  target->onig_data = NULL;
}
// Sets up regexp support: initializes Oniguruma, then creates the "Regexp" and
// "MatchData" classes (each created with G(object) passed to new_class),
// stores them in the corresponding globals and tags each with its object type.
void Regexp::init(STATE) {
onig_init();
GO(regexp).set(state->new_class("Regexp", G(object), 0));
G(regexp)->set_object_type(state, RegexpType);
GO(matchdata).set(state->new_class("MatchData", G(object), 0));
G(matchdata)->set_object_type(state, MatchDataType);
}
// Returns the version string reported by Oniguruma. The string belongs to the
// library; constness is dropped only to satisfy the declared return type.
char *Regexp::version(STATE) {
  const char* onig_ver = onig_version();
  return const_cast<char*>(onig_ver);
}
// Maps a KCODE_* value to the matching Oniguruma encoding. KCODE_NONE and any
// value that is not recognised both yield ONIG_ENCODING_ASCII.
static OnigEncoding get_enc_from_kcode(int kcode) {
  switch(kcode) {
  case KCODE_EUC:
    return ONIG_ENCODING_EUC_JP;
  case KCODE_SJIS:
    return ONIG_ENCODING_SJIS;
  case KCODE_UTF8:
    return ONIG_ENCODING_UTF8;
  case KCODE_NONE:
  default:
    return ONIG_ENCODING_ASCII;
  }
}
// Inverse of get_enc_from_kcode: maps an Oniguruma encoding to its KCODE_*
// value. ONIG_ENCODING_ASCII maps to KCODE_NONE; an encoding that is not one
// of the four known ones maps to KCODE_ASCII.
int get_kcode_from_enc(OnigEncoding enc) {
  if(enc == ONIG_ENCODING_UTF8) return KCODE_UTF8;
  if(enc == ONIG_ENCODING_SJIS) return KCODE_SJIS;
  if(enc == ONIG_ENCODING_EUC_JP) return KCODE_EUC;
  if(enc == ONIG_ENCODING_ASCII) return KCODE_NONE;
  return KCODE_ASCII;
}
// Context passed to _gather_names through the opaque argument of
// onig_foreach_name.
struct _gather_data {
// VM state member (accessed as `state`), used by the callback when it creates
// symbols and integers.
STATE;
// Table that receives one entry per named capture group.
LookupTable* tbl;
};
static int _gather_names(const UChar *name, const UChar *name_end,
int ngroup_num, int *group_nums, regex_t *reg, struct _gather_data *gd) {
int gn;
STATE;
LookupTable* tbl = gd->tbl;
state = gd->state;
gn = group_nums[0];
tbl->store(state, state->symbol((char*)name), Integer::from(state, gn - 1));
return 0;
}
/*
 * Allocates a bare Regexp object and nothing else: no Oniguruma regex is
 * compiled and onig_data is left NULL. Keeping allocation separate from
 * compilation is what lets Regexp#initialize_copy copy a regular expression.
 */
Regexp* Regexp::create(STATE) {
  Regexp* fresh = (Regexp*)state->om->new_object(G(regexp), Regexp::fields);
  fresh->onig_data = NULL;
  return fresh;
}
/*
 * Compiles +pattern+ with Oniguruma and stores the result in this object.
 * Exposed as a primitive so that #initialize_copy can work.
 *
 * +options+ packs the regexp option flags (OPTION_MASK) together with the
 * KCODE that selects the encoding (KCODE_MASK). +lang+ is not used here.
 *
 * If compilation fails, Exception::regexp_error is called with the Oniguruma
 * message followed by the pattern text. Afterwards the source is recorded and
 * the names table is set: nil when the pattern has no named groups, otherwise
 * a LookupTable filled in by _gather_names. Returns this.
 */
Regexp* Regexp::initialize(STATE, String* pattern, INTEGER options,
                           OBJECT lang) {
  typedef int (*name_callback)(const OnigUChar*, const OnigUChar*, int, int*,
                               OnigRegex, void*);

  const UChar* pat_start = (UChar*)pattern->c_str();
  const UChar* pat_stop = pat_start + pattern->size();

  // Split the packed integer: the KCODE bits pick the encoding, the remaining
  // masked bits are handed to Oniguruma as options.
  OnigOptionType onig_opts = options->to_native();
  const int kcode = onig_opts & KCODE_MASK;
  OnigEncoding encoding = get_enc_from_kcode(kcode);
  onig_opts &= OPTION_MASK;

  OnigErrorInfo err_info;
  const int status = onig_new(&this->onig_data, pat_start, pat_stop, onig_opts,
                              encoding, ONIG_SYNTAX_RUBY, &err_info);
  if(status != ONIG_NORMAL) {
    UChar onig_msg[ONIG_MAX_ERROR_MESSAGE_LEN];
    char message[1024];
    onig_error_code_to_str(onig_msg, status, &err_info);
    snprintf(message, sizeof(message), "%s: %s", onig_msg, pat_start);
    Exception::regexp_error(state, message);
  }

  this->source(state, pattern);

  if(onig_number_of_names(this->onig_data) == 0) {
    this->names(state, (LookupTable*)Qnil);
    return this;
  }

  // Named groups are present: collect them into a fresh table.
  struct _gather_data gd;
  gd.state = state;
  gd.tbl = LookupTable::create(state);
  onig_foreach_name(this->onig_data, (name_callback)_gather_names, (void*)&gd);
  this->names(state, gd.tbl);
  return this;
}
// Allocation primitive. The primitive glue supplies 'self' automatically; the
// new, not-yet-compiled Regexp has its class set to 'self'.
Regexp* Regexp::allocate(STATE, OBJECT self) {
  Regexp* instance = Regexp::create(state);
  instance->klass(state, (Class*)self);
  return instance;
}
// Returns the packed options integer for this regexp: the Oniguruma option
// bits selected by OPTION_MASK, OR-ed with the KCODE_* value derived from the
// regex's encoding.
OBJECT Regexp::options(STATE) {
  regex_t* const compiled = onig_data;
  const OnigOptionType flags = onig_get_options(compiled);
  OnigEncoding encoding = onig_get_encoding(compiled);
  const int packed = (int)(flags & OPTION_MASK) | get_kcode_from_enc(encoding);
  return Integer::from(state, packed);
}
// Builds a Tuple with one [begin, end] pair per region entry after the first:
// entry i of the region is stored at slot i - 1 of the result. Entry 0 is
// skipped here; +max+ is not used.
static OBJECT _md_region_to_tuple(STATE, OnigRegion *region, int max) {
  const int group_count = region->num_regs - 1;
  Tuple* groups = Tuple::create(state, group_count);

  for(int slot = 0; slot < group_count; slot++) {
    const int reg = slot + 1;
    Tuple* span = Tuple::create(state, 2);
    span->put(state, 0, Integer::from(state, region->beg[reg]));
    span->put(state, 1, Integer::from(state, region->end[reg]));
    groups->put(state, slot, span);
  }

  return groups;
}
// Creates a MatchData for a completed match. It holds a duplicate of +string+,
// the +regexp+, the [begin, end] pair of region entry 0 as the full match, and
// the remaining region entries as produced by _md_region_to_tuple.
static OBJECT get_match_data(STATE, OnigRegion *region, String* string, Regexp* regexp, int max) {
  MatchData* match = (MatchData*)state->om->new_object(G(matchdata), MatchData::fields);
  match->source(state, string->string_dup(state));
  match->regexp(state, regexp);

  Tuple* whole = Tuple::create(state, 2);
  whole->put(state, 0, Integer::from(state, region->beg[0]));
  whole->put(state, 1, Integer::from(state, region->end[0]));
  match->full(state, whole);

  Tuple* captures = (Tuple*)_md_region_to_tuple(state, region, max);
  match->region(state, captures);

  return match;
}
/*
 * Searches +string+ for this regexp between the offsets +start+ and +end+
 * (added directly to the string's byte pointer).
 *
 * When +forward+ is true the search runs from +start+ towards +end+;
 * otherwise the two bounds are handed to onig_search in the opposite order,
 * which Oniguruma treats as a backward search.
 *
 * Returns a MatchData on success and nil when nothing matches. onig_search
 * can also return a negative error code other than ONIG_MISMATCH; in that case
 * the region has not been filled with a match, so instead of building a bogus
 * MatchData from it the error is reported through Exception::regexp_error.
 * The region is freed on every path.
 */
OBJECT Regexp::match_region(STATE, String* string, INTEGER start, INTEGER end, OBJECT forward) {
  int beg, max;
  const UChar *str;
  OnigRegion *region;
  OBJECT md;

  region = onig_region_new();
  max = string->size();
  str = (UChar*)string->c_str();

  if(!RTEST(forward)) {
    beg = onig_search(onig_data, str, str + max, str + end->to_native(), str + start->to_native(), region, ONIG_OPTION_NONE);
  } else {
    beg = onig_search(onig_data, str, str + max, str + start->to_native(), str + end->to_native(), region, ONIG_OPTION_NONE);
  }

  if(beg == ONIG_MISMATCH) {
    onig_region_free(region, 1);
    return Qnil;
  }

  if(beg < 0) {
    // Any other negative value is an Oniguruma error, not a match position.
    UChar onig_err_buf[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(onig_err_buf, beg);
    onig_region_free(region, 1);
    Exception::regexp_error(state, (char*)onig_err_buf);
    return Qnil; // fallback in case regexp_error returns to the caller
  }

  md = get_match_data(state, region, string, this, max);
  onig_region_free(region, 1);
  return md;
}
/*
 * Attempts to match this regexp against +string+ exactly at offset +start+
 * (added directly to the string's byte pointer) using onig_match.
 *
 * Returns a MatchData on success and nil when the pattern does not match
 * there. onig_match can also return a negative error code other than
 * ONIG_MISMATCH; in that case the region has not been filled with a match, so
 * the error is reported through Exception::regexp_error instead of being
 * turned into a bogus MatchData. The region is freed on every path.
 */
OBJECT Regexp::match_start(STATE, String* string, INTEGER start) {
  int beg, max;
  const UChar *str;
  OnigRegion *region;
  OBJECT md = Qnil;

  region = onig_region_new();
  max = string->size();
  str = (UChar*)string->c_str();

  beg = onig_match(onig_data, str, str + max, str + start->to_native(), region,
                   ONIG_OPTION_NONE);

  if(beg >= 0) {
    md = get_match_data(state, region, string, this, max);
  } else if(beg != ONIG_MISMATCH) {
    // Any other negative value is an Oniguruma error, not a match length.
    UChar onig_err_buf[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(onig_err_buf, beg);
    onig_region_free(region, 1);
    Exception::regexp_error(state, (char*)onig_err_buf);
    return Qnil; // fallback in case regexp_error returns to the caller
  }

  onig_region_free(region, 1);
  return md;
}
}
Jump to Line
Something went wrong with that request. Please try again.