Permalink
Browse files

Make matching use byte arrays instead of char arrays

git-svn-id: http://svn.codehaus.org/jruby/branches/newreg@3229 961051c9-f516-0410-bf72-c9f7e237a7b7
  • Loading branch information...
1 parent 7641cf2 commit 04c678666dab255b1f549b23abc7d6469393a7ef @olabini olabini committed Mar 19, 2007
@@ -50,7 +50,8 @@
* @author amoore
*/
public class RubyMatchData extends RubyObject {
- char[] str;
+ byte[] str;
+ int len;
Registers regs;
private RubyString _str;
@@ -93,7 +94,7 @@ private IRubyObject match_array(int start) {
if(regs.beg[i] == -1) {
ary.add(getRuntime().getNil());
} else {
- IRubyObject _s = RubyString.newString(getRuntime(),ByteList.plain(str),regs.beg[i],regs.end[i]-regs.beg[i]);
+ IRubyObject _s = RubyString.newString(getRuntime(),new ByteList(str,regs.beg[i],regs.end[i]-regs.beg[i]));
if(taint) {
_s.taint();
}
@@ -193,7 +194,7 @@ public IRubyObject pre_match() {
if(regs.beg[0] == -1) {
return getRuntime().getNil();
}
- RubyString str_ = RubyString.newString(getRuntime(),new String(str,0,regs.beg[0]));
+ RubyString str_ = RubyString.newString(getRuntime(),new ByteList(str,0,regs.beg[0]));
if(isTaint()) {
str_.taint();
}
@@ -207,7 +208,7 @@ public IRubyObject post_match() {
if(regs.beg[0] == -1) {
return getRuntime().getNil();
}
- RubyString str_ = RubyString.newString(getRuntime(),new String(str,regs.end[0],str.length-regs.end[0]));
+ RubyString str_ = RubyString.newString(getRuntime(),new ByteList(str,regs.end[0],str.length-regs.end[0]));
if(isTaint()) {
str_.taint();
}
@@ -233,7 +234,7 @@ public IRubyObject to_s() {
*/
public IRubyObject string() {
if(_str == null) {
- _str = RubyString.newString(getRuntime(),ByteList.plain(str));
+ _str = RubyString.newString(getRuntime(),new ByteList(str,0,len,false));
_str.setFrozen(true);
}
return _str;
@@ -64,7 +64,7 @@
private boolean kcode_default = true;
private KCode kcode;
private Pattern ptr;
- private char[] str;
+ private byte[] str;
private int len;
private boolean literal;
@@ -123,16 +123,24 @@ public static RubyClass createRegexpClass(Ruby runtime) {
}
public static RubyRegexp newRegexp(Ruby runtime, String pattern, int options, String kcode) {
- return newRegexp(runtime, pattern.toCharArray(), options, kcode);
+ return newRegexp(runtime, ByteList.plain(pattern), options, kcode);
}
public static RubyRegexp newRegexp(IRubyObject ptr, int options, String kcode) {
- return newRegexp(ptr.getRuntime(), ptr.convertToString().getByteList().toCharArray(), options, kcode);
+ return newRegexp(ptr.getRuntime(), ptr.convertToString().getByteList(), options, kcode);
}
- public static RubyRegexp newRegexp(Ruby runtime, char[] pattern, int options, String kcode) {
+ public static RubyRegexp newRegexp(Ruby runtime, ByteList pattern, int options, String kcode) {
+ return newRegexp(runtime, pattern.bytes, pattern.realSize, options, kcode);
+ }
+
+ public static RubyRegexp newRegexp(Ruby runtime, byte[] pattern, int options, String kcode) {
+ return newRegexp(runtime, pattern, pattern.length, options, kcode);
+ }
+
+ public static RubyRegexp newRegexp(Ruby runtime, byte[] pattern, int len, int options, String kcode) {
RubyRegexp rr = new RubyRegexp(runtime);
- rr.initialize(pattern,pattern.length,options);
+ rr.initialize(pattern,len,options);
return rr;
}
@@ -163,7 +171,7 @@ public RubyFixnum hash() {
return getRuntime().newFixnum(hashval);
}
- private final static boolean memcmp(char[] s1, char[] s2, int len) {
+ private final static boolean memcmp(byte[] s1, byte[] s2, int len) {
int x = 0;
while(len-->0) {
if(s1[x] != s2[x]) {
@@ -233,7 +241,7 @@ private void rb_reg_check(IRubyObject re) {
/** rb_reg_initialize
*/
- private void initialize(char[] s, int len, int options) {
+ private void initialize(byte[] s, int len, int options) {
if(isTaint() && getRuntime().getSafeLevel() >= 4) {
throw getRuntime().newSecurityError("Insecure: can't modify regexp");
}
@@ -263,13 +271,13 @@ private void initialize(char[] s, int len, int options) {
break;
}
ptr = make_regexp(s, len, options & 0xf, kcode.getContext());
- str = new char[len];
+ str = new byte[len];
System.arraycopy(s,0,str,0,len);
this.len = len;
}
- private final Pattern make_regexp(char[] s, int len, int flags, Pattern.CompileContext ctx) {
- Pattern rp = new Pattern(new char[16],16,new char[256],flags);
+ private final Pattern make_regexp(byte[] s, int len, int flags, Pattern.CompileContext ctx) {
+ Pattern rp = new Pattern(new byte[16],16,new byte[256],flags);
try {
Pattern.compile(s,0,len,rp,ctx);
} catch(PatternSyntaxException e) {
@@ -282,14 +290,14 @@ private final Pattern make_regexp(char[] s, int len, int flags, Pattern.CompileC
return rp;
}
- private final StringBuffer rb_reg_desc(char[] s, int len) {
+ private final StringBuffer rb_reg_desc(byte[] s, int len) {
StringBuffer sb = new StringBuffer("/");
rb_reg_expr_str(sb, s, len);
sb.append("/");
return sb;
}
- private final void rb_reg_expr_str(StringBuffer sb, char[] s, int len) {
+ private final void rb_reg_expr_str(StringBuffer sb, byte[] s, int len) {
int p,pend;
boolean need_escape = false;
p = 0;
@@ -305,36 +313,36 @@ private final void rb_reg_expr_str(StringBuffer sb, char[] s, int len) {
p += Pattern.mbclen(s[p],ctx);
}
if(!need_escape) {
- sb.append(s,0,len);
+ sb.append(ByteList.createString(s,0,len));
} else {
p = 0;
while(p < pend) {
if(s[p] == '\\') {
int n = Pattern.mbclen(s[p+1],ctx) + 1;
- sb.append(s,p,n);
+ sb.append(ByteList.createString(s,p,n));
p += n;
continue;
} else if(s[p] == '/') {
sb.append("\\/");
} else if(Pattern.ismbchar(s[p],ctx)) {
- sb.append(s,p,Pattern.mbclen(s[p],ctx));
+ sb.append(ByteList.createString(s,p,Pattern.mbclen(s[p],ctx)));
p += Pattern.mbclen(s[p],ctx);
continue;
} else if((' ' == s[p] || (!Character.isWhitespace(s[p]) &&
!Character.isISOControl(s[p])))) {
- sb.append(s[p]);
- } else if(!Character.isSpace(s[p])) {
+ sb.append((char)(s[p]&0xFF));
+ } else if(!Character.isSpace((char)(s[p]&0xFF))) {
sb.append('\\');
sb.append(Integer.toString((int)(s[p]&0377),8));
} else {
- sb.append(s[p]);
+ sb.append((char)(s[p]&0xFF));
}
p++;
}
}
}
- private final void rb_reg_raise(char[] s, int len, String err) {
+ private final void rb_reg_raise(byte[] s, int len, String err) {
throw getRuntime().newRegexpError(err + ": " + rb_reg_desc(s,len));
}
@@ -383,7 +391,7 @@ private int rb_reg_options() {
/** rb_reg_initialize_m
*/
public IRubyObject initialize_m(IRubyObject[] args) {
- char[] s;
+ byte[] s;
int len;
int flags = 0;
@@ -439,8 +447,9 @@ public IRubyObject initialize_m(IRubyObject[] args) {
break;
}
}
- s = args[0].convertToString().getByteList().toCharArray();
- len = s.length;
+ ByteList bl = args[0].convertToString().getByteList();
+ s = bl.bytes;
+ len = bl.realSize;
}
initialize(s, len, flags);
return this;
@@ -465,8 +474,9 @@ public int search(RubyString str, int pos, boolean reverse) {
} else {
range = str.getByteList().length() - pos;
}
- char[] cstr = str.getByteList().toCharArray();
- result = ptr.search(cstr,cstr.length,pos,range,regs);
+ ByteList bl = str.getByteList();
+ byte[] cstr = bl.bytes;
+ result = ptr.search(cstr,bl.realSize,pos,range,regs);
if(result == -2) {
rb_reg_raise(cstr,len,"Stack overflow in regexp matcher");
@@ -487,7 +497,8 @@ public int search(RubyString str, int pos, boolean reverse) {
}
((RubyMatchData)match).regs = regs.copy();
- ((RubyMatchData)match).str = (char[])cstr.clone();
+ ((RubyMatchData)match).str = (byte[])cstr.clone();
+ ((RubyMatchData)match).len = bl.realSize;
getRuntime().getCurrentContext().setBackref(match);
match.infectBy(this);
@@ -616,7 +627,7 @@ public int adjust_startpos(IRubyObject str, int pos, boolean reverse) {
range = ((RubyString)str).getByteList().length() - pos;
}
- return ptr.adjust_startpos(ByteList.plain(((RubyString)str).getByteList().bytes), ((RubyString)str).getByteList().length(), pos, range);
+ return ptr.adjust_startpos(((RubyString)str).getByteList().bytes, ((RubyString)str).getByteList().realSize, pos, range);
}
public IRubyObject casefold_p() {
@@ -725,6 +736,10 @@ public IRubyObject to_s() {
} while(true);
}
+ private final boolean ISPRINT(byte c) {
+ return ISPRINT((char)(c&0xFF));
+ }
+
private final boolean ISPRINT(char c) {
return (' ' == c || (!Character.isWhitespace(c) && !Character.isISOControl(c)));
}
@@ -910,7 +925,7 @@ public static IRubyObject nth_match(int nth, IRubyObject match) {
return nil;
}
end = m.regs.end[nth];
- RubyString str = RubyString.newString(match.getRuntime(), new ByteList(ByteList.plain(m.str),start,end-start,false));
+ RubyString str = RubyString.newString(match.getRuntime(), new ByteList(m.str,start,end-start,false));
str.infectBy(match);
return str;
}
@@ -944,7 +959,7 @@ public static IRubyObject match_pre(IRubyObject match) {
if(m.regs.beg[0] == -1) {
return nil;
}
- RubyString str = RubyString.newString(match.getRuntime(), new ByteList(ByteList.plain(m.str),0,m.regs.beg[0],false));
+ RubyString str = RubyString.newString(match.getRuntime(), new ByteList(m.str,0,m.regs.beg[0]));
str.infectBy(match);
return str;
}
@@ -961,7 +976,7 @@ public static IRubyObject match_post(IRubyObject match) {
if(m.regs.beg[0] == -1) {
return nil;
}
- RubyString str = RubyString.newString(match.getRuntime(), new ByteList(ByteList.plain(m.str),m.regs.end[0],m.str.length-m.regs.end[0],false));
+ RubyString str = RubyString.newString(match.getRuntime(), new ByteList(m.str,m.regs.end[0],m.str.length-m.regs.end[0]));
str.infectBy(match);
return str;
}
@@ -1612,7 +1612,7 @@ private static IRubyObject regexpNode(Ruby runtime, Node node) {
}
RubyRegexp p = iVisited.getPattern();
if(p == null) {
- p = RubyRegexp.newRegexp(runtime, iVisited.getValue().toCharArray(), iVisited.getFlags(), lang);
+ p = RubyRegexp.newRegexp(runtime, iVisited.getValue(), iVisited.getFlags(), lang);
iVisited.setPattern(p);
}
return p;
@@ -329,6 +329,24 @@ public static ByteList create(CharSequence s) {
return new ByteList(plain(s),false);
}
+ public static String createString(byte[] s, int start, int len) {
+ try {
+ return new String(s,start,len, "ISO-8859-1");
+ } catch(Exception e) {
+ //CAN'T HAPPEN
+ return null;
+ }
+ }
+
+ public static byte[] plain(String s) {
+ try {
+ return s.getBytes("ISO-8859-1");
+ } catch(Exception e) {
+ //CAN'T HAPPEN
+ return null;
+ }
+ }
+
public static byte[] plain(CharSequence s) {
byte[] bytes = new byte[s.length()];
for (int i = 0; i < bytes.length; i++) {
@@ -161,7 +161,7 @@ public int matches(Pattern pattern) {
if(regs == null) {
regs = new Registers();
}
- char[] ccc = string.toCharArray();
+ byte[] ccc = ByteList.plain(string);
if(pattern.search(ccc,ccc.length,pos,ccc.length,regs) == pos) {
matchStart = pos;
matchEnd = regs.end[0];
@@ -178,7 +178,7 @@ public CharSequence scanUntil(Pattern pattern) {
if(regs == null) {
regs = new Registers();
}
- char[] ccc = string.toCharArray();
+ byte[] ccc = ByteList.plain(string);
if(pattern.search(ccc,ccc.length,pos,ccc.length,regs) >= pos) {
lastPos = pos;
matchStart = regs.beg[0];
@@ -199,7 +199,7 @@ public CharSequence scan(Pattern pattern) {
if(regs == null) {
regs = new Registers();
}
- char[] ccc = string.toCharArray();
+ byte[] ccc = ByteList.plain(string);
if(pattern.search(ccc,ccc.length,pos,ccc.length,regs) == pos) {
lastPos = pos;
matchStart = pos;
@@ -220,7 +220,7 @@ public CharSequence check(Pattern pattern) {
if(regs == null) {
regs = new Registers();
}
- char[] ccc = string.toCharArray();
+ byte[] ccc = ByteList.plain(string);
if(pattern.search(ccc,ccc.length,pos,ccc.length,regs) == pos) {
matchStart = pos;
matchEnd = regs.end[0];
@@ -238,7 +238,7 @@ public CharSequence checkUntil(Pattern pattern) {
if(regs == null) {
regs = new Registers();
}
- char[] ccc = string.toCharArray();
+ byte[] ccc = ByteList.plain(string);
if(pattern.search(ccc,ccc.length,pos,ccc.length,regs) >= pos) {
matchStart = regs.beg[0];
matchEnd = regs.end[0];
@@ -256,7 +256,7 @@ public int skip(Pattern pattern) {
if(regs == null) {
regs = new Registers();
}
- char[] ccc = string.toCharArray();
+ byte[] ccc = ByteList.plain(string);
if(pattern.search(ccc,ccc.length,pos,ccc.length,regs) == pos) {
lastPos = pos;
matchStart = pos;
@@ -276,7 +276,7 @@ public int skipUntil(Pattern pattern) {
if(regs == null) {
regs = new Registers();
}
- char[] ccc = string.toCharArray();
+ byte[] ccc = ByteList.plain(string);
if(pattern.search(ccc,ccc.length,pos,ccc.length,regs) >= pos) {
lastPos = pos;
pos = regs.end[0];
@@ -296,7 +296,7 @@ public int exists(Pattern pattern) {
if(regs == null) {
regs = new Registers();
}
- char[] ccc = string.toCharArray();
+ byte[] ccc = ByteList.plain(string);
if(pattern.search(ccc,ccc.length,pos,ccc.length,regs) >= pos) {
matchStart = regs.beg[0];
matchEnd = regs.end[0];
Oops, something went wrong.

0 comments on commit 04c6786

Please sign in to comment.