Permalink
Browse files

Slight step sideways: end-of-night code dump. StrNode is more accurate for encoding. Regexp is somewhat better? More specs fail, though.
  • Loading branch information...
1 parent 55f8fb2 commit 9c4779ea2aa2db15594662835ea0739bcaafea86 @enebo enebo committed with yokolet Jan 8, 2011
@@ -1168,7 +1168,7 @@ public IRubyObject initialize_m19(IRubyObject arg0, IRubyObject arg1, IRubyObjec
/**
 * Initializes this regexp from another regexp.
 *
 * Calls {@code regexp.check()} first, then delegates to
 * {@code initializeCommon19} with the source regexp's {@code str}, its
 * encoding and its options. With this change the options are read from
 * {@code regexp.getOptions()} instead of {@code regexp.pattern.getOptions()}.
 *
 * @param regexp the regexp whose source bytes, encoding and options are copied
 * @return whatever {@code initializeCommon19} returns
 */
private IRubyObject initializeByRegexp19(RubyRegexp regexp) {
regexp.check();
- return initializeCommon19(regexp.str, regexp.getEncoding(), regexp.pattern.getOptions());
+ return initializeCommon19(regexp.str, regexp.getEncoding(), regexp.getOptions());
}
// rb_reg_initialize_str
@@ -1190,6 +1190,7 @@ private RubyRegexp initializeCommon19(RubyString str, int options) {
private RubyRegexp initializeCommon19(ByteList bytes, Encoding enc, int options) {
Ruby runtime = getRuntime();
setKCode(runtime, options);
+
if (!isTaint() && runtime.getSafeLevel() >= 4) throw runtime.newSecurityError("Insecure: can't modify regexp");
checkFrozen();
if (isLiteral()) throw runtime.newSecurityError("can't modify literal regexp");
@@ -1211,8 +1212,9 @@ private RubyRegexp initializeCommon19(ByteList bytes, Encoding enc, int options)
enc = USASCIIEncoding.INSTANCE;
}
- if ((options & ARG_ENCODING_FIXED) == 0 && fixedEnc[0] == null) setKCodeDefault();
+ if ((options & ARG_ENCODING_FIXED) != 0 && fixedEnc[0] == null) setKCodeDefault();
if ((options & ARG_ENCODING_NONE) != 0) setEncodingNone();
+
pattern = getRegexpFromCache(runtime, unescaped, enc, options & ARG_OPTION_MASK);
str = bytes;
return this;
@@ -1770,8 +1772,7 @@ public IRubyObject encoding(ThreadContext context) {
/**
 * Ruby-visible {@code fixed_encoding?} method (1.9 compatibility mode only).
 *
 * The result is derived solely from {@code isKCodeDefault()}.
 *
 * NOTE(review): the removed line returned false when {@code isKCodeDefault()}
 * was true and true otherwise; the added line passes {@code isKCodeDefault()}
 * straight to {@code newBoolean}, which presumably yields the opposite answer.
 * The same commit also flips the ARG_ENCODING_FIXED test that guards
 * {@code setKCodeDefault()}, so the inversion may be deliberate - confirm.
 *
 * @param context the current thread context, used to reach the runtime
 * @return a Ruby boolean built from {@code isKCodeDefault()}
 */
@JRubyMethod(name = "fixed_encoding?", compat = CompatVersion.RUBY1_9)
public IRubyObject fixed_encoding_p(ThreadContext context) {
- Ruby runtime = context.getRuntime();
- return isKCodeDefault() ? runtime.getFalse() : runtime.getTrue();
+ return context.getRuntime().newBoolean(isKCodeDefault());
}
/** rb_reg_nth_match
@@ -53,9 +53,14 @@
private final int codeRange;
/**
 * Creates a string node whose code range is computed here by scanning
 * {@code value} with its own encoding. After this change it simply delegates
 * to the three-argument constructor.
 *
 * @param position source position of the node
 * @param value the string's bytes
 */
public StrNode(ISourcePosition position, ByteList value) {
+ this(position, value, StringSupport.codeRangeScan(value.getEncoding(), value));
+ }
+
/**
 * Creates a string node with a code range supplied by the caller, so the
 * bytes are not scanned again here. The given {@code codeRange} is stored
 * as-is; nothing in this constructor checks it against {@code value}.
 *
 * @param position source position of the node
 * @param value the string's bytes
 * @param codeRange code range to record for {@code value}
 */
+ public StrNode(ISourcePosition position, ByteList value, int codeRange) {
super(position);
+
this.value = value;
- codeRange = StringSupport.codeRangeScan(value.getEncoding(), value);
+ this.codeRange = codeRange;
}
public StrNode(ISourcePosition position, StrNode head, StrNode tail) {
@@ -59,6 +59,7 @@
import org.jruby.parser.ParserSupport;
import org.jruby.parser.Tokens;
import org.jruby.util.ByteList;
+import org.jruby.util.StringSupport;
/** This is a port of the MRI lexer to Java; it is compatible with Ruby 1.8.1.
@@ -514,6 +515,23 @@ protected boolean isIdentifierChar(int c) {
/**
 * Reports whether {@code c} occupies something other than exactly one byte in
 * the lexer's current {@code encoding}, as measured by
 * {@code encoding.codeToMbcLength(c)}.
 *
 * @param c the character code to test
 * @return true when the reported length is not 1
 */
protected boolean isMultiByteChar(int c) {
return encoding.codeToMbcLength(c) != 1;
}
+
/**
 * Builds a {@code StrNode} for {@code buffer}, possibly re-tagging the
 * buffer's encoding first.
 *
 * The code range is scanned once, using the buffer's encoding as it is on
 * entry, and that same value is handed to the node even if the encoding is
 * changed below. The buffer is re-tagged as ASCII-8BIT only when all of the
 * following hold: the STR_FUNC_REGEXP bit is not set in {@code flags}, the
 * buffer's encoding is ASCII-compatible, the content is not purely 7-bit,
 * the lexer's own encoding is US-ASCII, and the buffer's encoding is not
 * UTF-8. Note that this mutates the caller's {@code buffer}.
 *
 * @param position source position for the new node
 * @param buffer the string bytes; its encoding may be changed by this call
 * @param flags string-term flags; only STR_FUNC_REGEXP is examined here
 * @return a new node wrapping {@code buffer} with the scanned code range
 */
+ public StrNode createStrNode(ISourcePosition position, ByteList buffer, int flags) {
+ Encoding bufferEncoding = buffer.getEncoding();
+ int codeRange = StringSupport.codeRangeScan(bufferEncoding, buffer);
+
+ if ((flags & RubyYaccLexer.STR_FUNC_REGEXP) == 0 && bufferEncoding.isAsciiCompatible()) {
+ // If we have characters outside 7-bit range and we are still ascii then change to ascii-8bit
+ if (codeRange == StringSupport.CR_7BIT) {
+ // Pure 7-bit content: do nothing, as MRI does
+ } else if (getEncoding() == RubyYaccLexer.USASCII_ENCODING &&
+ bufferEncoding != RubyYaccLexer.UTF8_ENCODING) {
+ buffer.setEncoding(RubyYaccLexer.ASCII8BIT_ENCODING);
+ }
+ }
+
+ return new StrNode(position, buffer, codeRange);
+ }
/**
* What type/kind of quote are we dealing with?
@@ -30,12 +30,10 @@
import java.io.IOException;
import org.jcodings.Encoding;
import org.jruby.ast.RegexpNode;
-import org.jruby.ast.StrNode;
import org.jruby.lexer.yacc.SyntaxException.PID;
import org.jruby.parser.ReOptions;
import org.jruby.parser.Tokens;
import org.jruby.util.ByteList;
-import org.jruby.util.StringSupport;
public class StringTerm extends StrTerm {
private static final int ASCII = 16;
@@ -65,28 +63,7 @@ public StringTerm(int flags, int begin, int end) {
/**
 * Creates the empty buffer that string content is accumulated into.
 *
 * When {@code lexer.isOneEight()} is true a plain {@code ByteList} is returned
 * with no encoding passed in. Otherwise the empty buffer is tagged with the
 * lexer's encoding. The removed lines below used US-ASCII instead for regexp
 * terms; that special case, along with the local {@code isRegexp} and
 * {@code createStrNode} helpers, is dropped by this change.
 *
 * @param lexer the lexer supplying the compatibility mode and encoding
 * @return a new, empty byte list
 */
protected ByteList createByteList(RubyYaccLexer lexer) {
if (lexer.isOneEight()) return new ByteList();
- Encoding encoding = isRegexp() ? RubyYaccLexer.USASCII_ENCODING : lexer.getEncoding();
-
- return new ByteList(new byte[]{}, encoding);
- }
-
- protected boolean isRegexp() {
- return (flags & RubyYaccLexer.STR_FUNC_REGEXP) != 0;
- }
-
- protected StrNode createStrNode(RubyYaccLexer lexer, ByteList buffer) {
- Encoding encoding = buffer.getEncoding();
-
- if (!isRegexp() && encoding.isAsciiCompatible()) {
- // If we have characters outside 7-bit range and we are still ascii then change to ascii-8bit
- if (StringSupport.codeRangeScan(buffer.getEncoding(), buffer) != StringSupport.CR_7BIT &&
- lexer.getEncoding() == RubyYaccLexer.USASCII_ENCODING &&
- encoding != RubyYaccLexer.UTF8_ENCODING) {
- buffer.setEncoding(RubyYaccLexer.ASCII8BIT_ENCODING);
- }
- }
-
- return new StrNode(lexer.getPosition(), buffer);
+ return new ByteList(new byte[]{}, lexer.getEncoding());
}
private int endFound(RubyYaccLexer lexer, LexerSource src) throws IOException {
@@ -156,7 +133,7 @@ public int parseString(RubyYaccLexer lexer, LexerSource src) throws IOException
src.getCurrentLine(), "unterminated string meets end of file");
}
- lexer.setValue(createStrNode(lexer, buffer));
+ lexer.setValue(lexer.createStrNode(lexer.getPosition(), buffer, flags));
return Tokens.tSTRING_CONTENT;
}
@@ -1587,18 +1587,16 @@ private boolean is7BitASCII(ByteList value) {
return StringSupport.codeRangeScan(value.getEncoding(), value) == StringSupport.CR_7BIT;
}
- // TODO: The weird USASCII_ENCODING logic in the ifs are because I don't understand how
- // MRI differentiates between 'n' in their weird bit logic (mri: reg_fragment_setenc_gen)
public void setRegexpEncoding(RegexpNode end, ByteList value) {
Encoding optionsEncoding = extractEncodingFromOptions(end.getOptions());
// Change encoding to one specified by regexp options as long as the string is compatible.
- if (optionsEncoding != null && optionsEncoding != RubyYaccLexer.USASCII_ENCODING) {
+ if (optionsEncoding != null) {
if (optionsEncoding != value.getEncoding() && !is7BitASCII(value)) {
compileError(optionsEncoding, value.getEncoding());
}
value.setEncoding(optionsEncoding);
- } else if (optionsEncoding == RubyYaccLexer.USASCII_ENCODING) {
+ } else if ((end.getOptions() & 32) != 0) {
if (value.getEncoding() == RubyYaccLexer.ASCII8BIT_ENCODING && !is7BitASCII(value)) {
compileError(optionsEncoding, value.getEncoding());
}
Oops, something went wrong.

0 comments on commit 9c4779e

Please sign in to comment.