Skip to content
This repository
Browse code

Merge remote-tracking branch 'github/master' into prepend

  • Loading branch information...
commit f1ff8dd3060792d36550b8755b91137d2dec3a9f 2 parents d4f1931 + 0001ac3
Charles Oliver Nutter headius authored
2  spec/regression/JRUBY-5122_nonblocking_io_spec.rb
@@ -214,7 +214,7 @@
214 214 # Oracle's build block with > 131072 (2**17)
215 215 # On a Windows 7(64) box:
216 216 # Oracle's build does not block (use memory till OOMException)
217   - SOCKET_CHANNEL_MIGHT_BLOCK = "a" * (65536 * 4)
  217 + SOCKET_CHANNEL_MIGHT_BLOCK = "a" * (219463 * 4)
218 218
219 219 it "should not block for write" do
220 220 100.times do # for acceleration; it failed w/o wait_for_accepted call
61 src/org/jruby/ext/ripper/RipperLexer.java
@@ -433,6 +433,10 @@ private boolean isEND() {
433 433 private boolean isARG() {
434 434 return lex_state == LexState.EXPR_ARG || lex_state == LexState.EXPR_CMDARG;
435 435 }
  436 +
  437 + private boolean isSpaceArg(int c, boolean spaceSeen) {
  438 + return isARG() && spaceSeen && !Character.isWhitespace(c);
  439 + }
436 440
437 441 private void determineExpressionState() {
438 442 switch (lex_state) {
@@ -1121,6 +1125,10 @@ private int yylex() throws IOException {
1121 1125 boolean spaceSeen = false;
1122 1126 boolean commandState;
1123 1127
  1128 + // FIXME: This check runs on every yylex() call even though it only matters at the very beginning of the
  1129 + // parse; running it just once requires changing how the parser is set up.
  1130 + if (token == 0 && src.getLine() == 0) detectUTF8BOM();
  1131 +
1124 1132 if (lex_strterm != null) {
1125 1133 int tok = lex_strterm.parseString(this, src);
1126 1134 if (tok == Tokens.tSTRING_END || tok == Tokens.tREGEXP_END) {
@@ -1413,7 +1421,7 @@ private int ampersand(boolean spaceSeen) throws IOException {
1413 1421 //if the warning is generated, the getPosition() on line 954 (this line + 18) will create
1414 1422 //a wrong position if the "inclusive" flag is not set.
1415 1423 ISourcePosition tmpPosition = getPosition();
1416   - if (isARG() && spaceSeen && !Character.isWhitespace(c)) {
  1424 + if (isSpaceArg(c, spaceSeen)) {
1417 1425 IRubyWarnings warnings = getRuntime().getWarnings();
1418 1426 if (warnings.isVerbose()) warnings.warning(IRubyWarnings.ID.ARGUMENT_AS_PREFIX, tmpPosition, "`&' interpreted as argument prefix");
1419 1427 c = Tokens.tAMPER;
@@ -1953,7 +1961,7 @@ private int minus(boolean spaceSeen) throws IOException {
1953 1961 setState(LexState.EXPR_ARG);
1954 1962 return Tokens.tLAMBDA;
1955 1963 }
1956   - if (isBEG() || (isARG() && spaceSeen && !Character.isWhitespace(c))) {
  1964 + if (isBEG() || isSpaceArg(c, spaceSeen)) {
1957 1965 if (isARG()) arg_ambiguous();
1958 1966 setState(LexState.EXPR_BEG);
1959 1967 src.unread(c);
@@ -1978,7 +1986,7 @@ private int percent(boolean spaceSeen) throws IOException {
1978 1986 return Tokens.tOP_ASGN;
1979 1987 }
1980 1988
1981   - if (isARG() && spaceSeen && !Character.isWhitespace(c)) return parseQuote(c);
  1989 + if (isSpaceArg(c, spaceSeen)) return parseQuote(c);
1982 1990
1983 1991 determineExpressionState();
1984 1992
@@ -2026,7 +2034,7 @@ private int plus(boolean spaceSeen) throws IOException {
2026 2034 return Tokens.tOP_ASGN;
2027 2035 }
2028 2036
2029   - if (isBEG() || (isARG() && spaceSeen && !Character.isWhitespace(c))) {
  2037 + if (isBEG() || isSpaceArg(c, spaceSeen)) {
2030 2038 if (isARG()) arg_ambiguous();
2031 2039 setState(LexState.EXPR_BEG);
2032 2040 src.unread(c);
@@ -2161,12 +2169,10 @@ private int slash(boolean spaceSeen) throws IOException {
2161 2169 return Tokens.tOP_ASGN;
2162 2170 }
2163 2171 src.unread(c);
2164   - if (isARG() && spaceSeen) {
2165   - if (!Character.isWhitespace(c)) {
2166   - arg_ambiguous();
2167   - lex_strterm = new StringTerm(str_regexp, '\0', '/');
2168   - return Tokens.tREGEXP_BEG;
2169   - }
  2172 + if (isSpaceArg(c, spaceSeen)) {
  2173 + arg_ambiguous();
  2174 + lex_strterm = new StringTerm(str_regexp, '\0', '/');
  2175 + return Tokens.tREGEXP_BEG;
2170 2176 }
2171 2177
2172 2178 determineExpressionState();
@@ -2191,7 +2197,7 @@ private int star(boolean spaceSeen) throws IOException {
2191 2197 return Tokens.tOP_ASGN;
2192 2198 default:
2193 2199 src.unread(c);
2194   - if (isARG() && spaceSeen && !Character.isWhitespace(c)) {
  2200 + if (isSpaceArg(c, spaceSeen)) {
2195 2201 IRubyWarnings warnings = getRuntime().getWarnings();
2196 2202
2197 2203 if (warnings.isVerbose()) warnings.warning(IRubyWarnings.ID.ARGUMENT_AS_PREFIX, getPosition(), "`*' interpreted as argument prefix");
@@ -2535,11 +2541,6 @@ public int readUTFEscape(ByteList buffer, boolean stringLiteral, boolean symbolL
2535 2541 buffer.setEncoding(UTF8_ENCODING);
2536 2542 if (stringLiteral) tokenAddMBC(codepoint, buffer);
2537 2543 } else if (stringLiteral) {
2538   - if (codepoint == 0 && symbolLiteral) {
2539   - throw new SyntaxException(SyntaxException.PID.INVALID_ESCAPE_SYNTAX, getPosition(),
2540   - getCurrentLine(), "symbol cannot contain '\\u0000'");
2541   - }
2542   -
2543 2544 buffer.append((char) codepoint);
2544 2545 }
2545 2546 } while (src.peek(' ') || src.peek('\t'));
@@ -2555,11 +2556,6 @@ public int readUTFEscape(ByteList buffer, boolean stringLiteral, boolean symbolL
2555 2556 buffer.setEncoding(UTF8_ENCODING);
2556 2557 if (stringLiteral) tokenAddMBC(codepoint, buffer);
2557 2558 } else if (stringLiteral) {
2558   - if (codepoint == 0 && symbolLiteral) {
2559   - throw new SyntaxException(SyntaxException.PID.INVALID_ESCAPE_SYNTAX, getPosition(),
2560   - getCurrentLine(), "symbol cannot contain '\\u0000'");
2561   - }
2562   -
2563 2559 buffer.append((char) codepoint);
2564 2560 }
2565 2561 }
@@ -2711,4 +2707,27 @@ private char scanOct(int count) throws IOException {
2711 2707
2712 2708 return value;
2713 2709 }
  2710 +
  2711 + // FIXME: matchMarker cannot be used for this because it strips off bytes we still need (otherwise this could be a one-liner)
  2712 + private void detectUTF8BOM() throws IOException {
  2713 + int b1 = src.read();
  2714 + if (b1 == 0xef) {
  2715 + int b2 = src.read();
  2716 + if (b2 == 0xbb) {
  2717 + int b3 = src.read();
  2718 + if (b3 == 0xbf) {
  2719 + setEncoding(UTF8_ENCODING);
  2720 + } else {
  2721 + src.unread(b3);
  2722 + src.unread(b2);
  2723 + src.unread(b1);
  2724 + }
  2725 + } else {
  2726 + src.unread(b2);
  2727 + src.unread(b1);
  2728 + }
  2729 + } else {
  2730 + src.unread(b1);
  2731 + }
  2732 + }
2714 2733 }
2  src/org/jruby/internal/runtime/methods/DefaultMethod.java
@@ -142,8 +142,6 @@ private DynamicMethod tryJitReturnMethod(ThreadContext context) {
142 142 // use the class name
143 143 className = implementationClass.getName();
144 144 }
145   - // replace double-colons with dots, to match Java
146   - className.replaceAll("::", ".");
147 145 context.runtime.getJITCompiler().tryJIT(this, context, className, name);
148 146 return box.actualMethod;
149 147 }
105 src/org/jruby/lexer/yacc/RubyYaccLexer.java
@@ -159,6 +159,29 @@ private void warn_balanced(int c, boolean spaceSeen, String op, String syn) {
159 159 ambiguousOperator(op, syn);
160 160 }
161 161 }
  162 +
  163 + // FIXME: matchMarker cannot be used for this because it strips off bytes we still need (otherwise this could be a one-liner)
  164 + private void detectUTF8BOM() throws IOException {
  165 + int b1 = src.read();
  166 + if (b1 == 0xef) {
  167 + int b2 = src.read();
  168 + if (b2 == 0xbb) {
  169 + int b3 = src.read();
  170 + if (b3 == 0xbf) {
  171 + setEncoding(UTF8_ENCODING);
  172 + } else {
  173 + src.unread(b3);
  174 + src.unread(b2);
  175 + src.unread(b1);
  176 + }
  177 + } else {
  178 + src.unread(b2);
  179 + src.unread(b1);
  180 + }
  181 + } else {
  182 + src.unread(b1);
  183 + }
  184 + }
162 185
163 186 public enum Keyword {
164 187 END ("end", Tokens.kEND, Tokens.kEND, LexState.EXPR_END),
@@ -275,6 +298,7 @@ public static Keyword getKeyword(String str) {
275 298
276 299 // Are we lexing Ruby 1.8 or 1.9+ syntax
277 300 private boolean isOneEight;
  301 + private boolean isTwoZero;
278 302 // Count of nested parentheses (1.9 only)
279 303 private int parenNest = 0;
280 304 // 1.9 only
@@ -311,6 +335,7 @@ public final void reset() {
311 335 resetStacks();
312 336 lex_strterm = null;
313 337 commandStart = true;
  338 + if (parserSupport != null) isTwoZero = parserSupport.getConfiguration().getVersion().is2_0();
314 339 }
315 340
316 341 public int nextToken() throws IOException {
@@ -467,6 +492,10 @@ private boolean isEND() {
467 492 private boolean isARG() {
468 493 return lex_state == LexState.EXPR_ARG || lex_state == LexState.EXPR_CMDARG;
469 494 }
  495 +
  496 + private boolean isSpaceArg(int c, boolean spaceSeen) {
  497 + return isARG() && spaceSeen && !Character.isWhitespace(c);
  498 + }
470 499
471 500 private void determineExpressionState() {
472 501 switch (lex_state) {
@@ -621,10 +650,26 @@ private int parseQuote(int c) throws IOException {
621 650 setState(LexState.EXPR_FNAME);
622 651 yaccValue = new Token("%"+c+begin, getPosition());
623 652 return Tokens.tSYMBEG;
624   -
  653 +
  654 + case 'I':
  655 + if (isTwoZero) {
  656 + lex_strterm = new StringTerm(str_dquote | STR_FUNC_QWORDS, begin, end);
  657 + do {c = src.read();} while (Character.isWhitespace(c));
  658 + src.unread(c);
  659 + yaccValue = new Token("%" + c + begin, getPosition());
  660 + return Tokens.tSYMBOLS_BEG;
  661 + }
  662 + case 'i':
  663 + if (isTwoZero) {
  664 + lex_strterm = new StringTerm(/* str_squote | */STR_FUNC_QWORDS, begin, end);
  665 + do {c = src.read();} while (Character.isWhitespace(c));
  666 + src.unread(c);
  667 + yaccValue = new Token("%" + c + begin, getPosition());
  668 + return Tokens.tQSYMBOLS_BEG;
  669 + }
625 670 default:
626   - throw new SyntaxException(PID.STRING_UNKNOWN_TYPE, getPosition(), getCurrentLine(),
627   - "Unknown type of %string. Expected 'Q', 'q', 'w', 'x', 'r' or any non letter character, but found '" + c + "'.");
  671 + throw new SyntaxException(PID.STRING_UNKNOWN_TYPE,
  672 + getPosition(), getCurrentLine(), "unknown type of %string");
628 673 }
629 674 }
630 675
@@ -966,7 +1011,7 @@ private int yylex2() throws IOException {
966 1011
967 1012 return currentToken;
968 1013 }
969   -
  1014 +
970 1015 /**
971 1016 * Returns the next token. Also sets yyVal if needed.
972 1017 *
@@ -976,6 +1021,10 @@ private int yylex() throws IOException {
976 1021 int c;
977 1022 boolean spaceSeen = false;
978 1023 boolean commandState;
  1024 +
  1025 + // FIXME: This check runs on every yylex() call even though it only matters at the very beginning of the
  1026 + // parse; running it just once requires changing how the parser is set up.
  1027 + if (token == 0 && src.getLine() == 0) detectUTF8BOM();
979 1028
980 1029 if (lex_strterm != null) {
981 1030 int tok = lex_strterm.parseString(this, src);
@@ -1255,7 +1304,7 @@ private int ampersand(boolean spaceSeen) throws IOException {
1255 1304 //if the warning is generated, the getPosition() on line 954 (this line + 18) will create
1256 1305 //a wrong position if the "inclusive" flag is not set.
1257 1306 ISourcePosition tmpPosition = getPosition();
1258   - if (isARG() && spaceSeen && !Character.isWhitespace(c)) {
  1307 + if (isSpaceArg(c, spaceSeen)) {
1259 1308 if (warnings.isVerbose()) warnings.warning(ID.ARGUMENT_AS_PREFIX, tmpPosition, "`&' interpreted as argument prefix");
1260 1309 c = Tokens.tAMPER;
1261 1310 } else if (isBEG()) {
@@ -1854,7 +1903,7 @@ private int minus(boolean spaceSeen) throws IOException {
1854 1903 yaccValue = new Token("->", getPosition());
1855 1904 return Tokens.tLAMBDA;
1856 1905 }
1857   - if (isBEG() || (isARG() && spaceSeen && !Character.isWhitespace(c))) {
  1906 + if (isBEG() || isSpaceArg(c, spaceSeen)) {
1858 1907 if (isARG()) arg_ambiguous();
1859 1908 setState(LexState.EXPR_BEG);
1860 1909 src.unread(c);
@@ -1881,8 +1930,8 @@ private int percent(boolean spaceSeen) throws IOException {
1881 1930 yaccValue = new Token("%", getPosition());
1882 1931 return Tokens.tOP_ASGN;
1883 1932 }
1884   -
1885   - if (isARG() && spaceSeen && !Character.isWhitespace(c)) return parseQuote(c);
  1933 +
  1934 + if (isSpaceArg(c, spaceSeen)) return parseQuote(c);
1886 1935
1887 1936 determineExpressionState();
1888 1937
@@ -1938,7 +1987,7 @@ private int plus(boolean spaceSeen) throws IOException {
1938 1987 return Tokens.tOP_ASGN;
1939 1988 }
1940 1989
1941   - if (isBEG() || (isARG() && spaceSeen && !Character.isWhitespace(c))) {
  1990 + if (isBEG() || isSpaceArg(c, spaceSeen)) { //FIXME: arg_ambiguous missing
1942 1991 if (isARG()) arg_ambiguous();
1943 1992 setState(LexState.EXPR_BEG);
1944 1993 src.unread(c);
@@ -2084,13 +2133,11 @@ private int slash(boolean spaceSeen) throws IOException {
2084 2133 return Tokens.tOP_ASGN;
2085 2134 }
2086 2135 src.unread(c);
2087   - if (isARG() && spaceSeen) {
2088   - if (!Character.isWhitespace(c)) {
2089   - arg_ambiguous();
2090   - lex_strterm = new StringTerm(str_regexp, '\0', '/');
2091   - yaccValue = new Token("/",getPosition());
2092   - return Tokens.tREGEXP_BEG;
2093   - }
  2136 + if (isSpaceArg(c, spaceSeen)) {
  2137 + arg_ambiguous();
  2138 + lex_strterm = new StringTerm(str_regexp, '\0', '/');
  2139 + yaccValue = new Token("/",getPosition());
  2140 + return Tokens.tREGEXP_BEG;
2094 2141 }
2095 2142
2096 2143 determineExpressionState();
@@ -2110,9 +2157,19 @@ private int star(boolean spaceSeen) throws IOException {
2110 2157 yaccValue = new Token("**", getPosition());
2111 2158 return Tokens.tOP_ASGN;
2112 2159 }
2113   - src.unread(c);
  2160 +
  2161 + src.unread(c); // not a '=' put it back
2114 2162 yaccValue = new Token("**", getPosition());
2115   - c = Tokens.tPOW;
  2163 +
  2164 + if (isTwoZero && isSpaceArg(c, spaceSeen)) {
  2165 + if (warnings.isVerbose()) warnings.warning(ID.ARGUMENT_AS_PREFIX, getPosition(), "`**' interpreted as argument prefix");
  2166 + c = Tokens.tDSTAR;
  2167 + } else if (isTwoZero && isBEG()) {
  2168 + c = Tokens.tDSTAR;
  2169 + } else {
  2170 + if (!isOneEight) warn_balanced(c, spaceSeen, "*", "argument prefix");
  2171 + c = Tokens.tPOW;
  2172 + }
2116 2173 break;
2117 2174 case '=':
2118 2175 setState(LexState.EXPR_BEG);
@@ -2120,7 +2177,7 @@ private int star(boolean spaceSeen) throws IOException {
2120 2177 return Tokens.tOP_ASGN;
2121 2178 default:
2122 2179 src.unread(c);
2123   - if (isARG() && spaceSeen && !Character.isWhitespace(c)) {
  2180 + if (isSpaceArg(c, spaceSeen)) {
2124 2181 if (warnings.isVerbose()) warnings.warning(ID.ARGUMENT_AS_PREFIX, getPosition(), "`*' interpreted as argument prefix");
2125 2182 c = Tokens.tSTAR;
2126 2183 } else if (isBEG()) {
@@ -2463,11 +2520,6 @@ public int readUTFEscape(ByteList buffer, boolean stringLiteral, boolean symbolL
2463 2520 buffer.setEncoding(UTF8_ENCODING);
2464 2521 if (stringLiteral) tokenAddMBC(codepoint, buffer);
2465 2522 } else if (stringLiteral) {
2466   - if (codepoint == 0 && symbolLiteral) {
2467   - throw new SyntaxException(PID.INVALID_ESCAPE_SYNTAX, getPosition(),
2468   - getCurrentLine(), "symbol cannot contain '\\u0000'");
2469   - }
2470   -
2471 2523 buffer.append((char) codepoint);
2472 2524 }
2473 2525 } while (src.peek(' ') || src.peek('\t'));
@@ -2483,11 +2535,6 @@ public int readUTFEscape(ByteList buffer, boolean stringLiteral, boolean symbolL
2483 2535 buffer.setEncoding(UTF8_ENCODING);
2484 2536 if (stringLiteral) tokenAddMBC(codepoint, buffer);
2485 2537 } else if (stringLiteral) {
2486   - if (codepoint == 0 && symbolLiteral) {
2487   - throw new SyntaxException(PID.INVALID_ESCAPE_SYNTAX, getPosition(),
2488   - getCurrentLine(), "symbol cannot contain '\\u0000'");
2489   - }
2490   -
2491 2538 buffer.append((char) codepoint);
2492 2539 }
2493 2540 }
1  src/org/jruby/parser/.#Ruby19Parser.y
7 src/org/jruby/parser/ParserSupport.java
@@ -1145,6 +1145,13 @@ public DStrNode createDStrNode(ISourcePosition position) {
1145 1145 return new DStrNode(position);
1146 1146 }
1147 1147
  1148 + public Node asSymbol(ISourcePosition position, Node value) {
  1149 + // FIXME: This might have an encoding issue since toString generally uses iso-8859-1
  1150 + if (value instanceof StrNode) return new SymbolNode(position, ((StrNode) value).getValue().toString());
  1151 +
  1152 + return new DSymbolNode(position, (DStrNode) value);
  1153 + }
  1154 +
1148 1155 public Node literal_concat(ISourcePosition position, Node head, Node tail) {
1149 1156 if (head == null) return tail;
1150 1157 if (tail == null) return head;
880 src/org/jruby/parser/Ruby20Parser.java
440 additions, 440 deletions not shown
4 src/org/jruby/parser/Ruby20Parser.y
@@ -1703,7 +1703,7 @@ symbol_list : /* none */ {
1703 1703 $$ = new ArrayNode(lexer.getPosition());
1704 1704 }
1705 1705 | symbol_list word ' ' {
1706   - $$ = $1.add($2 instanceof EvStrNode ? new DSymbolNode($1.getPosition()).add($2) : $2);
  1706 + $$ = $1.add($2 instanceof EvStrNode ? new DSymbolNode($1.getPosition()).add($2) : support.asSymbol($1.getPosition(), $2));
1707 1707 }
1708 1708
1709 1709 qwords : tQWORDS_BEG ' ' tSTRING_END {
@@ -1734,7 +1734,7 @@ qsym_list : /* none */ {
1734 1734 $$ = new ArrayNode(lexer.getPosition());
1735 1735 }
1736 1736 | qsym_list tSTRING_CONTENT ' ' {
1737   - $$ = $1.add($2);
  1737 + $$ = $1.add(support.asSymbol($1.getPosition(), $2));
1738 1738 }
1739 1739
1740 1740 string_contents : /* none */ {
5 src/org/jruby/parser/Tokens.java
@@ -157,7 +157,10 @@
157 157 int tLAMBDA = DefaultRubyParser.tLAMBDA;
158 158 int tLAMBEG = DefaultRubyParser.tLAMBEG;
159 159 int tLABEL = DefaultRubyParser.tLABEL;
160   -
  160 + int tSYMBOLS_BEG = Ruby20Parser.tSYMBOLS_BEG;
  161 + int tQSYMBOLS_BEG = Ruby20Parser.tQSYMBOLS_BEG;
  162 + int tDSTAR = Ruby20Parser.tDSTAR;
  163 +
161 164 String[] operators = {"+@", "-@", "**", "<=>", "==", "===", "!=", ">=", "<=", "&&",
162 165 "||", "=~", "!~", "..", "...", "[]", "[]=", "<<", ">>", "::"};
163 166 }
2  test/externals/ruby1.9/excludes/TestParse.rb
@@ -2,7 +2,5 @@
2 2 exclude :test_assign_in_conditional, "needs investigation"
3 3 exclude :test_invalid_char, "needs investigation"
4 4 exclude :test_question, "needs investigation"
5   -exclude :test_symbol, "needs investigation"
6   -exclude :test_utf8_bom, "needs investigation"
7 5 exclude :test_void_expr_stmts_value, "needs investigation"
8 6 exclude :test_xstring, "needs investigation"

0 comments on commit f1ff8dd

Please sign in to comment.
Something went wrong with that request. Please try again.