Permalink
Browse files

Updates tokenizer to correctly parse `/*comment*/i`

  • Loading branch information...
1 parent 5fe3051 commit 2f07d7a0df26ba01bf55d8e8381ba3aa4ed91971 Alex Smith committed Oct 2, 2012
Showing with 23 additions and 2 deletions.
  1. +1 −1 lib/rkelly/constants.rb
  2. +4 −0 lib/rkelly/token.rb
  3. +10 −1 lib/rkelly/tokenizer.rb
  4. +8 −0 test/test_tokenizer.rb
View
@@ -1,3 +1,3 @@
module RKelly
- VERSION = '1.0.5'
+ VERSION = '1.0.6'
end
View
@@ -11,5 +11,9 @@ def to_racc_token
return transformer.call(name, value) if transformer
[name, value]
end
+
+ def to_s
+ return "#{self.name}: #{self.value}"
+ end
end
end
View
@@ -90,7 +90,16 @@ def initialize(&block)
end
end
- token(:REGEXP, /\A\/(?:[^\/\r\n\\]*(?:\\[^\r\n][^\/\r\n\\]*)*)\/[gi]*/)
+ # To distinguish regular expressions from comments, we require that
+ # regular expressions start with a non-* character (i.e., they must not
+ # look like /*foo*/). Note that we can't depend on the length of the match
+ # to distinguish them correctly, since `/**/i` is longer when matched as a
+ # regular expression than when matched as a comment.
+ # Incidentally, we're also not matching empty regular expressions
+ # (e.g., // and //g). Here we could depend on match length and priority to
+ # determine that these are actually comments, but it turns out to be
+ # easier not to match them in the first place.
+ token(:REGEXP, /\A\/(?:[^\/\r\n\\*]|\\[^\r\n])[^\/\r\n\\]*(?:\\[^\r\n][^\/\r\n\\]*)*\/[gi]*/)
token(:S, /\A[\s\r\n]*/m)
token(:SINGLE_CHAR, /\A./) do |type, value|
View
@@ -127,6 +127,14 @@ def test_regular_expression_is_found_if_prev_token_is_non_literal_keyword
end
end
+ def test_regular_expression_is_not_found_if_block_comment_with_re_modifier
+ tokens = @tokenizer.tokenize("/**/i")
+ assert_tokens([
+ [:COMMENT, "/**/"],
+ [:IDENT, "i"]
+ ], tokens)
+ end
+
def test_comment_assign
tokens = @tokenizer.tokenize("foo = /**/;")
assert_tokens([

0 comments on commit 2f07d7a

Please sign in to comment.