Skip to content

Commit

Permalink
Fix brace matching problem inside strings
Browse files Browse the repository at this point in the history
Previously unmatched braces inside strings would cause tokenization
errors. The expected behavior is that braces inside of strings would not
participate in brace matching (as they are part of a string). This
commit handles strings/brace-groups separately, making sure to match
opening/closing braces/quotations correctly.
  • Loading branch information
project-eutopia committed Mar 12, 2020
1 parent 3d434d4 commit 2a3642e
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 8 deletions.
136 changes: 130 additions & 6 deletions lib/keisan/tokenizer.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
module Keisan
class Tokenizer
TOKEN_CLASSES = [
Tokens::Group,
Tokens::String,
Tokens::Null,
Tokens::Boolean,
Tokens::Word,
Expand All @@ -27,8 +25,21 @@ class Tokenizer

# Normalizes the expression, splits it into string / brace-group / other
# sections (so braces inside string literals never participate in brace
# matching), then converts each section into tokens.
#
# @param expression [String] the raw expression to tokenize
# @raise [Keisan::Exceptions::TokenizingError] on an internal section-type error
def initialize(expression)
  @expression = self.class.normalize_expression(expression)
  @tokens = parse_strings_and_groups.flat_map do |sym, string|
    case sym
    when :string
      Tokens::String.new(string)
    when :group
      Tokens::Group.new(string)
    when :other
      # Only plain sections go through the regex scanner.
      tokenize!(string.scan(TOKEN_REGEX))
    else
      raise Keisan::Exceptions::TokenizingError.new("Internal error, unexpected symbol: #{sym}")
    end
  end
end

def self.normalize_expression(expression)
Expand All @@ -38,6 +49,118 @@ def self.normalize_expression(expression)

private

# Splits @expression into a flat list of [symbol, substring] pairs:
#   :string -- a quoted string literal, quotes included
#   :group  -- a balanced (...) / [...] / {...} group, delimiters included
#   :other  -- any other run of characters
# Braces inside string literals do not participate in brace matching.
#
# @return [Array<Array(Symbol, String)>]
# @raise [Keisan::Exceptions::TokenizingError] on mismatched/unmatched braces
#   or an unterminated string literal
def parse_strings_and_groups
  # Each opening brace and the closing brace required to match it.
  matching_brace = { "(" => ")", "[" => "]", "{" => "}" }

  braces = []          # stack of currently-open braces (non-empty => inside a group)
  braces_start = nil   # index where the outermost open group began

  tokens = []

  current_other = nil  # accumulator for plain text between strings/groups
  current_string = nil # accumulator for an in-progress string literal

  i = 0
  while i < @expression.size
    c = @expression[i]

    if !braces.empty?
      # Inside a brace group: only track string boundaries (so braces in
      # strings are ignored) and brace matching; the group's content is
      # copied wholesale when it closes.
      if current_string
        if c == "\\"
          # Escape character: skip the escaped character
          i += 1
          c = @expression[i]
        elsif c == current_string[0]
          # Exit string
          current_string = nil
        end

      # Not in string
      else
        case c
        # New string
        when '"', "'"
          current_string = c
        # New opening brace
        when "(", "[", "{"
          braces << c
        # Closing brace: must match the most recently opened brace
        when ")", "]", "}"
          if matching_brace[braces[-1]] != c
            raise Keisan::Exceptions::TokenizingError.new("Expected closing brace '#{matching_brace[braces[-1]]}', found '#{c}'")
          end
          braces.pop
        end
      end

      # Outermost brace just closed: emit the whole group verbatim.
      if braces.empty?
        tokens << [:group, @expression[braces_start..i]]
      end
    elsif current_string
      # Inside a top-level string literal: copy characters verbatim.
      if c == "\\"
        # Escape character: append the escaped character verbatim
        i += 1
        c = @expression[i]
        if c.nil?
          raise Keisan::Exceptions::TokenizingError.new("Unterminated string: #{current_string}")
        end
        current_string << c
      else
        current_string << c
        # Exit string once the matching quote character reappears.
        if c == current_string[0]
          tokens << [:string, current_string]
          current_string = nil
        end
      end
    else
      case c
      # New string
      when '"', "'"
        if current_other
          tokens << [:other, current_other]
          current_other = nil
        end

        current_string = c
      # New opening brace
      when "(", "[", "{"
        if current_other
          tokens << [:other, current_other]
          current_other = nil
        end

        braces = [c]
        braces_start = i
      # Closing brace with nothing open
      when ")", "]", "}"
        raise Keisan::Exceptions::TokenizingError.new("Found unmatched closing brace '#{c}'")
      else
        current_other ||= ""
        current_other << c
      end
    end

    i += 1
  end

  if current_other
    tokens << [:other, current_other]
    current_other = nil
  end

  # A string opened but never closed would otherwise be silently dropped.
  if current_string
    raise Keisan::Exceptions::TokenizingError.new("Unterminated string: #{current_string}")
  end

  if !braces.empty?
    raise Keisan::Exceptions::TokenizingError.new("Found unmatched opening brace '#{braces[0]}'")
  end

  tokens
end

# Converts newline line-delimiters into semicolon statement separators.
def self.normalize_line_delimiters(expression)
  expression.tr("\n", ";")
end
Expand All @@ -46,11 +169,12 @@ def self.remove_comments(expression)
expression.gsub(/#[^;]*/, "")
end

def tokenize!
@scan.map do |scan_result|
def tokenize!(scan)
scan.map do |scan_result|
i = scan_result.find_index {|token| !token.nil?}
token_string = scan_result[i]
token = TOKEN_CLASSES[i].new(token_string)
# binding.pry
if token.is_a?(Tokens::Unknown)
raise Keisan::Exceptions::TokenizingError.new("Unexpected token: \"#{token.string}\"")
end
Expand Down
5 changes: 3 additions & 2 deletions lib/keisan/tokens/group.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
module Keisan
module Tokens
class Group < Token
REGEX = /(\((?:[^\[\]\(\)\{\}]*+\g<1>*+)*+\)|\[(?:[^\[\]\(\)\{\}]*+\g<1>*+)*+\]|\{(?:[^\[\]\(\)\{\}]*+\g<1>*+)*+\})/
REGEX = /(\(|\)|\[|\]|\{|\})/

attr_reader :sub_tokens

# Builds a group token from a brace-delimited substring (delimiters included)
# and recursively tokenizes its interior.
def initialize(string)
  super
  # NOTE(review): super presumably stores the string already; confirm this
  # re-assignment of @string is actually needed.
  @string = string
  # REGEX matches a single brace character, so this checks only that the
  # first and last characters are braces -- not that they form a matching
  # open/close pair.
  raise Exceptions::InvalidToken.new(string) unless string[0].match(regex) && string[-1].match(regex)
  # Tokenize the content between the delimiters.
  @sub_tokens = Tokenizer.new(string[1...-1]).tokens
end

Expand Down
6 changes: 6 additions & 0 deletions spec/keisan/calculator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@
end
end

# Regression spec: braces inside string literals (including escaped ones)
# must not participate in brace/group matching.
describe "unmatched braces inside strings" do
  it "does not match against actual braces outside strings" do
    expect(calculator.evaluate("'1'+'2'+(']\\]') + (('3') + '4')")).to eq "12]]34"
  end
end

describe "#simplify" do
it "allows for undefined variables to still exist and returns a string representation of the expression" do
expect{calculator.evaluate("0*x+1")}.to raise_error(Keisan::Exceptions::UndefinedVariableError)
Expand Down
26 changes: 26 additions & 0 deletions spec/keisan/tokenizer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,32 @@
end
end

# Verifies that string literals containing brace characters do not break
# group tokenization. (Description renamed: it previously duplicated the
# "has nested groups properly tokenized" example that follows and did not
# describe what this example actually tests.)
it "has strings with embedded braces properly tokenized" do
  tokenizer = described_class.new("'1'+'2'+(']]') + (('3') + '4')")

  expect(tokenizer.tokens.map(&:class)).to match_array([
    Keisan::Tokens::String,
    Keisan::Tokens::ArithmeticOperator,
    Keisan::Tokens::String,
    Keisan::Tokens::ArithmeticOperator,
    Keisan::Tokens::Group,
    Keisan::Tokens::ArithmeticOperator,
    Keisan::Tokens::Group
  ])

  group = tokenizer.tokens[4]
  expect(group.string).to eq "(']]')"
  expect(group.sub_tokens.map(&:class)).to match_array([Keisan::Tokens::String])

  group = tokenizer.tokens[6]
  expect(group.string).to eq "(('3') + '4')"
  expect(group.sub_tokens.map(&:class)).to match_array([
    Keisan::Tokens::Group,
    Keisan::Tokens::ArithmeticOperator,
    Keisan::Tokens::String
  ])
end

it "has nested groups properly tokenized" do
tokenizer = described_class.new("1+(2+(3+4)+5)+(6)")

Expand Down

0 comments on commit 2a3642e

Please sign in to comment.