Add #with_tco and helpers to evaluate all code in a block with TCO en…

…abled
tdg5 · Nov 1, 2015 · 5f0786b · 5f0786b
1 parent 6ff7f40
commit 5f0786b
Show file tree

Hide file tree

Showing 7 changed files with 424 additions and 0 deletions.
diff --git a/lib/tco_method.rb b/lib/tco_method.rb
@@ -1,6 +1,7 @@
 require "method_source"
 require "tco_method/version"
 require "tco_method/mixin"
+require "tco_method/block_with_tco"
 
 # The namespace for the TCOMethod gem. Home to private API methods employed by
 # the {TCOMethod::Mixin} module to provide tail call optimized behavior to
@@ -28,4 +29,36 @@ def self.tco_eval(code, file = nil, path = nil, line = nil)
     raise ArgumentError, "Invalid code string!" unless code.is_a?(String)
     RubyVM::InstructionSequence.new(code, file, path, line, ISEQ_OPTIONS).eval
   end
+
+  # Allows for executing a block of code with tail call optimization enabled.
+  #
+  # All code that is evaluated in the block will be evaluated with tail call
+  # optimization enabled, however here be dragons, so be warned of a few things:
+  #
+  # 1. Though it may not be obvious, any call to `require`, `load`, or similar
+  # methods from within the block will be evaluated by another part of the VM
+  # and will not be tail call optimized. This applies for `tco_eval` as well.
+  #
+  # 2. The block will be evaluated with a different binding than the binding it
+  # was defined in. That means that references to variables or other binding
+  # context will result in method errors. For example:
+  #
+  #     some_variable = "Hello, World!"
+  #     womp_womp = TCOMethod.with_tco { some_variable }
+  #     # => NameError: Undefined local variable or method 'some_variable'
+  #
+  # 3. Though this approach is somewhat nicer than working with strings of
+  # code, it comes with the tradeoff that it relies on the `method_source`
+  # gem to do the work of finding the source of the block. There are situations
+  # where `method_source` can't accurately determine the source location of a
+  # block. That said, if you don't format your code like a maniac, you should be
+  # fine.
+  #
+  # @param [Proc] block The proc to evaluate with tail call optimization
+  #   enabled.
+  # @return [Object] Returns whatever the result of evaluating the given block.
+  def self.with_tco(&block)
+    raise ArgumentError, "Block required" unless block_given?
+    # BlockWithTCO#initialize captures its block with `&block` and declares no
+    # positional parameters, so the proc must be forwarded with `&`; passing it
+    # as a regular argument raises ArgumentError (wrong number of arguments).
+    BlockWithTCO.new(&block).result
+  end
 end
diff --git a/lib/tco_method/ambiguous_source_error.rb b/lib/tco_method/ambiguous_source_error.rb
@@ -0,0 +1,42 @@
+module TCOMethod
+  # Error signalling that the source code of a block could not be reliably
+  # determined.
+  class AmbiguousSourceError < StandardError
+    # Message used when no explicit message is supplied.
+    MESSAGE = "Could not determine source of block".freeze
+
+    # The exception this error was built from, if any. Only populated by
+    # {AmbiguousSourceError.wrap}; otherwise nil.
+    attr_accessor :original_exception
+
+    # Builds an error describing the block whose source is ambiguous.
+    #
+    # @param [Proc] block The block for which the source is ambiguous.
+    # @return [AmbiguousSourceError] A new exception whose message is the
+    #   default message followed by the result of `block.inspect`.
+    def self.from_proc(block)
+      new("#{MESSAGE} #{block.inspect}")
+    end
+
+    # Builds an AmbiguousSourceError that carries another exception. The new
+    # error reuses the wrapped exception's message and exposes the wrapped
+    # exception through {#original_exception}.
+    #
+    # @param [Exception] exception The exception instance that should be
+    #   wrapped.
+    # @return [AmbiguousSourceError] A new exception instance wrapping the
+    #   given exception.
+    def self.wrap(exception)
+      new(exception.message).tap do |wrapper|
+        wrapper.original_exception = exception
+      end
+    end
+
+    # Creates a new instance of the exception.
+    #
+    # @param [String] message The message to use with the exception. Falls
+    #   back to {MESSAGE} when omitted.
+    def initialize(message = MESSAGE)
+      super(message)
+    end
+  end
+end
diff --git a/lib/tco_method/block_extractor.rb b/lib/tco_method/block_extractor.rb
@@ -0,0 +1,107 @@
+require "tco_method/ambiguous_source_error"
+require "method_source"
+require "ripper"
+
+module TCOMethod
+  # Recovers the source code of a block so it can be evaluated again elsewhere.
+  #
+  # The text of the expression containing the block is obtained by calling
+  # `source` on the block. That text is lexed with Ripper to find where the
+  # block itself opens (`{` or `do`) and closes (`}` or `end`); everything
+  # outside that span is dropped and the remainder is prefixed with `lambda` or
+  # `proc` according to `block.lambda?`.
+  class BlockExtractor
+    DO_STR = "do".freeze
+    END_STR = "end".freeze
+
+    # @return [String] Source code that recreates the block when evaluated.
+    attr_reader :source
+
+    # @param [Proc] block The block whose source should be extracted.
+    # @raise [AmbiguousSourceError] Raised when the opening or closing token of
+    #   the block cannot be identified unambiguously, or when looking up the
+    #   source raises MethodSource::SourceNotFoundError.
+    def initialize(block)
+      raw_source = block.source
+      prefix = block.lambda? ? "lambda" : "proc"
+      first_index, last_index = determine_offsets(block, raw_source)
+      @source = "#{prefix} #{raw_source[first_index..last_index]}"
+    rescue MethodSource::SourceNotFoundError => error
+      raise AmbiguousSourceError.wrap(error)
+    end
+
+    private
+
+    # Finds where the block ends. The terminator is the token on the final line
+    # of the source whose type is `expected_matcher`: a right curly brace
+    # (`}`), or a keyword whose text is `end`.
+    #
+    # Tokens are read as `[[line, column], type, text]`.
+    #
+    # @raise [AmbiguousSourceError] Raised unless exactly one terminator
+    #   candidate exists on the final line.
+    # @return [Integer] A negative index into `source` pointing at the last
+    #   character of the block.
+    def determine_end_offset(block, tokens, source, expected_matcher)
+      source_lines = source.lines
+      final_line_number = source_lines.length
+
+      # Walk the token stream backwards and stop as soon as a token is not on
+      # the final line.
+      final_line_tokens = tokens.reverse_each.take_while { |(line, _column), _type, _text|
+        line == final_line_number
+      }
+
+      closers = final_line_tokens.select { |_position, type, text|
+        type == expected_matcher && (type != :on_kw || text == END_STR)
+      }
+
+      # No candidate means the terminator is missing; more than one means we
+      # can't tell which of them closes the block.
+      raise AmbiguousSourceError.from_proc(block) unless closers.length == 1
+
+      # The block ends at the terminator's column plus the terminator's length.
+      (_line, column), _type, text = closers.first
+      determine_end_offset_relative_to_source(column + text.length, source_lines.last.length)
+    end
+
+    # Converts a column on the last line into a negative index into the whole
+    # source string. Subtracting the length of the last line yields the
+    # negative index of the character just past the terminator, so one more is
+    # subtracted to land on the terminator's final character.
+    def determine_end_offset_relative_to_source(end_offset, last_line_length)
+      end_offset - (last_line_length + 1)
+    end
+
+    # Lexes the source of the block and locates both ends of the block.
+    #
+    # The whole first line and the whole last line are inspected so that any
+    # ambiguity about where the block starts or ends is detected. See the test
+    # file for this class for examples of ambiguous situations.
+    #
+    # @param [Proc] block The proc whose source offsets should be determined.
+    # @param [String] source The source code of the provided block.
+    # @raise [AmbiguousSourceError] Raised when the source of the block cannot
+    #   be determined unambiguously.
+    # @return [Array<Integer>] The start and end offsets of the block's source
+    #   code as 2-element Array.
+    def determine_offsets(block, source)
+      lexed = Ripper.lex(source)
+      opening_offset, opener_type = determine_start_offset(block, lexed)
+      # A `do` opener (keyword token) is closed by a keyword token; a `{`
+      # opener is closed by a right brace token.
+      closer_type = opener_type == :on_kw ? :on_kw : :on_rbrace
+      [opening_offset, determine_end_offset(block, lexed, source, closer_type)]
+    end
+
+    # Finds where the block begins. The opener is the left curly brace (`{`) or
+    # `do` keyword found on line 1 of the source. Whatever precedes the opener
+    # is ignored: whether the result should be a lambda or a proc is answered
+    # by the block itself, and a keyword may not even be present, e.g. when the
+    # block was handed to a method such as TCOMethod.with_tco.
+    #
+    # @raise [AmbiguousSourceError] Raised unless exactly one opener candidate
+    #   exists on line 1.
+    # @return [Array] The column of the opener and the opener's token type.
+    def determine_start_offset(block, tokens)
+      # Only line 1 is considered, but all of it, so that several blocks on
+      # the same line are detected.
+      first_line_tokens = tokens.take_while { |(line, _column), _type, _text| line == 1 }
+
+      openers = first_line_tokens.select { |_position, type, text|
+        type == :on_lbrace || (type == :on_kw && text == DO_STR)
+      }
+      raise AmbiguousSourceError.from_proc(block) unless openers.length == 1
+
+      (_line, column), type = openers.first
+      [column, type]
+    end
+  end
+end
diff --git a/lib/tco_method/block_with_tco.rb b/lib/tco_method/block_with_tco.rb
@@ -0,0 +1,22 @@
+require "tco_method/block_extractor"
+
+module TCOMethod
+  # Runs a block through {TCOMethod.tco_eval} at construction time and keeps
+  # the value it produced.
+  #
+  # The block is not called directly: its source code is extracted with
+  # {BlockExtractor}, evaluated via {TCOMethod.tco_eval}, and the object that
+  # evaluation returns is then called.
+  class BlockWithTCO
+    # @return [Object] Whatever calling the re-evaluated block returned.
+    attr_reader :result
+
+    # @param [Proc] block The block to evaluate.
+    # @raise [ArgumentError] Raised when no block is given.
+    def initialize(&block)
+      raise ArgumentError, "Block required" if block.nil?
+      @result = eval(block)
+    end
+
+    private
+
+    # Re-evaluates the block from its source and calls the resulting object.
+    # Note that within this class this method takes precedence over
+    # Kernel#eval.
+    def eval(block)
+      code = extract_source(block)
+      TCOMethod.tco_eval(code).call
+    end
+
+    # Returns the source code of the given block as reported by BlockExtractor.
+    def extract_source(block)
+      BlockExtractor.new(block).source
+    end
+  end
+end
diff --git a/test/unit/block_extractor_test.rb b/test/unit/block_extractor_test.rb
@@ -0,0 +1,150 @@
+require "pry"
+require "test_helper"
+
+module TCOMethod
+  # Tests for BlockExtractor. Each fixture method at the bottom of this class
+  # returns a block written in a particular layout; the tests feed those blocks
+  # to the extractor and check either the extracted source or the error raised.
+  #
+  # NOTE: the extractor works from the source text of these fixtures, so their
+  # exact layout (inline vs. multi-line, braces vs. do/end) is significant.
+  # Do not reformat the fixture methods.
+  class BlockExtractorTest < TestCase
+    Subject = BlockExtractor
+    subject { Subject }
+
+    # Names of fixture methods whose block source is expected to be extracted
+    # successfully. Each of these blocks returns :lambda or :proc.
+    blocks = [
+      :lambda_brace_inline,
+      :lambda_brace_multi,
+      :lambda_do_inline,
+      :lambda_do_multi,
+      :method_brace_inline,
+      :method_brace_multi,
+      :method_do_inline,
+      :method_do_multi,
+      :proc_brace_inline,
+      :proc_brace_multi,
+      :proc_do_inline,
+      :proc_do_multi,
+    ]
+
+    # Names of fixture methods for which extraction is expected to raise
+    # AmbiguousSourceError.
+    unsourceable_blocks = [
+      :ambiguous_procs,
+      :a_hash_with_an_ambiguous_proc,
+      :an_ambiguous_proc_with_hash,
+      :an_unsourceable_proc,
+    ]
+
+    context "block extraction" do
+      # One test per extractable fixture: evaluate the extracted source and
+      # compare the rebuilt block against the original.
+      blocks.each do |meth|
+        should "extract block in #{meth} form" do
+          block = send(meth)
+          block_source = subject.new(block).source
+          reblock = eval(block_source)
+          reblock_result = reblock.call
+
+          # Ensure both blocks return the same result
+          assert_equal block.call, reblock_result
+
+          # Ensure a lambda is used where appropriate
+          assert_equal reblock_result == :lambda, reblock.lambda?
+        end
+      end
+
+      # One test per fixture that the extractor is expected to reject.
+      unsourceable_blocks.each do |meth|
+        should "raise when given a #{meth}" do
+          block = send(meth)
+          assert_raises(AmbiguousSourceError) { subject.new(block).source }
+        end
+      end
+
+      should "correctly strip trailing code at the end of the block" do
+        # The ').source' below should be plenty to test this concern.
+        block_source = subject.new(lambda do
+          "Hold on to your butts"
+        end).source
+        begin
+          eval(block_source)
+        rescue SyntaxError
+          assert false, "Syntax error in block source"
+        end
+      end
+    end
+
+    # Fixtures. Layout matters here; see the note at the top of the class.
+
+    # This ambiguity could be handled, but encourages poorly formatted code and
+    # doesn't seem worth the effort presently.
+    def a_hash_with_an_ambiguous_proc
+      {}; proc { :proc }
+    end
+
+    # Two blocks on the same line.
+    def ambiguous_procs
+      proc { :please }; proc { :dont_do_this }
+    end
+
+    # Listed among the unsourceable fixtures. Judging by the symbol it returns,
+    # this presumably fails inside method_source rather than in the extractor's
+    # own token checks -- TODO confirm.
+    def an_unsourceable_proc
+      {
+        :block => proc { :method_source_error }
+      }[:block]
+    end
+
+    # This ambiguity could be handled, but encourages poorly formatted code and
+    # doesn't seem worth the effort presently.
+    def an_ambiguous_proc_with_hash
+      block = proc { :proc }; {}
+      block
+    end
+
+    def lambda_brace_inline
+      lambda { :lambda }
+    end
+
+    def lambda_brace_multi
+      lambda {
+        :lambda
+      }
+    end
+
+    def lambda_do_inline
+      lambda do; :lambda; end
+    end
+
+    def lambda_do_multi
+      lambda do
+        :lambda
+      end
+    end
+
+    def method_brace_inline
+      Proc.new { :proc }
+    end
+
+    def method_brace_multi
+      Proc.new {
+        :proc
+      }
+    end
+
+    def method_do_inline
+      Proc.new do; :proc; end
+    end
+
+    def method_do_multi
+      Proc.new do
+        :proc
+      end
+    end
+
+    def proc_do_inline
+      proc do; :proc; end
+    end
+
+    def proc_do_multi
+      proc do
+        :proc
+      end
+    end
+
+    def proc_brace_inline
+      proc { :proc }
+    end
+
+    def proc_brace_multi
+      proc {
+        :proc
+      }
+    end
+  end
+end