Skip to content

Commit

Permalink
Add #with_tco and helpers to evaluate all code in a block with TCO en…
Browse files Browse the repository at this point in the history
…abled
  • Loading branch information
tdg5 committed Nov 1, 2015
1 parent 6ff7f40 commit 5f0786b
Show file tree
Hide file tree
Showing 7 changed files with 424 additions and 0 deletions.
33 changes: 33 additions & 0 deletions lib/tco_method.rb
@@ -1,6 +1,7 @@
require "method_source"
require "tco_method/version"
require "tco_method/mixin"
require "tco_method/block_with_tco"

# The namespace for the TCOMethod gem. Home to private API methods employed by
# the {TCOMethod::Mixin} module to provide tail call optimized behavior to
Expand Down Expand Up @@ -28,4 +29,36 @@ def self.tco_eval(code, file = nil, path = nil, line = nil)
raise ArgumentError, "Invalid code string!" unless code.is_a?(String)
RubyVM::InstructionSequence.new(code, file, path, line, ISEQ_OPTIONS).eval
end

# Allows for executing a block of code with tail call optimization enabled.
#
# All code that is evaluated in the block will be evaluated with tail call
# optimization enabled, however here be dragons, so be warned of a few things:
#
# 1. Though it may not be obvious, any call to `require`, `load`, or similar
# methods from within the block will be evaluated by another part of the VM
# and will not be tail call optimized. This applies for `tco_eval` as well.
#
# 2. The block will be evaluated with a different binding than the binding it
# was defined in. That means that references to variables or other binding
# context will result in method errors. For example:
#
# some_variable = "Hello, World!"
# womp_womp = TCOMethod.with_tco { some_variable }
# # => NameError: Undefined local variable or method 'some_variable'
#
# 3. Though this approach is somewhat nicer than working with strings of
# code, it comes with the tradeoff that it relies on the `method_source`
# gem to do the work of finding the source of the block. There are situations
# where `method_source` can't accurately determine the source location of a
# block. That said, if you don't format your code like a maniac, you should be
# fine.
#
# @param [Proc] block The proc to evaluate with tail call optimization
# enabled.
# @raise [ArgumentError] Raised when no block is given.
# @return [Object] Returns whatever the result of evaluating the given block.
def self.with_tco(&block)
  raise ArgumentError, "Block required" unless block_given?
  # BlockWithTCO#initialize is declared as `initialize(&block)`, so the proc
  # must be forwarded as a block argument; passing it positionally raises an
  # ArgumentError for the unexpected argument.
  BlockWithTCO.new(&block).result
end
end
42 changes: 42 additions & 0 deletions lib/tco_method/ambiguous_source_error.rb
@@ -0,0 +1,42 @@
module TCOMethod
  # Raised when the source code of a block cannot be determined reliably.
  class AmbiguousSourceError < StandardError
    # Message used when no explicit message is supplied.
    MESSAGE = "Could not determine source of block".freeze

    # The exception this error was built to wrap, if any. Only populated when
    # the error is created through {.wrap}.
    attr_accessor :original_exception

    class << self
      # Builds an error describing a block whose source is ambiguous.
      #
      # @param [Proc] block The block for which the source is ambiguous.
      # @return [AmbiguousSourceError] A new exception whose message is the
      #   default message followed by the block's `inspect` output.
      def from_proc(block)
        new("#{MESSAGE} #{block.inspect}")
      end

      # Builds an error that carries the message of another exception and
      # keeps a reference to it. Useful for wrapping errors raised by
      # MethodSource.
      #
      # @param [Exception] exception The exception instance to wrap.
      # @return [AmbiguousSourceError] A new exception whose
      #   `original_exception` is the given exception.
      def wrap(exception)
        new(exception.message).tap do |wrapper|
          wrapper.original_exception = exception
        end
      end
    end

    # Creates a new instance of the exception.
    #
    # @param [String] message The message to use with the exception; defaults
    #   to {MESSAGE}.
    def initialize(message = MESSAGE)
      super(message)
    end
  end
end
107 changes: 107 additions & 0 deletions lib/tco_method/block_extractor.rb
@@ -0,0 +1,107 @@
require "tco_method/ambiguous_source_error"
require "method_source"
require "ripper"

module TCOMethod
  # Object encapsulating the logic to extract the source code of a given block.
  #
  # All offsets handled by this class are byte offsets: Ripper reports token
  # columns in bytes, so line lengths and the final slice of the source must be
  # measured in bytes as well or multibyte characters would shift the result.
  class BlockExtractor
    DO_STR = "do".freeze
    END_STR = "end".freeze

    # @return [String] The extracted source, prefixed with `lambda` or `proc`
    #   depending on what the original block reports from `lambda?`.
    attr_reader :source

    # Extracts the source of the given block.
    #
    # @param [Proc] block The block to extract source for. It must respond to
    #   `source` (provided by the `method_source` gem) and `lambda?`.
    # @raise [AmbiguousSourceError] Raised when the start or end of the block
    #   can't be determined unambiguously, or when MethodSource can't find the
    #   source at all.
    def initialize(block)
      source = block.source
      type = block.lambda? ? :lambda : :proc
      start_offset, end_offset = determine_offsets(block, source)
      # Slice by bytes because both offsets are derived from Ripper columns.
      @source = "#{type} #{source.byteslice(start_offset..end_offset)}"
    rescue MethodSource::SourceNotFoundError => ex
      raise AmbiguousSourceError.wrap(ex)
    end

    private

    # Encapsulates the logic required to determine the offset of the end of the
    # block. The end of the block is characterized by a matching curly brace
    # (`}`) or the `end` keyword.
    #
    # @param [Proc] block The block being extracted; used for error reporting.
    # @param [Array] tokens The tokens produced by `Ripper.lex` for `source`.
    # @param [String] source The source code of the provided block.
    # @param [Symbol] expected_matcher The token type expected to close the
    #   block, either `:on_kw` or `:on_rbrace`.
    # @raise [AmbiguousSourceError] Raised when zero or more than one candidate
    #   closing token is found on the last line.
    # @return [Integer] Negative byte offset, relative to the end of `source`,
    #   of the last byte of the block.
    def determine_end_offset(block, tokens, source, expected_matcher)
      lines = source.lines
      last_line_number = lines.length
      end_offset = nil
      tokens.reverse_each do |token|
        # Break once we're through with the last line.
        break if token[0][0] != last_line_number

        # Look for expected match to block opener
        next if token[1] != expected_matcher
        next if token[1] == :on_kw && token[2] != END_STR

        # Raise if we've already found something that looks like a block end.
        raise AmbiguousSourceError.from_proc(block) if end_offset
        # Ending offset is the byte position of the ending token, plus the
        # byte length of that token.
        end_offset = token[0][1] + token[2].bytesize
      end
      raise AmbiguousSourceError.from_proc(block) unless end_offset
      determine_end_offset_relative_to_source(end_offset, lines.last.bytesize)
    end

    # We subtract the length of the last line from end offset to determine the
    # negative offset into the source string. However we must subtract 1 to
    # correct for the negative offset referring to the character after the
    # desired terminal character.
    #
    # @param [Integer] end_offset Byte offset just past the closing token on
    #   the last line.
    # @param [Integer] last_line_length Byte length of the last line.
    # @return [Integer] Negative byte offset into the whole source.
    def determine_end_offset_relative_to_source(end_offset, last_line_length)
      end_offset - last_line_length - 1
    end

    # Tokenizes the source of the block as determined by the `method_source` gem
    # and determines the beginning and end of the block.
    #
    # In both cases the entire line is checked to ensure there's no unexpected
    # ambiguity as to the start or end of the block. See the test file for this
    # class for examples of ambiguous situations.
    #
    # @param [Proc] block The proc for which the starting offset of its source
    #   code should be determined.
    # @param [String] source The source code of the provided block.
    # @raise [AmbiguousSourceError] Raised when the source of the block cannot
    #   be determined unambiguously.
    # @return [Array<Integer>] The start and end byte offsets of the block's
    #   source code as 2-element Array.
    def determine_offsets(block, source)
      tokens = Ripper.lex(source)
      start_offset, start_token = determine_start_offset(block, tokens)
      # A `do` opener must be closed by the `end` keyword, a `{` by a `}`.
      expected_match = start_token == :on_kw ? :on_kw : :on_rbrace
      end_offset = determine_end_offset(block, tokens, source, expected_match)
      [start_offset, end_offset]
    end

    # The logic required to determine the starting offset of the block. The
    # start of the block is characterized by the opening left curly brace (`{`)
    # of the block or the `do` keyword. Everything prior to the start of the
    # block is ignored because we can determine whether the block should be a
    # lambda or a proc by asking the block directly, and we may not always have
    # such a keyword available to us, e.g. a method that takes a block like
    # TCOMethod.with_tco.
    #
    # @param [Proc] block The block being extracted; used for error reporting.
    # @param [Array] tokens The tokens produced by `Ripper.lex`.
    # @raise [AmbiguousSourceError] Raised when zero or more than one candidate
    #   opening token is found on the first line.
    # @return [Array] The byte offset of the opening token on line 1 and the
    #   token's type (`:on_lbrace` or `:on_kw`).
    def determine_start_offset(block, tokens)
      start_offset = start_token = nil
      # The start of the block should occur somewhere on line 1.
      # Check the whole line to ensure there aren't multiple blocks on the line.
      tokens.each do |token|
        # Break after line 1.
        break if token[0][0] != 1

        # Look for a left brace (`{`) or `do` keyword.
        if token[1] == :on_lbrace || (token[1] == :on_kw && token[2] == DO_STR)
          # Raise if we've already found something that looks like a block
          # start.
          raise AmbiguousSourceError.from_proc(block) if start_offset
          start_token = token[1]
          start_offset = token[0][1]
        end
      end
      raise AmbiguousSourceError.from_proc(block) unless start_offset
      [start_offset, start_token]
    end
  end
end
22 changes: 22 additions & 0 deletions lib/tco_method/block_with_tco.rb
@@ -0,0 +1,22 @@
require "tco_method/block_extractor"

module TCOMethod
  # Evaluates a block's source through TCOMethod.tco_eval when constructed and
  # exposes the value produced by calling the re-evaluated block.
  class BlockWithTCO
    # @return [Object] The value returned by the re-evaluated block.
    attr_reader :result

    # @param [Proc] block The block to re-evaluate and call.
    # @raise [ArgumentError] Raised when no block is given.
    def initialize(&block)
      raise ArgumentError, "Block required" if block.nil?
      @result = run_with_tco(block)
    end

    private

    # Extracts the block's source with BlockExtractor, evaluates that source
    # with TCOMethod.tco_eval and calls the resulting object.
    def run_with_tco(block)
      block_source = BlockExtractor.new(block).source
      recompiled = TCOMethod.tco_eval(block_source)
      recompiled.call
    end
  end
end
150 changes: 150 additions & 0 deletions test/unit/block_extractor_test.rb
@@ -0,0 +1,150 @@
require "pry"
require "test_helper"

module TCOMethod
# Unit tests for BlockExtractor.
#
# The helper methods at the bottom of this class are fixtures: each returns a
# block written in one particular source layout. Their exact formatting is the
# thing under test, so they should not be reformatted.
class BlockExtractorTest < TestCase
Subject = BlockExtractor
subject { Subject }

# Names of fixture methods whose blocks are expected to be extracted
# successfully. Each fixture's block returns :lambda or :proc, which the
# extraction test compares against `lambda?` on the re-evaluated block.
blocks = [
:lambda_brace_inline,
:lambda_brace_multi,
:lambda_do_inline,
:lambda_do_multi,
:method_brace_inline,
:method_brace_multi,
:method_do_inline,
:method_do_multi,
:proc_brace_inline,
:proc_brace_multi,
:proc_do_inline,
:proc_do_multi,
]

# Names of fixture methods whose blocks are expected to make the extractor
# raise AmbiguousSourceError.
unsourceable_blocks = [
:ambiguous_procs,
:a_hash_with_an_ambiguous_proc,
:an_ambiguous_proc_with_hash,
:an_unsourceable_proc,
]

context "block extraction" do
# One generated test per extractable fixture: extract the source, eval it,
# and compare the rebuilt block against the original.
blocks.each do |meth|
should "extract block in #{meth} form" do
block = send(meth)
block_source = subject.new(block).source
reblock = eval(block_source)
reblock_result = reblock.call

# Ensure both blocks return the same result
assert_equal block.call, reblock_result

# Ensure a lambda is used where appropriate
assert_equal reblock_result == :lambda, reblock.lambda?
end
end

# One generated test per unsourceable fixture: extraction must raise.
unsourceable_blocks.each do |meth|
should "raise when given a #{meth}" do
block = send(meth)
assert_raises(AmbiguousSourceError) { subject.new(block).source }
end
end

should "correctly strip trailing code at the end of the block" do
# The ').source' below should be plenty to test this concern.
block_source = subject.new(lambda do
"Hold on to your butts"
end).source
# If the trailing ').source' leaked into the extracted source, eval would
# raise a SyntaxError, which is turned into a test failure here.
begin
eval(block_source)
rescue SyntaxError
assert false, "Syntax error in block source"
end
end
end

# Fixture: an empty hash literal followed by a proc on the same line, giving
# two left braces on the block's first line.
# This ambiguity could be handled, but encourages poorly formatted code and
# doesn't seem worth the effort presently.
def a_hash_with_an_ambiguous_proc
{}; proc { :proc }
end

# Fixture: two procs defined on the same line.
def ambiguous_procs
proc { :please }; proc { :dont_do_this }
end

# Fixture: a proc defined as a value inside a multi-line hash literal.
# NOTE(review): judging by the fixture name and the :method_source_error
# symbol, this presumably exercises the path where MethodSource itself fails
# to find the source -- confirm.
def an_unsourceable_proc
{
:block => proc { :method_source_error }
}[:block]
end

# Fixture: a proc followed by an empty hash literal on the same line.
# This ambiguity could be handled, but encourages poorly formatted code and
# doesn't seem worth the effort presently.
def an_ambiguous_proc_with_hash
block = proc { :proc }; {}
block
end

# Fixtures: `lambda` with brace and do/end delimiters, inline and multi-line.
def lambda_brace_inline
lambda { :lambda }
end

def lambda_brace_multi
lambda {
:lambda
}
end

def lambda_do_inline
lambda do; :lambda; end
end

def lambda_do_multi
lambda do
:lambda
end
end

# Fixtures: `Proc.new` with brace and do/end delimiters, inline and
# multi-line.
def method_brace_inline
Proc.new { :proc }
end

def method_brace_multi
Proc.new {
:proc
}
end

def method_do_inline
Proc.new do; :proc; end
end

def method_do_multi
Proc.new do
:proc
end
end

# Fixtures: `proc` with do/end and brace delimiters, inline and multi-line.
def proc_do_inline
proc do; :proc; end
end

def proc_do_multi
proc do
:proc
end
end

def proc_brace_inline
proc { :proc }
end

def proc_brace_multi
proc {
:proc
}
end
end
end

0 comments on commit 5f0786b

Please sign in to comment.