Skip to content

Commit

Permalink
First real commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Edgar committed Jan 26, 2011
1 parent 3ac9398 commit ba984c5
Show file tree
Hide file tree
Showing 6 changed files with 578 additions and 9 deletions.
426 changes: 426 additions & 0 deletions Background.md

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions README.rdoc → README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
= object_regex
# object_regex

Description goes here.
Provides regex-based searches on sequences of arbitrary objects. Developed for querying Ruby token streams, object_regex only requires that the
objects you are searching implement a single method that returns a string.

== Note on Patches/Pull Requests
## Note on Patches/Pull Requests

* Fork the project.
* Make your feature addition or bug fix.
Expand All @@ -12,6 +13,6 @@ Description goes here.
(If you want to have your own version, that is fine, but bump the version in a commit by itself so I can ignore it when I pull.)
* Send me a pull request. Bonus points for topic branches.

== Copyright
## Copyright

Copyright (c) 2011 Michael Edgar. See LICENSE for details.
4 changes: 4 additions & 0 deletions lib/object_regex.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Entry point for the library: refuse to load on interpreters older than 1.9,
# then pull in the actual implementation.
raise 'object_regex is only compatible with Ruby 1.9 or greater.' if RUBY_VERSION < "1.9"

require 'object_regex/implementation'
90 changes: 90 additions & 0 deletions lib/object_regex/implementation.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Provides general-purpose regex searching on any object implementing #reg_desc.
# See design_docs/object_regex for the mini-paper explaining it. With any luck,
# this will make it into Ripper so I won't have to do this here.
#
# How it works: every distinct token name in the pattern is assigned a
# fixed-width code made of characters from MAPPING_CHARS followed by a ';'
# terminator. The pattern is rewritten to use those codes, the input objects
# are rewritten to the concatenation of their codes, and an ordinary Regexp is
# run over the resulting string. Because every code has the same width
# (@item_size), character offsets convert to object indices by integer division.
#
# Pattern syntax (as implemented below):
#   name          matches an object whose #reg_desc equals "name"
#   .             matches any single object
#   [a b c]       matches an object whose #reg_desc is one of a, b or c
#   everything else (grouping, +, *, ?, {m,n}, |) is passed through to Regexp;
#   whitespace between tokens is ignored.
class ObjectRegex
  # @param pattern [String] an object-regex pattern (see class comment).
  def initialize(pattern)
    # generate_map must run first: it also sets @item_size, which
    # generate_pattern needs in order to expand dots.
    @map = generate_map(pattern)
    # Compile once here rather than letting String#match rebuild a Regexp from
    # the pattern string on every search.
    @pattern = Regexp.new(generate_pattern(pattern))
  end

  # Returns the fixed-width code for +reg_desc+, or the catch-all :FAILBOAT
  # code when +reg_desc+ does not appear in the pattern.
  def mapped_value(reg_desc)
    @map[reg_desc] || @map[:FAILBOAT]
  end

  ################## Mapping Generation #########################

  # A token name in a pattern: a letter followed by word characters.
  TOKEN_MATCHER = /[A-Za-z][\w]*/
  # Characters used as "digits" when encoding a token's index.
  MAPPING_CHARS = (('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a).freeze

  # Builds the token-name => code table for +pattern+ and records the width of
  # one encoded item in @item_size.
  #
  # @param pattern [String]
  # @return [Hash] token names (Strings) plus the :FAILBOAT key, mapped to codes.
  def generate_map(pattern)
    alphabet = pattern.scan(TOKEN_MATCHER).uniq
    # +1 leaves room for the FAILBOAT code used by unknown tokens.
    repr_size = Math.log(alphabet.size + 1, MAPPING_CHARS.size).ceil
    # Each item is repr_size code characters plus the ';' terminator.
    @item_size = repr_size + 1

    map = Hash[alphabet.map.with_index do |symbol, idx|
      [symbol, mapping_for_idx(repr_size, idx)]
    end]
    map.merge!(FAILBOAT: mapping_for_idx(repr_size, map.size))
  end

  # Encodes +idx+ as +repr_size+ characters from MAPPING_CHARS followed by ';'.
  def mapping_for_idx(repr_size, idx)
    digits = convert_to_mapping_radix(repr_size, idx)
    digits.map { |digit| MAPPING_CHARS[digit] }.join + ';'
  end

  # Returns the +repr_size+ least-significant digits of +num+ written in base
  # MAPPING_CHARS.size, most significant digit first.
  def convert_to_mapping_radix(repr_size, num)
    result = []
    repr_size.times do
      result.unshift(num % MAPPING_CHARS.size)
      num /= MAPPING_CHARS.size
    end
    result
  end

  ################## Pattern transformation #################

  # Rewrites an object-regex pattern into Regexp source over the encoded
  # alphabet. Relies on @map and @item_size, so generate_map must have been
  # called for the same pattern beforehand.
  #
  # @param pattern [String]
  # @return [String] Regexp source.
  def generate_pattern(pattern)
    replace_tokens(fix_dots(remove_ranges(pattern)))
  end

  # Turns "[a b c]" into "(?:a|b|c)".
  def remove_ranges(pattern)
    pattern.gsub(/\[([\w\t ]*)\]/) do |match|
      # Argument-less split ignores leading/trailing whitespace, so
      # "[ a b ]" does not produce an empty alternative that would match
      # zero objects.
      '(?:' + match[1..-2].split.join('|') + ')'
    end
  end

  # Expands each '.' so that it consumes exactly one encoded item.
  def fix_dots(pattern)
    pattern.gsub('.', '.' * (@item_size - 1) + ';')
  end

  # Replaces every token name with its code (wrapped in a non-capturing group
  # so quantifiers apply to the whole item) and strips whitespace.
  def replace_tokens(pattern)
    pattern.gsub(TOKEN_MATCHER) do |match|
      '(?:' + mapped_value(match) + ')'
    end.gsub(/\s/, '')
  end

  ############# Matching ##########################

  # Finds the first match in +input+ at or after object index +pos+.
  #
  # @param input [Array] objects responding to #reg_desc.
  # @param pos [Integer] index into +input+ (in objects, not characters) at
  #   which to start searching.
  # @return [Array, nil] the matching slice of +input+, or nil if none.
  def match(input, pos=0)
    new_input = mapped_input(input)
    # The encoded string has @item_size characters per object, so the object
    # index must be scaled before it is handed to String#match.
    found = new_input.match(@pattern, pos * @item_size)
    return unless found

    start, stop = found.begin(0) / @item_size, found.end(0) / @item_size
    input[start...stop]
  end

  # Finds every non-overlapping match in +input+, left to right.
  #
  # Zero-length matches are reported as empty arrays and the search then moves
  # on by one object, mirroring what String#scan does with empty matches.
  #
  # @param input [Array] objects responding to #reg_desc.
  # @return [Array<Array>] the matching slices of +input+.
  def all_matches(input)
    new_input = mapped_input(input)
    result, pos = [], 0
    while pos <= new_input.size && (match = new_input.match(@pattern, pos))
      start, stop = match.begin(0) / @item_size, match.end(0) / @item_size
      result << input[start...stop]
      pos = match.end(0)
      # An empty match does not advance the cursor; step over one item so the
      # loop cannot spin forever on patterns such as 'int*'.
      pos += @item_size if match.begin(0) == match.end(0)
    end
    result
  end

  # Encodes +input+ as the concatenation of each object's fixed-width code.
  def mapped_input(input)
    input.map { |object| mapped_value(object.reg_desc) }.join
  end
end
57 changes: 53 additions & 4 deletions spec/object_regex_spec.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,56 @@
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

describe "ObjectRegex" do
it "fails" do
fail "hey buddy, you should probably rename this file and start specing for real"
if RUBY_VERSION < "1.9"
describe 'ObjectRegex' do
it 'will raise upon loading under Ruby 1.8' do
expect { require 'object_regex' }.to raise_error(RuntimeError)
end
end
end
else
require 'object_regex'
# Minimal token stand-in for the specs: a type tag plus the token's text.
# Built with the block form of Struct.new rather than by subclassing a fresh
# Struct.new, which would insert an anonymous superclass and raise
# "superclass mismatch" if this file were ever loaded twice.
Token = Struct.new(:type, :contents) do
  # The string ObjectRegex matches against: the token's type name.
  def reg_desc
    type.to_s
  end
end

# Behavioural specs for ObjectRegex#all_matches, run against a fixed stream of
# Token objects whose #reg_desc is the token type as a string.
describe ObjectRegex do
context 'with a small input alphabet' do
before do
# Thirteen tokens, indices 0..12. Type sequence:
#   str str int str str int str int comment str comment str eof
@input = [Token.new(:str, '"hello"'),
Token.new(:str, '"there"'),
Token.new(:int, '2'),
Token.new(:str, '"worldagain"'),
Token.new(:str, '"highfive"'),
Token.new(:int, '5'),
Token.new(:str, 'jklkjl'),
Token.new(:int, '3'),
Token.new(:comment, '#lol'),
Token.new(:str, ''),
Token.new(:comment, '#no pairs'),
Token.new(:str, 'jkl'),
Token.new(:eof, '')]
end

# One or more consecutive (str, int) pairs; the second match spans two pairs.
it 'matches a simple token stream with a simple search pattern' do
matches = ObjectRegex.new('(str int)+').all_matches(@input)
matches.should == [@input[1..2], @input[4..7]]
end

# '.' stands for any single token, whatever its type.
it "matches the 'anything' dot" do
ObjectRegex.new('int .').all_matches(@input).should ==
[@input[2..3], @input[5..6], @input[7..8]]
end

# '[int comment]' matches a single token that is either an int or a comment.
it 'works with ranges ([xyz] syntax)' do
ObjectRegex.new('str [int comment]').all_matches(@input).should ==
[@input[1..2], @input[4..5], @input[6..7], @input[9..10]]
end

# '{2,3}' applies to the whole preceding token: two or three strs in a row.
it 'works with count syntax (eg {1,2})' do
ObjectRegex.new('str{2,3}').all_matches(@input).should ==
[@input[0..1], @input[3..4]]
end
end
end
end
1 change: 0 additions & 1 deletion spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
$LOAD_PATH.unshift(File.dirname(__FILE__))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require 'object_regex'
require 'spec'
require 'spec/autorun'

Expand Down

0 comments on commit ba984c5

Please sign in to comment.