-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Michael Edgar
committed
Jan 26, 2011
1 parent
3ac9398
commit ba984c5
Showing
6 changed files
with
578 additions
and
9 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Entry point for the library. Refuse to load on interpreters older than 1.9:
# a bare `raise` with a message raises RuntimeError, so the failure surfaces at
# require time rather than later inside the implementation.
raise 'object_regex is only compatible with Ruby 1.9 or greater.' if RUBY_VERSION < '1.9'

require 'object_regex/implementation'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# Provides general-purpose regex searching on any object implementing #reg_desc.
# See design_docs/object_regex for the mini-paper explaining it. With any luck,
# this will make it into Ripper so I won't have to do this here.
#
# How it works: every distinct token name found in the pattern is assigned a
# fixed-width code (characters from MAPPING_CHARS followed by ';'). The pattern
# is rewritten in terms of those codes, the input objects are encoded the same
# way (any description not named in the pattern shares the :FAILBOAT code), and
# an ordinary Regexp is run over the encoded string. Because every code has the
# same width (@item_size), offsets in the encoded string divide evenly back
# into indices of the input.
class ObjectRegex
  # @param pattern [String] regex source whose atoms are token names
  #   (matching TOKEN_MATCHER), `.` for "any single item", and `[a b c]` for
  #   "any one of these tokens"; whitespace is insignificant.
  # @raise [RegexpError] if the rewritten pattern is not a valid regex.
  def initialize(pattern)
    @map = generate_map(pattern)
    # Compile once here rather than letting String#match rebuild it per call.
    @pattern = Regexp.new(generate_pattern(pattern))
  end

  # Returns the fixed-width code for +reg_desc+, or the :FAILBOAT code when the
  # description was not mentioned in the pattern.
  def mapped_value(reg_desc)
    @map[reg_desc] || @map[:FAILBOAT]
  end

  ################## Mapping Generation #########################

  TOKEN_MATCHER = /[A-Za-z][\w]*/
  MAPPING_CHARS = (('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a).freeze

  # Builds the token-name => code table for +pattern+ and records the width of
  # one encoded item in @item_size (code characters plus the ';' terminator).
  def generate_map(pattern)
    alphabet = pattern.scan(TOKEN_MATCHER).uniq
    # One extra code is reserved for :FAILBOAT.
    repr_size = representation_size(alphabet.size + 1)
    @item_size = repr_size + 1

    map = {}
    alphabet.each_with_index do |symbol, idx|
      map[symbol] = mapping_for_idx(repr_size, idx)
    end
    map[:FAILBOAT] = mapping_for_idx(repr_size, alphabet.size)
    map
  end

  # Encodes +idx+ as +repr_size+ characters from MAPPING_CHARS followed by ';'.
  def mapping_for_idx(repr_size, idx)
    digits = convert_to_mapping_radix(repr_size, idx)
    digits.map { |digit| MAPPING_CHARS[digit] }.join + ';'
  end

  # Returns +num+ as exactly +repr_size+ digits in base MAPPING_CHARS.size,
  # most significant digit first.
  def convert_to_mapping_radix(repr_size, num)
    result = []
    repr_size.times do
      result.unshift(num % MAPPING_CHARS.size)
      num /= MAPPING_CHARS.size
    end
    result
  end

  ################## Pattern transformation #################

  # Rewrites the user's pattern into regex source over the encoded alphabet.
  def generate_pattern(pattern)
    replace_tokens(fix_dots(remove_ranges(pattern)))
  end

  # Turns `[a b c]` into the alternation `(?:a|b|c)`.
  def remove_ranges(pattern)
    pattern.gsub(/\[([\w\t ]*)\]/) do
      # Argument-less split ignores leading/trailing whitespace, so `[ a b ]`
      # does not gain an empty alternative.
      '(?:' + Regexp.last_match(1).split.join('|') + ')'
    end
  end

  # Expands `.` into "any one encoded item". The expansion is grouped so that
  # a following quantifier (`.*`, `.+`, `.{2}`) applies to the whole item and
  # not just to its ';' terminator.
  def fix_dots(pattern)
    pattern.gsub('.', '(?:' + '.' * (@item_size - 1) + ';)')
  end

  # Replaces each token name with its (grouped) code and strips whitespace.
  def replace_tokens(pattern)
    pattern.gsub(TOKEN_MATCHER) do |token|
      '(?:' + mapped_value(token) + ')'
    end.gsub(/\s/, '')
  end

  ############# Matching ##########################

  # Finds the first match at or after index +pos+ of +input+.
  #
  # @param input [Array] objects responding to #reg_desc
  # @param pos [Integer] index into +input+ at which to start searching
  # @return [Array, nil] the matching slice of +input+, or nil if none
  def match(input, pos=0)
    # Scale the item index to a character offset in the encoded string.
    found = mapped_input(input).match(@pattern, pos * @item_size)
    matched_slice(input, found) if found
  end

  # Finds every non-overlapping, non-empty match in +input+, left to right.
  #
  # @param input [Array] objects responding to #reg_desc
  # @return [Array<Array>] the matching slices of +input+
  def all_matches(input)
    encoded = mapped_input(input)
    result = []
    pos = 0
    while pos <= encoded.size && (found = encoded.match(@pattern, pos))
      if found.end(0) > found.begin(0)
        result << matched_slice(input, found)
        pos = found.end(0)
      else
        # Zero-width match: nothing to report. Step one item forward so the
        # scan always makes progress instead of matching here forever.
        pos = found.end(0) + @item_size
      end
    end
    result
  end

  # Encodes +input+ as the concatenation of each object's code.
  def mapped_input(input)
    input.map { |object| mapped_value(object.reg_desc) }.join
  end

  private

  # Smallest number of MAPPING_CHARS digits able to distinguish
  # +symbol_count+ symbols, computed with integer arithmetic only.
  def representation_size(symbol_count)
    size = 0
    capacity = 1
    while capacity < symbol_count
      capacity *= MAPPING_CHARS.size
      size += 1
    end
    size
  end

  # Converts a MatchData over the encoded string back into a slice of +input+.
  def matched_slice(input, found)
    input[(found.begin(0) / @item_size)...(found.end(0) / @item_size)]
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,56 @@ | ||
require File.expand_path(File.dirname(__FILE__) + '/spec_helper') | ||
|
||
describe "ObjectRegex" do | ||
it "fails" do | ||
fail "hey buddy, you should probably rename this file and start specing for real" | ||
if RUBY_VERSION < "1.9" | ||
describe 'ObjectRegex' do | ||
it 'will raise upon loading under Ruby 1.8' do | ||
expect { require 'object_regex' }.to raise_error(RuntimeError) | ||
end | ||
end | ||
end | ||
else | ||
require 'object_regex' | ||
# Minimal token used as spec input: a (type, contents) pair whose regex
# description is its type name. Built directly with Struct.new and a block so
# no anonymous intermediate superclass is created.
Token = Struct.new(:type, :contents) do
  # @return [String] the token type as a string, used as its regex description
  def reg_desc
    type.to_s
  end
end
|
||
describe ObjectRegex do
  context 'with a small input alphabet' do
    # Rebuild the shared token stream before each example. The expectations
    # below refer to tokens by their position in this list.
    before do
      @input = [
        [:str,     '"hello"'],
        [:str,     '"there"'],
        [:int,     '2'],
        [:str,     '"worldagain"'],
        [:str,     '"highfive"'],
        [:int,     '5'],
        [:str,     'jklkjl'],
        [:int,     '3'],
        [:comment, '#lol'],
        [:str,     ''],
        [:comment, '#no pairs'],
        [:str,     'jkl'],
        [:eof,     '']
      ].map { |type, contents| Token.new(type, contents) }
    end

    it 'matches a simple token stream with a simple search pattern' do
      regex = ObjectRegex.new('(str int)+')
      regex.all_matches(@input).should == [@input[1..2], @input[4..7]]
    end

    it "matches the 'anything' dot" do
      regex = ObjectRegex.new('int .')
      expected = [@input[2..3], @input[5..6], @input[7..8]]
      regex.all_matches(@input).should == expected
    end

    it 'works with ranges ([xyz] syntax)' do
      regex = ObjectRegex.new('str [int comment]')
      expected = [@input[1..2], @input[4..5], @input[6..7], @input[9..10]]
      regex.all_matches(@input).should == expected
    end

    it 'works with count syntax (eg {1,2})' do
      regex = ObjectRegex.new('str{2,3}')
      expected = [@input[0..1], @input[3..4]]
      regex.all_matches(@input).should == expected
    end
  end
end
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters