Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
adammck committed Dec 31, 2008
0 parents commit 1498d7d
Show file tree
Hide file tree
Showing 5 changed files with 241 additions and 0 deletions.
20 changes: 20 additions & 0 deletions lib/fuzz.rb
@@ -0,0 +1,20 @@
#!/usr/bin/env ruby
# vim: noet

module Fuzz

	# Placeholder character (NUL) left behind wherever a Token
	# has been plucked out of a string. Without it, the text on
	# either side of the removed token would run together and
	# could be mistaken for one single token.
	Replacement = 0.chr

	# Regex source (a String, not a compiled Regexp) describing
	# everything that may separate two tokens in a submission:
	# the start of the string, a run of whitespace, semicolons or
	# commas, the Replacement placeholder, or the end of the string.
	Delimiter = ['\A', '[\s;,]+', Replacement, '\Z'].join('|')

end

dir = File.dirname(__FILE__)
require "#{dir}/fuzz/token.rb"
require "#{dir}/fuzz/match.rb"
26 changes: 26 additions & 0 deletions lib/fuzz/match.rb
@@ -0,0 +1,26 @@
#!/usr/bin/env ruby
# vim: noet

module Fuzz

	# Thin wrapper around a MatchData (as built by
	# Fuzz::Token::Base#match) whose first and last capture
	# groups are the delimiters surrounding a token.
	class Match
		attr_reader :match_data, :captures, :delimiters

		# md:: the MatchData to wrap. Its first and last captures
		#      are exposed via #delimiters, and everything in
		#      between via #captures.
		def initialize(md)
			@match_data = md

			# Callers are rarely interested in the delimiters, so
			# they are split off once, here, rather than having
			# every caller slice the array on every access.
			# (MatchData#captures hands back a fresh Array, so
			# trimming it in place does not disturb _md_.)
			@captures = md.captures
			leading   = @captures.shift
			trailing  = @captures.pop
			@delimiters = [leading, trailing]
		end

		# Array-style access to the token captures
		# (the delimiters are not included).
		def [](index)
			captures[index]
		end
	end
end
83 changes: 83 additions & 0 deletions lib/fuzz/token.rb
@@ -0,0 +1,83 @@
#!/usr/bin/env ruby
# vim: noet

module Fuzz
	module Token

		# Abstract base class for tokens. A subclass supplies a
		# +Pattern+ constant (a String regex chunk); this class
		# provides the machinery to find that pattern between
		# delimiters and to pluck it out of a string.
		class Base

			# Raises RuntimeError when called on Base itself.
			def initialize

				# this class serves no purpose by itself,
				# because it will never match anything.
				if self.class == Fuzz::Token::Base
					raise RuntimeError, "Fuzz::Token cannot be " +
						"instantiated directly. Use a subclass instead"
				end
			end

			# Returns the pattern (a String regex chunk) matched
			# by this class, or raises RuntimeError if the class
			# defines no +Pattern+ constant.
			def pattern
				unless self.class.const_defined?(:Pattern)
					raise RuntimeError, "#{self.class} has no pattern"
				end

				# the constant lives in the subclass body, which is
				# not in this lexical scope, so look it up dynamically
				self.class.const_get(:Pattern)
			end

			# Searches _str_ for this token's pattern wedged between
			# two delimiters. Returns the MatchData wrapped in a
			# Fuzz::Match, or nil when there is no match.
			def match(str)
				pat = pattern

				# Always isolate the pattern in its own group, so that a
				# top-level alternation ("a|b") cannot swallow the
				# delimiters. If the pattern declares no capture of its
				# own, capture the whole thing; this is vanity, so the
				# parentheses can be omitted from the Patterns of simple
				# Token subclasses.
				body = capturing?(pat) ? "(?:#{pat})" : "(#{pat})"

				del = "(#{Fuzz::Delimiter})"
				md = str.match(Regexp.new(del + body + del))
				md && Fuzz::Match.new(md)
			end

			# Non-destructive extraction. Returns nil when the token
			# is not found in _str_; otherwise a two element Array of
			# the Fuzz::Match and a copy of _str_ in which the token
			# (and its delimiters) is replaced by Fuzz::Replacement,
			# ready for further parsing.
			def extract(str)
				fm = match(str)
				return nil if fm.nil?

				md = fm.match_data
				[fm, md.pre_match + Fuzz::Replacement + md.post_match]
			end

			# Destructive variant of #extract: on success _str_ itself
			# is overwritten with the remaining text (the BANG warns
			# of the danger of this operation...) and the Fuzz::Match
			# is returned. Returns nil, leaving _str_ untouched, when
			# the token is not found.
			def extract!(str)
				found, remainder = extract(str)
				return nil if found.nil?

				str.replace(remainder)
				found
			end

			private

			# Reports whether _pat_ declares at least one capture group.
			# Asking the regex engine avoids being fooled by escaped
			# parentheses, "(?:...)" groups and "(" inside a character
			# class, none of which capture anything.
			def capturing?(pat)

				# the trailing empty alternative guarantees a match
				# against "", so the groups can simply be counted
				!Regexp.new("(?:#{pat})|").match("").captures.empty?
			end
		end
	end
end
8 changes: 8 additions & 0 deletions lib/fuzz/token/letters.rb
@@ -0,0 +1,8 @@
#!/usr/bin/env ruby
# vim: noet

# Token matching a run of one or more lowercase
# ASCII letters (a-z).
#
# NOTE(review): lib/fuzz.rb in this commit requires only
# fuzz/token.rb and fuzz/match.rb, so this file appears to
# need an explicit require before use -- confirm.
module Fuzz::Token
class Letters < Base
# Regex chunk returned by Base#pattern. It declares no
# capture group of its own, so Base#match captures the
# whole token.
Pattern = "[a-z]+"
end
end
104 changes: 104 additions & 0 deletions spec/token.rb
@@ -0,0 +1,104 @@
#!/usr/bin/env ruby
# vim: noet


# import rspec
require "rubygems"
require "spec"

# import the fuzz gem for testing
dir = File.dirname(__FILE__)
require "#{dir}/../lib/fuzz.rb"




# Minimal concrete token used as a fixture by the
# specs in this file: matches a run of one or more
# of the letters a, b and c.
class AbcToken < Fuzz::Token::Base
Pattern = "[abc]+"
end


# Behaviour of the abstract token class, exercised
# through the AbcToken fixture subclass.
describe Fuzz::Token::Base do
	it "refuses to initialize" do
		creating_base = lambda { Fuzz::Token::Base.new }
		creating_base.should raise_error
	end

	it "allows instances of subclasses" do
		creating_subclass = lambda { AbcToken.new }
		creating_subclass.should_not raise_error
	end

	it "returns nil when no match is found" do
		result = AbcToken.new.match("xyzzy")
		result.should be_nil
	end

	it "does not match tokens with no delimiters" do
		result = AbcToken.new.match("12345aabbcc98765")
		result.should be_nil
	end

	it "returns Fuzz::Match when a match is found" do
		result = AbcToken.new.match("12345 aabbcc 98765")
		result.should be_instance_of(Fuzz::Match)
	end


	# Non-destructive extraction: the argument is left alone
	# and the remaining text is handed back to the caller.
	describe "(extract)" do
		before(:each) do
			@abc = AbcToken.new
		end

		it "returns Array of Fuzz::Match and String" do
			pair = @abc.extract("zzz a yyy")
			pair.should be_instance_of(Array)
			pair[0].should be_instance_of(Fuzz::Match)
			pair[1].should be_instance_of(String)
		end

		it "returns modified String" do
			_, remainder = @abc.extract("zzz a yyy")
			remainder.should == "zzz#{Fuzz::Replacement}yyy"
		end

		it "extracts multiple Tokens" do
			# feed the leftover text of each extraction
			# into the next one, then check all three
			remaining = "zzz a b c yyy"
			found = []

			3.times do
				fm, remaining = @abc.extract(remaining)
				found << fm
			end

			found.map { |fm| fm.captures[0] }.should == ["a", "b", "c"]
		end
	end


	# Destructive extraction: the argument itself
	# shrinks with every token that is pulled out.
	describe "(extract!)" do
		before(:each) do
			@abc = AbcToken.new
		end

		it "modifies the argument" do
			str = "xxx a yyy b zzz c yyy"

			%w[a b c].each do |expected|
				@abc.extract!(str)[0].should == expected
			end
		end
	end
end


# Accessors of the wrapper returned by a successful match,
# checked against a token surrounded by multi-character
# delimiters.
describe Fuzz::Match do
	before(:each) do
		token = AbcToken.new
		@m = token.match("qqq;;;aabbcc,,,www")
	end

	it "returns extracted token data" do
		token_data = @m.captures
		token_data.should == ["aabbcc"]
	end

	it "returns extracted delimiters" do
		leading, trailing = ";;;", ",,,"
		@m.delimiters.should == [leading, trailing]
	end

	it "allows array syntax access to captures" do
		first_capture = @m[0]
		first_capture.should == "aabbcc"
	end
end

0 comments on commit 1498d7d

Please sign in to comment.