forked from adammck/fuzz
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 1498d7d
Showing
5 changed files
with
241 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/usr/bin/env ruby | ||
# vim: noet | ||
|
||
module Fuzz

	# The character inserted in place of a Token
	# when it is plucked out of a string (to prevent
	# the surrounding text from being considered a
	# single token, when it is clearly not). Frozen,
	# because this one String object is shared by
	# everything that references the constant.
	Replacement = 0.chr.freeze

	# The regex chunk (a String, not a Regexp) which
	# is considered a valid delimiter between tokens
	# in a form submission. The alternatives are:
	#
	#   \A          the start of the string
	#   [\s;,]+     a run of whitespace, ";" or ","
	#   Replacement the placeholder character above
	#   \Z          the end of the string
	#
	# Frozen for the same reason as Replacement.
	Delimiter = ('\A|[\s;,]+|' + Replacement + '|\Z').freeze

end
|
||
dir = File.dirname(__FILE__) | ||
require "#{dir}/fuzz/token.rb" | ||
require "#{dir}/fuzz/match.rb" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/usr/bin/env ruby | ||
# vim: noet | ||
|
||
module Fuzz

	# Wraps a MatchData whose first and last capture
	# groups are delimiters, and whose remaining
	# groups (if any) belong to the token itself.
	class Match
		attr_reader :match_data, :captures, :delimiters

		# _md_ is the MatchData to wrap. It is kept
		# as-is in #match_data, and its capture groups
		# are split between #delimiters and #captures.
		def initialize(md)
			@match_data = md

			# Peel the outermost two groups (the
			# delimiters) away from the groups in
			# between (the token), once, up front.
			# Most callers only want the token
			# captures, so this saves them slicing
			# the array on every access. A missing
			# group comes out as nil.
			leading, *inner, trailing = md.captures
			@delimiters = [leading, trailing]
			@captures = inner
		end

		# Array-style shortcut to the token captures
		# (the delimiters are not reachable this way).
		def [](index)
			captures[index]
		end
	end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
#!/usr/bin/env ruby | ||
# vim: noet | ||
|
||
module Fuzz
	module Token

		# Abstract parent of all tokens. A subclass
		# supplies a Pattern constant (a string regex
		# chunk); this class provides the machinery to
		# find that pattern between delimiters in a
		# string, and to pluck it out.
		class Base

			# Raises RuntimeError when called on Base
			# itself; subclasses instantiate normally.
			def initialize

				# this class serves no purpose
				# by itself, because it will
				# never match anything.
				if instance_of?(Fuzz::Token::Base)
					raise RuntimeError, "Fuzz::Token::Base cannot be " \
						"instantiated directly. Use a subclass instead"
				end
			end

			# Returns the pattern (a string regex chunk)
			# matched by this class, or raises RuntimeError
			# if none is available.
			def pattern
				unless self.class.const_defined?(:Pattern)
					raise RuntimeError, "#{self.class} has no pattern"
				end

				# ruby doesn't consider the class body of
				# subclasses to be in this scope, so the
				# constant must be looked up explicitly.
				self.class.const_get(:Pattern)
			end

			# Attempts to find this token in _str_, wedged
			# between two delimiters (see Fuzz::Delimiter).
			# Returns a Fuzz::Match wrapping the MatchData,
			# or nil if there was no match.
			def match(str)
				pat = pattern

				# If the pattern contains no captures, wrap
				# it in parentheses to capture the whole
				# thing. This is vanity, so we can omit
				# the parentheses from the Patterns of
				# simple Token subclasses. Real capture
				# groups are counted, so that escaped
				# parens and (?:...) groups do not fool us.
				pat = "(#{pat})" if capture_count(pat).zero?

				# wedge the pattern between delimiters. The
				# non-capturing wrapper stops a top-level
				# "|" in the pattern from splitting the
				# delimiters into separate alternatives,
				# and does not alter group numbering.
				del = "(#{Fuzz::Delimiter})"
				m = str.match(Regexp.new("#{del}(?:#{pat})#{del}"))
				m && Fuzz::Match.new(m)
			end

			# Non-destructive extraction. Returns nil if
			# the token is not found in _str_, otherwise a
			# two-element Array: the Fuzz::Match, and a new
			# String in which the matched token (including
			# the delimiters on either side of it) has been
			# replaced by Fuzz::Replacement.
			def extract(str)

				# attempt to match the token against _str_
				# via Base#match, and abort if it failed
				fm = match(str)
				return nil if fm.nil?

				# return the Fuzz::Match and _str_ with the
				# matched token replaced by Fuzz::Replacement,
				# so the caller can continue parsing
				md = fm.match_data
				[fm, md.pre_match + Fuzz::Replacement + md.post_match]
			end

			# Destructive extraction. Returns nil (leaving
			# _str_ untouched) if the token is not found,
			# otherwise rewrites _str_ in place and returns
			# the Fuzz::Match. _str_ must not be frozen.
			def extract!(str)

				# call Base#extract first,
				# and abort if it failed
				fm, remainder = extract(str)
				return nil if fm.nil?

				# update the argument (the BANG warns
				# of the danger of this operation...),
				# and return the Fuzz::Match
				str.replace(remainder)
				fm
			end

			private

			# Counts the capture groups in the regex chunk
			# _pat_. The trailing empty alternative always
			# matches the empty string, and the resulting
			# MatchData holds one slot for the whole match
			# plus one slot per capture group.
			def capture_count(pat)
				Regexp.new("(?:#{pat})|").match("").size - 1
			end
		end
	end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/usr/bin/env ruby | ||
# vim: noet | ||
|
||
module Fuzz::Token

	# A token made of one or more lowercase
	# ASCII letters.
	class Letters < Base

		# No parentheses here: Pattern is a bare
		# regex chunk. Frozen, because the same
		# String is handed out on every lookup.
		Pattern = "[a-z]+".freeze
	end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
#!/usr/bin/env ruby | ||
# vim: noet | ||
|
||
|
||
# import rspec | ||
require "rubygems" | ||
require "spec" | ||
|
||
# import the fuzz gem for testing | ||
dir = File.dirname(__FILE__) | ||
require "#{dir}/../lib/fuzz.rb" | ||
|
||
|
||
|
||
|
||
# A minimal concrete Token to exercise
# the behaviour inherited from Base.
class AbcToken < Fuzz::Token::Base
	Pattern = "[abc]+"
end


describe Fuzz::Token::Base do
	it "refuses to initialize" do
		proc { Fuzz::Token::Base.new }.should raise_error
	end

	it "allows instances of subclasses" do
		proc { AbcToken.new }.should_not raise_error
	end

	it "returns nil when no match is found" do
		result = AbcToken.new.match("xyzzy")
		result.should be_nil
	end

	it "does not match tokens with no delimiters" do
		result = AbcToken.new.match("12345aabbcc98765")
		result.should be_nil
	end

	it "returns Fuzz::Match when a match is found" do
		result = AbcToken.new.match("12345 aabbcc 98765")
		result.should be_an_instance_of(Fuzz::Match)
	end


	describe "(extract)" do
		before(:each) do
			@abc = AbcToken.new
		end

		it "returns Array of Fuzz::Match and String" do
			pair = @abc.extract("zzz a yyy")
			pair.should be_an_instance_of(Array)
			pair[0].should be_an_instance_of(Fuzz::Match)
			pair[1].should be_an_instance_of(String)
		end

		it "returns modified String" do
			_, remainder = @abc.extract("zzz a yyy")
			remainder.should == "zzz#{Fuzz::Replacement}yyy"
		end

		it "extracts multiple Tokens" do
			# feed the leftover string from each
			# extraction into the next one
			first, rest = @abc.extract("zzz a b c yyy")
			second, rest = @abc.extract(rest)
			third, = @abc.extract(rest)

			first.captures[0].should == "a"
			second.captures[0].should == "b"
			third.captures[0].should == "c"
		end
	end


	describe "(extract!)" do
		before(:each) do
			@abc = AbcToken.new
		end

		it "modifies the argument" do
			# each call consumes one token from the
			# same String, so the next call finds
			# the one after it
			input = "xxx a yyy b zzz c yyy"
			@abc.extract!(input)[0].should == "a"
			@abc.extract!(input)[0].should == "b"
			@abc.extract!(input)[0].should == "c"
		end
	end
end
|
||
|
||
describe Fuzz::Match do
	before(:each) do
		# a token wedged between two runs of
		# delimiter characters (";;;" and ",,,")
		@match = AbcToken.new.match("qqq;;;aabbcc,,,www")
	end

	it "returns extracted token data" do
		@match.captures.should == %w[aabbcc]
	end

	it "returns extracted delimiters" do
		leading, trailing = ";;;", ",,,"
		@match.delimiters.should == [leading, trailing]
	end

	it "allows array syntax access to captures" do
		@match[0].should == "aabbcc"
	end
end