initial commit

methodmissing · Dec 31, 2008 · 1498d7d · 1498d7d
commit 1498d7d
Show file tree

Hide file tree

Showing 5 changed files with 241 additions and 0 deletions.
diff --git a/lib/fuzz.rb b/lib/fuzz.rb
@@ -0,0 +1,20 @@
+#!/usr/bin/env ruby
+# vim: noet
+
+module Fuzz
+
+	# The character inserted in place of a Token
+	# when it is plucked out of a string (to prevent
+	# the surrounding text from beind considered a
+	# single token, when it is clearly not)
+	Replacement = 0.chr
+
+	# The regex chunk which is considered a valid
+	# delimiter between tokens in a form submission.
+	Delimiter = '\A|[\s;,]+|' + Replacement + '|\Z'
+
+end
+
+dir = File.dirname(__FILE__)
+require "#{dir}/fuzz/token.rb"
+require "#{dir}/fuzz/match.rb"
diff --git a/lib/fuzz/match.rb b/lib/fuzz/match.rb
@@ -0,0 +1,26 @@
+#!/usr/bin/env ruby
+# vim: noet
+
+module Fuzz
+	class Match
+		attr_reader :match_data, :captures, :delimiters
+
+		def initialize(md)
+			@match_data = md
+			cap = md.captures
+
+			# Break the captures from the delimiters
+			# (the first and last) and token (others)
+			# into their own accessors. Most of the
+			# time, we're not interested capturing
+			# the delimiters, and this slicing the
+			# array every single time
+			@delimiters = [cap.shift, cap.pop]
+			@captures = cap
+		end
+
+		def [](index)
+			@captures[index]
+		end
+	end
+end
diff --git a/lib/fuzz/token.rb b/lib/fuzz/token.rb
@@ -0,0 +1,83 @@
+#!/usr/bin/env ruby
+# vim: noet
+
+module Fuzz
+	module Token
+		class Base
+
+
+			def initialize
+
+				# this class serves no purpose
+				# by itself, because it will
+				# never match anything.
+				if self.class == Fuzz::Token::Base
+					raise RuntimeError, "Fuzz::Token cannot be " +\
+					"instantiated directly. Use a subclass instead"
+				end
+			end
+
+			# Returns the pattern (a string regex chunk)
+			# matched by this class, or raises RuntimeError
+			# if none is available.
+			def pattern
+				raise RuntimeError.new("#{self.class} has no pattern")\
+					unless self.class.const_defined?(:Pattern)
+
+				# ruby doesn't consider the class body of
+				# subclasses to be in this scope. weird.
+				self.class.const_get(:Pattern)
+			end
+
+
+			def match(str)
+				pat = pattern
+
+				# If the pattern contains no captures, wrap
+				# it in parenthesis to captures the whole
+				# thing. This is vanity, so we can omit
+				# the parenthesis from the Patterns of
+				# simple Token subclasses.
+				pat = "(" + pat + ")"\
+					unless pat.index "("
+
+				# attempt to match this class's patten wedged
+				# between delimiters, and return the MatchData
+				# wrapped in Fuzz::Match or nil (no match)
+				del = "(" + Fuzz::Delimiter + ")"
+				m = str.match(Regexp.new(del + pat + del))
+				(m == nil) ? nil : Fuzz::Match.new(m)
+			end
+
+
+			def extract(str)
+
+				# attempt to match the token against _str_
+				# via Base#match, and abort it it failed
+				fm = match(str)
+				return nil\
+					if fm.nil?
+
+				# return the Fuzz::Match and _str_ with the matched
+				# token replace by Fuzz::Replacement, to continue parsing
+				[fm, fm.match_data.pre_match + Fuzz::Replacement + fm.match_data.post_match]
+			end
+
+
+			def extract!(str)
+
+				# call Token#extract first,
+				# and abort it if failed
+				ext = extract(str)
+				return nil\
+					if ext.nil?
+
+				# update the argument (the BANG warns
+				# of the danger of this operation...),
+				# and return the Fuzz::Match
+				str.replace(ext[1])
+				ext[0]
+			end
+		end
+	end
+end
diff --git a/lib/fuzz/token/letters.rb b/lib/fuzz/token/letters.rb
@@ -0,0 +1,8 @@
+#!/usr/bin/env ruby
+# vim: noet
+
+module Fuzz::Token
+	class Letters < Base
+		Pattern = "[a-z]+"
+	end
+end
diff --git a/spec/token.rb b/spec/token.rb
@@ -0,0 +1,104 @@
+#!/usr/bin/env ruby
+# vim: noet
+
+
+# import rspec
+require "rubygems"
+require "spec"
+
+# import the fuzz gem for testing
+dir = File.dirname(__FILE__)
+require "#{dir}/../lib/fuzz.rb"
+
+
+
+
+class AbcToken < Fuzz::Token::Base # concrete token used as the fixture in these specs
+	Pattern = "[abc]+" # one or more of the characters a, b, c
+end
+
+
+describe Fuzz::Token::Base do
+	it "refuses to initialize" do
+		lambda { Fuzz::Token::Base.new }.should raise_error
+	end
+	# a subclass, such as AbcToken, may be constructed
+	it "allows instances of subclasses" do
+		lambda { AbcToken.new }.should_not raise_error
+	end
+	# "xyzzy" contains none of the characters a, b or c
+	it "returns nil when no match is found" do
+		AbcToken.new.match("xyzzy").should == nil
+	end
+	# the run of a/b/c is embedded in digits, with no delimiter on either side
+	it "does not match tokens with no delimiters" do
+		AbcToken.new.match("12345aabbcc98765").should == nil
+	end
+	# the same run, now separated from the digits by spaces
+	it "returns Fuzz::Match when a match is found" do
+		m = AbcToken.new.match("12345 aabbcc 98765")
+		m.class.should == Fuzz::Match
+	end
+
+	# extract returns [Fuzz::Match, new String] and does not modify its argument
+	describe "(extract)" do
+		before(:each) do
+			@abc = AbcToken.new
+		end
+
+		it "returns Array of Fuzz::Match and String" do
+			e = @abc.extract("zzz a yyy")
+			e.class.should == Array
+			e[0].class.should == Fuzz::Match
+			e[1].class.should == String
+		end
+		# the token and the delimiters around it become one Fuzz::Replacement
+		it "returns modified String" do
+			e = @abc.extract("zzz a yyy")
+			e[1].should == "zzz#{Fuzz::Replacement}yyy"
+		end
+		# each returned String is fed back in to pull out the next token
+		it "extracts multiple Tokens" do
+			ext1 = @abc.extract("zzz a b c yyy")
+			ext2 = @abc.extract(ext1[1])
+			ext3 = @abc.extract(ext2[1])
+
+			ext1[0].captures[0].should == "a"
+			ext2[0].captures[0].should == "b"
+			ext3[0].captures[0].should == "c"
+		end
+	end
+
+	# extract! performs the same extraction but rewrites its argument in place
+	describe "(extract!)" do
+		before(:each) do
+			@abc = AbcToken.new
+		end
+		# three successive calls on one String yield its three tokens in order
+		it "modifies the argument" do
+			str = "xxx a yyy b zzz c yyy"
+			@abc.extract!(str)[0].should == "a"
+			@abc.extract!(str)[0].should == "b"
+			@abc.extract!(str)[0].should == "c"
+		end
+	end
+end
+
+
+describe Fuzz::Match do
+	before(:each) do
+		@m = AbcToken.new.match("qqq;;;aabbcc,,,www")
+	end
+	# the token "aabbcc" sits between the delimiters ";;;" and ",,,"
+	it "returns extracted token data" do
+		@m.captures.should == ["aabbcc"]
+	end
+	# delimiters are reported as [leading, trailing]
+	it "returns extracted delimiters" do
+		@m.delimiters.should == [";;;", ",,,"]
+	end
+	# Match#[] indexes into captures, which exclude the delimiters
+	it "allows array syntax access to captures" do
+		@m[0].should == "aabbcc"
+	end
+end