Commit

hacked on everything, made it better

adammck committed Jan 5, 2009
1 parent 1498d7d commit 01f77d3
Showing 11 changed files with 484 additions and 50 deletions.
8 changes: 8 additions & 0 deletions lib/fuzz.rb
@@ -16,5 +16,13 @@ module Fuzz
end

dir = File.dirname(__FILE__)

# import the core classes
require "#{dir}/fuzz/token.rb"
require "#{dir}/fuzz/match.rb"
require "#{dir}/fuzz/parser.rb"
require "#{dir}/fuzz/errors.rb"

# and some common token classes
require "#{dir}/fuzz/token/gender.rb"
require "#{dir}/fuzz/token/age.rb"
16 changes: 16 additions & 0 deletions lib/fuzz/errors.rb
@@ -0,0 +1,16 @@
#!/usr/bin/env ruby
# vim: noet

module Fuzz
module Error

class FuzzError < StandardError
end

# Raised by Fuzz::Parser when results
# are accessed before they have been
# built (by calling Fuzz::Parser#parse)
class NotParsedYet < FuzzError
end
end
end
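
A minimal sketch of where NotParsedYet surfaces, assuming lib/fuzz.rb is on the load path and using the Parser added further down in this commit:

require "fuzz"

parser = Fuzz::Parser.new
parser.add_token "Age", :age

# results cannot be read before #parse has run
begin
  parser.matches
rescue Fuzz::Error::NotParsedYet
  puts "call #parse before asking for matches"
end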
32 changes: 27 additions & 5 deletions lib/fuzz/match.rb
@@ -1,15 +1,25 @@
#!/usr/bin/env ruby
# vim: noet

$spec = "../../spec/match.rb"


module Fuzz
class Match
attr_reader :match_data, :captures, :delimiters
attr_reader :token, :match_data, :captures, :delimiters

def initialize(md)
def initialize(token, md)
@token = token
@match_data = md
cap = md.captures

# Break the captures from the delimiters
# insist on receiving a Token,
# since this class doesn't do
# anything useful without it
raise RuntimeError\
unless token.is_a?(Fuzz::Token::Base)

# break the captures from the delimiters
# (the first and last) and token (others)
# into their own accessors. Most of the
# time, we're not interested in capturing
@@ -19,8 +29,20 @@ def initialize(md)
@captures = cap
end

def [](index)
@captures[index]
# Returns the captures encapsulated by this
# object after being normalized by the related
# Token object, to transform raw captured strings
# into useful semantic data. See: Token#normalize.
def value
begin
token.normalize(*@captures)

# if the normalize failed with ArgumentError, it's
# probably because the method was expecting a different
# number of captures, which indicates a broken regex
rescue ArgumentError => err
raise ArgumentError.new("Normalize failed for #{@captures.inspect} via #{token.inspect}")
end
end
end
end
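
As a rough illustration of how Fuzz::Match#value delegates to its token, a sketch using the Age token added later in this commit (whether the match succeeds depends on the Fuzz::Delimiter constant, which lives in lib/fuzz.rb and is not visible in this diff):

age = Fuzz::Token::Age.new("Age")
fm  = age.match("aged 13 years")   # => Fuzz::Match or nil, depending on the delimiters

unless fm.nil?
  fm.token      # => the Age instance that produced the match
  fm.captures   # => the captured strings, minus the delimiter captures
  fm.value      # => 13, via Age#normalize
end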
93 changes: 93 additions & 0 deletions lib/fuzz/parser.rb
@@ -0,0 +1,93 @@
#!/usr/bin/env ruby
# vim: noet

$spec = "../../spec/parser.rb"


module Fuzz
class Parser
attr_reader :tokens

def initialize
@tokens = []
@matches = nil
end

def add_token(title, token, *init_args)

# resolve symbolic types into
# a predefined token class
token = Fuzz::Token.const_get(camelize(token))\
if token.is_a?(Symbol)

# resolve classes into
# instances if necessary
token = token.new(title, *init_args)\
if token.is_a?(Class)

@tokens.push(token)
end

def parse(str)
@matches = []
summary = {}

@tokens.each do |token|
unless(extracted = token.extract!(str)).nil?
summary[token.name] = extracted.value
@matches.push(extracted)
end
end

# store the remains of the input, in
# case we want to do something useful
# with it (like refer it to a human)
@unparsed_str = str

# return nil for no matches, or hash
# containing a summary of the matches
(summary.length == 0) ? nil : summary
end

# Returns an array of the Fuzz::Match objects
# collected by the parser, or raises NotParsedYet
# if _parse_ has not been called yet.
def matches
raise_unless_parsed
@matches
end

# Returns an Array containing the parts
# of the parsed string that were not captured
# and normalized into useful data by _parse_.
#
# p = Fuzz::Parser.new
# p.add_token "age", :age
#
# p.parse("13 year old")
#   p.unparsed => []
#
# p.parse("13 blah blah")
#   p.unparsed => ["blah blah"]
#
def unparsed
raise_unless_parsed
@unparsed_str.split(Fuzz::Replacement)
end

private

def camelize(sym)
sym.to_s.gsub(/(?:\A|_)(.)/) { $1.upcase }
end

# Raises NotParsedYet unless @matches
# has been populated by _parse_, to
# be called by methods that don't make
# sense until something has been parsed.
def raise_unless_parsed
raise Fuzz::Error::NotParsedYet\
if @matches.nil?
end
end
end
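
A rough end-to-end sketch of the Parser added here; it assumes Token#extract! (not visible in this diff) behaves like Token::Base#extract but modifies the string in place, and that the Fuzz::Delimiter and Fuzz::Replacement constants are defined in lib/fuzz.rb:

p = Fuzz::Parser.new
p.add_token "Age", :age          # :age is camelized into Fuzz::Token::Age

summary = p.parse("13 years old, probably")
summary                          # => { :age => 13 }, keyed by Token#name
p.matches                        # => the Fuzz::Match objects behind the summary
p.unparsed                       # => the leftover text, split on Fuzz::Replacement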
118 changes: 99 additions & 19 deletions lib/fuzz/token.rb
@@ -1,12 +1,40 @@
#!/usr/bin/env ruby
# vim: noet

$spec = "../../spec/token.rb"


module Fuzz
module Token
class Base


def initialize
def self.defined_types
subclasses = []
base = Fuzz::Token::Base
ObjectSpace.each_object(Class) do |klass|
if klass.ancestors.include?(base) and klass != base
subclasses.push(klass)
end
end
# each_object returns a count, so explicitly
# return the subclasses we collected
subclasses
end


attr_reader :title, :options

def initialize(title=nil, options={})
@title = title

# if this token class has predefined
# options, then store them overridden
# by the given options
if self.class.const_defined?(:Options)
@options = self.class.const_get(:Options).merge(options)

# otherwise, just store the
# options as they were given
else
@options = options
end

# this class serves no purpose
# by itself, because it will
@@ -16,22 +44,31 @@ def initialize
"instantiated directly. Use a subclass instead"
end
end

# Returns an identifier for this token based upon
# the title, which is safe to use as a Hash key, by
# stripping non-alphanumerics and converting spaces
# to underscores. (Technically, any string (or Object)
# is safe to use as a Hash key, but it's ugly, and
# this is not.)
#
# SampleToken.new("Age of Child").name => :age_of_child
# SampleToken.new("It's a Weird Token Name!").name => :its_a_weird_token_name
#
def name
title.downcase.gsub(/\s+/, "_").gsub(/[^a-z0-9_]/i, "").to_sym
end

# Returns the pattern (a string regex chunk)
# matched by this class, or raises RuntimeError
# if none is available.

# Returns the pattern (a Regex) matched by this
# class, or raises RuntimeError if none is available.
def pattern
raise RuntimeError.new("#{self.class} has no pattern")\
unless self.class.const_defined?(:Pattern)

# ruby doesn't consider the class body of
# subclasses to be in this scope. weird.
self.class.const_get(:Pattern)
end


def match(str)
pat = pattern
pat = self.class.const_get(:Pattern)

# If the pattern contains no captures, wrap
# it in parentheses to capture the whole
@@ -41,12 +78,54 @@ def match(str)
pat = "(" + pat + ")"\
unless pat.index "("

# attempt to match this class's pattern wedged
# between delimiters, and return the MatchData
# wrapped in Fuzz::Match or nil (no match)
# return the pattern wedged between delimiters,
# to avoid matching within other token bodies
del = "(" + Fuzz::Delimiter + ")"
m = str.match(Regexp.new(del + pat + del))
(m == nil) ? nil : Fuzz::Match.new(m)
Regexp.new(del + pat + del)
end


def match(str)

# perform the initial match by comparing
# the string with this class's regex, and
# abort if nothing matches
md = str.match(pattern)
return nil if md.nil?

# wrap the return value in Fuzz::Match, to
# provide much more useful access than the
# raw MatchData from the regex
Fuzz::Match.new(self, md)
end


# Returns the "normalized" form of the strings
# captured by this class's Pattern via the _match_
# method, excluding the delimiters.
#
# This method provides a boring default behavior,
# which is to return nil for no captures, String
# for a single capture, or Array for multiple.
# Most subclasses should overload this, to return a
# more semantic value (like a DateTime, Weight, etc)
#
# t = SampleToken.new("My Token")
# t.normalize("beta", "gamma") => ["beta", "gamma"]
# t.normalize("alpha") => "alpha"
# t.normalize => nil
#
def normalize(*captures)
if captures.length == 0
return nil

elsif captures.length == 1
return captures[0]

# default: return as-is, and leave for
# the receiver to deal with. tokens doing
# this should probably overload this method.
else; return captures; end
end


@@ -55,12 +134,13 @@ def extract(str)
# attempt to match the token against _str_
# via Base#match, and abort if it failed
fm = match(str)
return nil\
if fm.nil?
return nil if fm.nil?
m = fm.match_data

# return the Fuzz::Match and _str_ with the matched
# token replaced by Fuzz::Replacement, to continue parsing
[fm, fm.match_data.pre_match + Fuzz::Replacement + fm.match_data.post_match]
join = ((!m.pre_match.empty? && !m.post_match.empty?) ? Fuzz::Replacement : "")
[fm, m.pre_match + join + m.post_match]
end


38 changes: 38 additions & 0 deletions lib/fuzz/token/age.rb
@@ -0,0 +1,38 @@
#!/usr/bin/env ruby
# vim: noet

module Fuzz::Token
class Age < Base

Prefix = '(?:aged?\s*)?'
Meat = '(\d+)'
Suffix = '(?:\s*(?:years? old|years?|yrs?|y/?o))?'

# create one big ugly regex
Pattern = Prefix + Meat + Suffix

# set reasonable boundaries as
# default, which can be overridden
# at initialization
Options = {
:min => 1,
:max => 99
}

# convert the long numeric
# capture into an integer
def normalize(age_str)
age_str.to_i
end

# various ways of specifying someone's age
Examples = {
"1" => 1,
"2 year" => 2,
"3 years" => 3,
"4 years old" => 4,
"age 5" => 5,
"aged 6 years" => 6,
"999 years old" => 999 }
end
end
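
For comparison, a hypothetical token subclass along the same lines (the Weight name, pattern, and examples below are illustrative only, not part of this commit):

module Fuzz::Token
  class Weight < Base
    Pattern = '(\d+)\s*(?:kg|kilos?|kilograms?)'

    # convert the captured digits into
    # an integer number of kilograms
    def normalize(kg_str)
      kg_str.to_i
    end

    Examples = {
      "50kg"     => 50,
      "65 kilos" => 65 }
  end
end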
17 changes: 17 additions & 0 deletions lib/fuzz/token/digits.rb
@@ -0,0 +1,17 @@
#!/usr/bin/env ruby
# vim: noet

module Fuzz::Token
class Number < Base
Pattern = '\d+'

# convert captured digits
# into a fixnum object
def normalize(digits_str)
digits_str.to_i
end

Examples = {
"123" => 123 }
end
end
