Skip to content

Commit

Permalink
Naming consistency improvements and more specs.
Browse files Browse the repository at this point in the history
  • Loading branch information
louismullie committed Mar 11, 2012
1 parent e07c7b0 commit bb7b0c6
Show file tree
Hide file tree
Showing 42 changed files with 741 additions and 580 deletions.
27 changes: 26 additions & 1 deletion TODO
Expand Up @@ -15,7 +15,9 @@
- Save individual documents in a collection - Save individual documents in a collection
- Does it return self when using processors? - Does it return self when using processors?
- Same old value removal problem ? - Same old value removal problem ?
- Detect units in number
- 301 - 301
- Read autoselect


# Testing # Testing


Expand All @@ -38,4 +40,27 @@
- Tests for Wiki - Tests for Wiki
- Enju as a server - Enju as a server
- Sectionners - Sectionners
- Sentiment analysis - Sentiment analysis


# Code pad


# Find the lexical relations between words.
#
# Draft group definition kept in the TODO code pad. It is configured
# purely through the module-level setters provided by Treat::Groupable.
module Relations
# Add Groupable's methods to this module itself (module-level methods).
extend Treat::Groupable
# Declare the group as an annotator.
self.type = :annotator
# Entity types listed as targets of this group.
# NOTE(review): presumably the entities the annotator may be called on - confirm in Groupable.
self.targets = [:document, :zone, :sentence, :phrase]
# Name of the option associated with the presets below.
# NOTE(review): presumably each preset is passed as the value of :relation - confirm in Groupable.
self.preset_option = :relation
# The lexical relations offered as presets.
self.presets = [:hyponym_of, :hypernym_of,
:synonym_of, :antonym_of]
end

# Find the grammatical links between words.
#
# Draft group definition kept in the TODO code pad. It is configured
# purely through the module-level setters provided by Treat::Groupable.
module Linkages
# Add Groupable's methods to this module itself (module-level methods).
extend Treat::Groupable
# Declare the group as an annotator.
self.type = :annotator
# Only phrases are listed as targets of this group.
# NOTE(review): presumably the entities the annotator may be called on - confirm in Groupable.
self.targets = [:phrase]
# Name of the option associated with the presets below.
# NOTE(review): presumably each preset is passed as the value of :linkage - confirm in Groupable.
self.preset_option = :linkage
# The grammatical roles offered as presets.
self.presets = [:subject, :main_verb, :object]
end
1 change: 1 addition & 0 deletions lib/treat.rb
Expand Up @@ -42,6 +42,7 @@ class << self
require 'treat/kernel' require 'treat/kernel'
require 'treat/downloader' require 'treat/downloader'
require 'treat/languages' require 'treat/languages'
require 'treat/linguistics'
require 'treat/entities' require 'treat/entities'
require 'treat/categories' require 'treat/categories'
require 'treat/data_set' require 'treat/data_set'
Expand Down
8 changes: 5 additions & 3 deletions lib/treat/entities/abilities/buildable.rb
Expand Up @@ -9,7 +9,7 @@ module Treat::Entities::Abilities::Buildable
# Simple regexps to match common entities. # Simple regexps to match common entities.
WordRegexp = /^[[:alpha:]\-']+$/ WordRegexp = /^[[:alpha:]\-']+$/
NumberRegexp = /^#?([0-9]+)(\^\^[0-9]+)?$/ NumberRegexp = /^#?([0-9]+)(\^\^[0-9]+)?$/
PunctRegexp = /^[[:punct:]]+$/ PunctRegexp = /^[[:punct:]\$]+$/
UriRegexp = /^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$/ix UriRegexp = /^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$/ix
EmailRegexp = /.+\@.+\..+/ EmailRegexp = /.+\@.+\..+/
ExtensionRegexp = /^.*?\.([a-zA-Z0-9]{2,5})$/ ExtensionRegexp = /^.*?\.([a-zA-Z0-9]{2,5})$/
Expand All @@ -27,7 +27,7 @@ def build(file_or_value, options = {})


if fv =~ UriRegexp if fv =~ UriRegexp
from_url(file_or_value, options) from_url(file_or_value, options)
elsif File.readable?(fv) elsif !(fv == '.') && File.readable?(fv)
if FileTest.directory?(fv) if FileTest.directory?(fv)
from_folder(file_or_value, options) from_folder(file_or_value, options)
else else
Expand All @@ -50,6 +50,9 @@ def build(file_or_value, options = {})
# is user-created (i.e. by calling build # is user-created (i.e. by calling build
# instead of from_string directly). # instead of from_string directly).
def from_string(string, enforce_type = false) def from_string(string, enforce_type = false)

Treat::Helpers::DecimalPointEscaper.escape!(string)

enforce_type = true if caller_method == :build enforce_type = true if caller_method == :build


unless self == Treat::Entities::Entity unless self == Treat::Entities::Entity
Expand Down Expand Up @@ -258,7 +261,6 @@ def phrase_from_string(string)
def token_from_string(string) def token_from_string(string)


check_encoding(string) check_encoding(string)

if string == "'s" || string == "'S" if string == "'s" || string == "'S"
Treat::Entities::Clitic.new(string) Treat::Entities::Clitic.new(string)
elsif string =~ WordRegexp && elsif string =~ WordRegexp &&
Expand Down
3 changes: 2 additions & 1 deletion lib/treat/entities/abilities/checkable.rb
Expand Up @@ -10,9 +10,10 @@ module Treat::Entities::Abilities::Checkable
def check_has(feature, do_it = true) def check_has(feature, do_it = true)
return @features[feature] if has?(feature) return @features[feature] if has?(feature)
return send(feature) if do_it return send(feature) if do_it
task = caller_method(2) task = caller_method(2) # This is dangerous !
g1 = Treat::Categories.lookup(task) g1 = Treat::Categories.lookup(task)
g2 = Treat::Categories.lookup(feature) g2 = Treat::Categories.lookup(feature)

raise Treat::Exception, raise Treat::Exception,
"#{g1.type.to_s.capitalize} #{task} " + "#{g1.type.to_s.capitalize} #{task} " +
"requires #{g2.type} #{g2.method}." "requires #{g2.type} #{g2.method}."
Expand Down
2 changes: 1 addition & 1 deletion lib/treat/entities/abilities/magical.rb
Expand Up @@ -25,7 +25,7 @@ module Treat::Entities::Abilities::Magical
def magic(sym, *args) def magic(sym, *args)


@@entities_regexp ||= "(#{Treat::Entities.list.join('|')})" @@entities_regexp ||= "(#{Treat::Entities.list.join('|')})"
@@cats_regexp ||= "(#{Treat::Languages::WordCategories.join('|')})" @@cats_regexp ||= "(#{Treat::Linguistics::WordCategories.join('|')})"


method = sym.to_s =~ /entities/ ? method = sym.to_s =~ /entities/ ?
sym.to_s.gsub('entities', 'entitys') : sym.to_s.gsub('entities', 'entitys') :
Expand Down
5 changes: 4 additions & 1 deletion lib/treat/entities/entities.rb
Expand Up @@ -56,7 +56,10 @@ class Word < Token
class Clitic < Token; end class Clitic < Token; end


# Represents a number. # Represents a number.
class Number < Token; end class Number < Token
def to_i; to_s.to_i; end
def to_f; to_s.to_f; end
end


# Represents a punctuation sign. # Represents a punctuation sign.
class Punctuation < Token; end class Punctuation < Token; end
Expand Down
2 changes: 1 addition & 1 deletion lib/treat/formatters/visualizers/standoff.rb
Expand Up @@ -44,7 +44,7 @@ def self.visualize(entity, options = {})
end end


def self.ptb_escape(val) def self.ptb_escape(val)
Treat::Languages::Tags:: Treat::Linguistics::Tags::
PTBEscapeCharacters.each do |char, esc| PTBEscapeCharacters.each do |char, esc|
val.gsub!(char, val) val.gsub!(char, val)
end end
Expand Down
13 changes: 12 additions & 1 deletion lib/treat/groupable.rb
Expand Up @@ -110,7 +110,18 @@ class << self
def self.method def self.method
return @method if @method return @method if @method
m = ucc(cl(self)).dup m = ucc(cl(self)).dup
if m[-4..-1] == 'iers' if m[-4..-1] == 'zers'
if type == :annotator
if m[-6] == 'l'
m[-5..-1] = ''
else
m[-5..-1] = 'y'
end
else
m = m[0..-3]
end
n = m
elsif m[-4..-1] == 'iers'
m[-4..-1] = 'y' m[-4..-1] = 'y'
n = m n = m
elsif m[-3..-1] == 'ers' elsif m[-3..-1] == 'ers'
Expand Down
10 changes: 5 additions & 5 deletions lib/treat/inflectors.rb
Expand Up @@ -3,15 +3,15 @@
module Treat::Inflectors module Treat::Inflectors


# Return the stem (*not root form*) of a word. # Return the stem (*not root form*) of a word.
module Stem module Stemmers
extend Treat::Groupable extend Treat::Groupable
self.type = :annotator self.type = :annotator
self.targets = [:word] self.targets = [:word]
end end


# Retrieve the different declensions of a # Retrieve the different declensions of a
# noun (singular, plural). # noun (singular, plural).
module Declensions module Declensors
extend Treat::Groupable extend Treat::Groupable
self.type = :annotator self.type = :annotator
self.targets = [:word] self.targets = [:word]
Expand All @@ -21,7 +21,7 @@ module Declensions


# Retrieve the different conjugations of a word # Retrieve the different conjugations of a word
# given a mode, tense, person, and/or number. # given a mode, tense, person, and/or number.
module Conjugations module Conjugators
extend Treat::Groupable extend Treat::Groupable
self.type = :annotator self.type = :annotator
self.targets = [:word] self.targets = [:word]
Expand All @@ -32,15 +32,15 @@ module Conjugations


# Retrieve the full text description of a # Retrieve the full text description of a
# cardinal number. # cardinal number.
module CardinalForm module Cardinalizers
extend Treat::Groupable extend Treat::Groupable
self.type = :annotator self.type = :annotator
self.targets = [:number] self.targets = [:number]
end end


# Retrieve the full text description of an # Retrieve the full text description of an
# ordinal number. # ordinal number.
module OrdinalForm module Ordinalizers
extend Treat::Groupable extend Treat::Groupable
self.type = :annotator self.type = :annotator
self.targets = [:number] self.targets = [:number]
Expand Down
Expand Up @@ -3,7 +3,7 @@
# number in words in cardinal form. # number in words in cardinal form.
# #
# Project website: http://deveiate.org/projects/Linguistics/ # Project website: http://deveiate.org/projects/Linguistics/
module Treat::Inflectors::CardinalForm::Linguistics module Treat::Inflectors::Cardinalizers::Linguistics


require 'treat/loaders/linguistics' require 'treat/loaders/linguistics'


Expand Down Expand Up @@ -31,7 +31,7 @@ module Treat::Inflectors::CardinalForm::Linguistics
# as an array of word groups instead of a String. # as an array of word groups instead of a String.
# #
# More specific options when using :type => :ordinal: # More specific options when using :type => :ordinal:
def self.cardinal_form(entity, options = {}) def self.cardinal(entity, options = {})
Treat::Loaders::Linguistics. Treat::Loaders::Linguistics.
load(entity.language). load(entity.language).
numwords(entity.to_s, options) numwords(entity.to_s, options)
Expand Down
Expand Up @@ -2,7 +2,7 @@
# in the 'linguistics' gem that allow to conjugate verbs. # in the 'linguistics' gem that allow to conjugate verbs.
# #
# Project website: http://deveiate.org/projects/Linguistics/ # Project website: http://deveiate.org/projects/Linguistics/
module Treat::Inflectors::Conjugations::Linguistics module Treat::Inflectors::Conjugators::Linguistics


require 'treat/loaders/linguistics' require 'treat/loaders/linguistics'


Expand All @@ -29,7 +29,7 @@ module Treat::Inflectors::Conjugations::Linguistics
# - (Symbol) :count => :singular, :plural # - (Symbol) :count => :singular, :plural
# - (Symbol) :person => :first, :second, :third # - (Symbol) :person => :first, :second, :third
# #
def self.conjugations(entity, options = {}) def self.conjugate(entity, options = {})


options = DefaultOptions.merge(options) options = DefaultOptions.merge(options)
cat = entity.check_has(:category) cat = entity.check_has(:category)
Expand Down
31 changes: 31 additions & 0 deletions lib/treat/inflectors/declensors/active_support.rb
@@ -0,0 +1,31 @@
# This class is a wrapper for the ActiveSupport
# declension tools (ActiveSupport::Inflector).
#
# NOTE(review): this file is active_support.rb, but the class is
# named English, the same constant as the declensor built on the
# 'english' gem. Confirm whether it should be renamed.
class Treat::Inflectors::Declensors::English

  # Load the complete inflector. The pluralize and singularize
  # methods are defined on ActiveSupport::Inflector itself, not
  # on the ActiveSupport::Inflector::Inflections rule set, so
  # requiring only 'active_support/inflector/inflections' is
  # not enough.
  require 'active_support/inflector'

  # Decline a word (put it in singular or plural form) using
  # ActiveSupport::Inflector.
  #
  # entity  - the entity to decline; its :category feature must
  #           be available (it is obtained through check_has).
  # options - a Hash with the mandatory key :count, whose value
  #           is :singular or :plural.
  #
  # Returns the declined String, or nil when the category of the
  # entity is not a noun, adjective or determiner, or when :count
  # is neither :singular nor :plural.
  #
  # Raises Treat::Exception when the :count option is missing.
  def self.declense(entity, options = {})

    cat = entity.check_has(:category)
    # Only these categories are declined; anything else is skipped.
    return unless [:noun, :adjective, :determiner].include?(cat)

    unless options[:count]
      raise Treat::Exception,
      "Must supply option count (:singular or :plural)."
    end

    string = entity.to_s

    # Call the module-level inflection methods; the Inflections
    # class does not respond to pluralize or singularize.
    case options[:count]
    when :plural
      ActiveSupport::Inflector.pluralize(string)
    when :singular
      ActiveSupport::Inflector.singularize(string)
    end

  end

end
Expand Up @@ -5,14 +5,14 @@
# Released under the MIT License. # Released under the MIT License.
# #
# http://english.rubyforge.org # http://english.rubyforge.org
class Treat::Inflectors::Declensions::English class Treat::Inflectors::Declensors::English


require 'treat/inflectors/declensions/english/inflect' require 'treat/inflectors/declensors/english/inflect'


# Retrieve the declensions (singular, plural) # Retrieve the declensions (singular, plural)
# of an english word using a class lifted from # of an english word using a class lifted from
# the 'english' ruby gem. # the 'english' ruby gem.
def self.declensions(entity, options) def self.declense(entity, options)


cat = entity.check_has(:category) cat = entity.check_has(:category)
unless [:noun, :adjective, :determiner]. unless [:noun, :adjective, :determiner].
Expand Down
Expand Up @@ -5,7 +5,7 @@
# Released under the MIT License. # Released under the MIT License.
# #
# http://english.rubyforge.org # http://english.rubyforge.org
module Treat::Inflectors::Declensions::English::Inflect module Treat::Inflectors::Declensors::English::Inflect


@singular_of = {} @singular_of = {}
@plural_of = {} @plural_of = {}
Expand Down
Expand Up @@ -3,7 +3,7 @@
# declensions of a word. # declensions of a word.
# #
# Project website: http://deveiate.org/projects/Linguistics/ # Project website: http://deveiate.org/projects/Linguistics/
class Treat::Inflectors::Declensions::Linguistics class Treat::Inflectors::Declensors::Linguistics


require 'treat/loaders/linguistics' require 'treat/loaders/linguistics'


Expand All @@ -12,7 +12,7 @@ class Treat::Inflectors::Declensions::Linguistics
# Options: # Options:
# #
# - (Identifier) :count => :singular, :plural # - (Identifier) :count => :singular, :plural
def self.declensions(entity, options = {}) def self.declense(entity, options = {})


cat = entity.check_has(:category) cat = entity.check_has(:category)
unless [:noun, :adjective, :determiner]. unless [:noun, :adjective, :determiner].
Expand Down
Expand Up @@ -3,13 +3,13 @@
# number in words in ordinal form. # number in words in ordinal form.
# #
# Project website: http://deveiate.org/projects/Linguistics/ # Project website: http://deveiate.org/projects/Linguistics/
class Treat::Inflectors::OrdinalForm::Linguistics class Treat::Inflectors::Ordinalizers::Linguistics


require 'treat/loaders/linguistics' require 'treat/loaders/linguistics'


# Describe a number in words in ordinal form, using the # Describe a number in words in ordinal form, using the
# 'linguistics' gem. # 'linguistics' gem.
def self.ordinal_form(number, options = {}) def self.ordinal(number, options = {})
klass = Treat::Loaders::Linguistics.load(number.language) klass = Treat::Loaders::Linguistics.load(number.language)
klass.ordinate(number.to_s) klass.ordinate(number.to_s)
end end
Expand Down
Expand Up @@ -2,15 +2,15 @@
# Porter stemming algorithm, ported to Ruby from a # Porter stemming algorithm, ported to Ruby from a
# version coded up in Perl. This is a simplified # version coded up in Perl. This is a simplified
# implementation; for a true and fast Porter stemmer, # implementation; for a true and fast Porter stemmer,
# see Treat::Inflectors::Stem::PorterC. # see Treat::Inflectors::Stemmers::PorterC.
# #
# Authored by Ray Pereda (raypereda@hotmail.com). # Authored by Ray Pereda (raypereda@hotmail.com).
# Unknown license. # Unknown license.
# #
# Original paper: Porter, 1980. An algorithm for suffix stripping, # Original paper: Porter, 1980. An algorithm for suffix stripping,
# Program, Vol. 14, no. 3, pp 130-137, # Program, Vol. 14, no. 3, pp 130-137,
# Original C implementation: http://www.tartarus.org/~martin/PorterStemmer. # Original C implementation: http://www.tartarus.org/~martin/PorterStemmer.
class Treat::Inflectors::Stem::Porter class Treat::Inflectors::Stemmers::Porter


# Returns the stem of a word using a native Porter stemmer. # Returns the stem of a word using a native Porter stemmer.
# #
Expand Down
Expand Up @@ -5,7 +5,7 @@
# Original paper: Porter, 1980. An algorithm for suffix stripping, # Original paper: Porter, 1980. An algorithm for suffix stripping,
# Program, Vol. 14, no. 3, pp 130-137, # Program, Vol. 14, no. 3, pp 130-137,
# Original C implementation: http://www.tartarus.org/~martin/PorterStemmer. # Original C implementation: http://www.tartarus.org/~martin/PorterStemmer.
module Treat::Inflectors::Stem::PorterC module Treat::Inflectors::Stemmers::PorterC


# Require the 'ruby-stemmer' gem. # Require the 'ruby-stemmer' gem.
silence_warnings { require 'lingua/stemmer' } silence_warnings { require 'lingua/stemmer' }
Expand Down
Expand Up @@ -10,7 +10,7 @@
# Original paper: Jenkins, Marie-Claire, Smith, Dan, # Original paper: Jenkins, Marie-Claire, Smith, Dan,
# Conservative stemming for search and indexing, 2005. # Conservative stemming for search and indexing, 2005.
# http://www.uea.ac.uk/polopoly_fs/1.85493!stemmer25feb.pdf # http://www.uea.ac.uk/polopoly_fs/1.85493!stemmer25feb.pdf
class Treat::Inflectors::Stem::UEA class Treat::Inflectors::Stemmers::UEA


# Require the 'uea-stemmer' gem. # Require the 'uea-stemmer' gem.
silence_warnings { require 'uea-stemmer' } silence_warnings { require 'uea-stemmer' }
Expand Down
8 changes: 0 additions & 8 deletions lib/treat/languages.rb
Expand Up @@ -125,14 +125,6 @@ def self.get_languages
@@loaded = true @@loaded = true
end end


# A list of all possible word categories.
WordCategories = [
:adjective, :adverb, :noun, :verb, :interjection,
:clitic, :coverb, :conjunction, :determiner, :particle,
:preposition, :pronoun, :number, :symbol, :punctuation,
:complementizer
]

# Get the language list. # Get the language list.
get_languages get_languages


Expand Down

0 comments on commit bb7b0c6

Please sign in to comment.