Skip to content

Commit

Permalink
Added tests for time extractors and indexers/searchers.
Browse files Browse the repository at this point in the history
  • Loading branch information
louismullie committed Mar 12, 2012
1 parent bb7b0c6 commit a0ccc40
Show file tree
Hide file tree
Showing 12 changed files with 87 additions and 111 deletions.
1 change: 0 additions & 1 deletion .rspec
@@ -1,3 +1,2 @@
--colour
--format s -c
--order rand
2 changes: 1 addition & 1 deletion lib/treat/entities/abilities/delegatable.rb
Expand Up @@ -73,7 +73,7 @@ def call_worker(entity, task, worker, group, options)
end

if group.type == :transformer
self
entity
else
result
end
Expand Down
2 changes: 1 addition & 1 deletion lib/treat/entities/abilities/iterable.rb
Expand Up @@ -6,7 +6,7 @@ module Treat::Entities::Abilities::Iterable
# #each. It does not yield the top element being
# recursed.
#
# This function NEEDS to be ported to C (see source).
# This function NEEDS to be ported to C.
def each_entity(*types)
types = [:entity] if types.size == 0
f = false
Expand Down
2 changes: 1 addition & 1 deletion lib/treat/extractors/time/chronic.rb
Expand Up @@ -24,7 +24,7 @@ def self.time(entity, options = {})
time = ::Chronic.parse(s, {:guess => true})
end

if remove_time_from_ancestors(entity, time)
if entity.has_parent? && remove_time_from_ancestors(entity, time)
nil
else
time
Expand Down
3 changes: 2 additions & 1 deletion lib/treat/extractors/time/nickel.rb
Expand Up @@ -64,7 +64,8 @@ def self.time(entity, options = {})

return unless start_time

if remove_time_from_ancestors(entity, start_time)
if entity.has_parent? &&
remove_time_from_ancestors(entity, start_time)
nil
else
entity.set :time_recurrence,
Expand Down
3 changes: 2 additions & 1 deletion lib/treat/extractors/time/ruby.rb
Expand Up @@ -16,7 +16,8 @@ def self.time(entity, options = {})
return if s =~ /^[0-9]+$/
begin
time = ::DateTime.parse(s)
if remove_time_from_ancestors(entity, time)
if entity.has_parent? &&
remove_time_from_ancestors(entity, time)
nil
else
time
Expand Down
5 changes: 2 additions & 3 deletions lib/treat/languages/english.rb
Expand Up @@ -4,11 +4,10 @@ class Treat::Languages::English
OptionalDependencies = ['uea-stemmer', 'engtagger', 'active_support', 'english']

Extractors = {
:time => [:nickel],
:date => [:chronic, :ruby],
:time => [:chronic, :ruby, :nickel],
:topics => [:reuters],
:topic_words => [:lda],
:keywords => [:tf_idf, :topics_tf_idf],
:keywords => [:tf_idf],
:name_tag => [:stanford],
:coreferences => [:stanford],
:roles => [:naive]
Expand Down
20 changes: 16 additions & 4 deletions spec/collection.rb
Expand Up @@ -3,8 +3,8 @@
describe Treat::Entities::Collection do

before :all do
file = Treat.spec + 'samples/mathematicians'
@collection = Treat::Entities::Collection.build(file)
@file = Treat.spec + 'samples/mathematicians'
@collection = Treat::Entities::Collection.build(@file)
end

describe "Buildable" do
Expand Down Expand Up @@ -40,16 +40,28 @@

it "indexes the collection and stores the index " +
"in the folder .index inside the collection's folder " do


@collection.index
@collection.index.should eql @file + '/.index'
FileTest.directory?(@file + '/.index').should eql true

end

end

describe "#search" do

it "searches an indexed collection for a query " +
"and returns a list of documents containing a " +
"and returns an array of documents containing a " +
"match for the given query " do

docs = @collection.search(:q => 'Newton')
docs.size.should eql 4
# `should` and its matcher must sit on the same logical line. A line break
# directly after `.should` terminates the statement, so the `eql [...]`
# that followed was evaluated on its own and the titles were never
# actually compared against the expected list.
# NOTE(review): `eql` on arrays is order-sensitive; confirm that #search
# returns the documents in this order.
titles = docs.map { |d| d.chunk.title.to_s }
titles.should eql ["Isaac (Sir) Newton (1642-1727)",
"Gottfried Leibniz (1646-1716)",
"Leonhard Euler (1707-1783)",
"Archimedes of Syracuse (287-212 BC)"]

end

Expand Down
57 changes: 28 additions & 29 deletions spec/document.rb
Expand Up @@ -94,39 +94,38 @@

=begin
module Treat
module Tests
class TestFormatters < Test::Unit::TestCase
def setup
@doc = Treat::Tests::English::ShortDoc
@sentence = Treat::Tests::English::Sentence
end
def test_serializers_and_unserializers
# Test roundtrip Ruby -> YAML -> Ruby -> YAML
create_temp_file('yml') do |tmp|
@doc.serialize(:yaml, :file => tmp)
doc = Treat::Entities::Document(tmp)
assert_equal File.read(tmp).length,
doc.serialize(:yaml).length
end
# Test roundtrip Ruby -> XML -> Ruby -> XML.
create_temp_file('xml') do |tmp|
@doc.serialize(:xml, :file => tmp)
doc = Treat::Entities::Document(tmp)
assert_equal File.read(tmp).length,
doc.serialize(:xml).length
end
end
def test_readers
# This is done by loading a collection with all types of texts.
end
def test_serializers_and_unserializers
# Test roundtrip Ruby -> YAML -> Ruby -> YAML
create_temp_file('yml') do |tmp|
@doc.serialize(:yaml, :file => tmp)
doc = Treat::Entities::Document(tmp)
assert_equal File.read(tmp).length,
doc.serialize(:yaml).length
end
# Test roundtrip Ruby -> XML -> Ruby -> XML.
create_temp_file('xml') do |tmp|
@doc.serialize(:xml, :file => tmp)
doc = Treat::Entities::Document(tmp)
assert_equal File.read(tmp).length,
doc.serialize(:xml).length
end
end
end
def test_keywords
assert_nothing_raised do
topics = @col.topic_words(:lda)
@doc.keywords(:topics_frequency, :topic_words => topics)
end
end
def test_statistics
@doc.chunk.segment(:tactful).tokenize
assert_equal 1, @word.frequency_in(:document)
assert_nothing_raised { @word.tf_idf ; puts @word.tf_idf }
# assert_nothing_raised { @doc.statistics(:position_in) }
# assert_nothing_raised { @doc.statistics(:transition_matrix) }
# assert_nothing_raised { @doc.statistics(:transition_probability) }
end
=end
14 changes: 7 additions & 7 deletions spec/entity.rb
Expand Up @@ -15,23 +15,18 @@
@det = Treat::Entities::Word.new('The')
@det.set :category, :determiner
@det.set :tag, 'DT'
@det.set :tag_set, :penn
@adj = Treat::Entities::Word.new('lazy')
@adj.set :category, :adjective
@adj.set :tag, 'JJ'
@adj.set :tag_set, :penn
@noun = Treat::Entities::Word.new('fox')
@noun.set :category, :noun
@noun.set :tag, 'NN'
@noun.set :tag_set, :penn
@aux = Treat::Entities::Word.new('is')
@aux.set :category, :verb
@aux.set :tag, 'VBZ'
@aux.set :tag_set, :penn
@verb = Treat::Entities::Word.new('running')
@verb.set :category, :verb
@verb.set :tag, 'VBG'
@verb.set :tag_set, :penn
@dot = Treat::Entities::Punctuation.new('.')
@dot.set :tag, '.'
@paragraph << @sentence << [@noun_phrase, @verb_phrase, @dot]
Expand Down Expand Up @@ -146,7 +141,7 @@
end
end

describe "#each_entity(&entity_types) { |entity| ... }" do
describe "#each_entity(*entity_types) { |entity| ... }" do

context "when called with no arguments" do
it "recursively yields each element in " +
Expand Down Expand Up @@ -282,7 +277,12 @@

end

describe "Registrable" do
describe "Iterable" do

# Placeholder group for #each_entity(*types).
describe "#each_entity(*types)" do

# TODO: this example has no block and only a stub description ("y"),
# so it carries no expectations yet; replace it with a real example.
it "y"
end

end

Expand Down
4 changes: 3 additions & 1 deletion spec/languages.rb
Expand Up @@ -13,11 +13,13 @@

end

describe "#describe" do
describe "#describe(code)" do

it "returns a lowercase identifier representing the " +
"full name of a language, given its ISO-639-1/2 code." do
Treat::Languages.describe(:eng).should eql :english
end

end

end
85 changes: 24 additions & 61 deletions spec/phrase.rb
Expand Up @@ -7,31 +7,49 @@
describe "#build" do

context "when supplied with a sentence" do

it "creates a sentence with the text" do
sentence = "This is a sentence."
s = Treat::Entities::Phrase.build(sentence)
s.type.should eql :sentence
s.to_s.should eql sentence
end

end

context "when supplied with a phrase" do

it "creates a phrase with the text" do
phrase = "this is a phrase"
p = Treat::Entities::Phrase.build(phrase)
p.type.should eql :phrase
p.to_s.should eql phrase
end

end

end

end

# Examples for the extractor methods called on phrase-level input.
describe "Extractable" do

describe "#named_entity" do
# TODO: the example body below is empty, so it makes no assertion at all
# and cannot fail; add real expectations for the tagging.
it "tags the named entity words in the phrase" do

end

end

describe "#time" do
it "returns a DateTime object representing the time in the phrase" do
# Run the same check once per extractor listed under :time for English,
# so every listed extractor has to yield the year 2011 for this string.
Treat::Languages::English::Extractors[:time].each do |e|
t = 'Tuesday, January 5th 2011'.time(e)
# Only the year of the result is verified here.
t.year.should eql 2011
end
end
end
end

describe "Processable" do

describe "#tokenize" do
Expand Down Expand Up @@ -124,60 +142,5 @@
end

end

end

=begin
# encoding: utf-8
module Treat
module Tests
class TestExtractors < Test::Unit::TestCase
def setup
@time = Treat::Tests::English::Time
@date = Treat::Tests::English::Date
@doc = Treat::Tests::English::LongDoc
@word = Treat::Tests::English::Word
@col = Treat::Tests::English::Collection
end
def test_time
assert_nothing_raised { @time.time(:nickel) }
end
def test_date
assert_equal 2011, @date.date(:chronic).year
assert_equal 2011, @date.date(:ruby).year
end
def test_topic_words
assert_nothing_raised { @col.topic_words(:lda) }
end
def test_named_entity
p = 'Angela Merkel and Nicolas Sarkozy were the first ones to board the p'
assert_nothing_raised { @doc.named_entity(:stanford) }
end
def test_keywords
assert_nothing_raised do
topics = @col.topic_words(:lda)
@doc.keywords(:topics_frequency, :topic_words => topics)
end
end
def test_statistics
@doc.chunk.segment(:tactful).tokenize
assert_equal 1, @word.frequency_in(:document)
assert_nothing_raised { @word.tf_idf ; puts @word.tf_idf }
# assert_nothing_raised { @doc.statistics(:position_in) }
# assert_nothing_raised { @doc.statistics(:transition_matrix) }
# assert_nothing_raised { @doc.statistics(:transition_probability) }
end
end
end
end

=end
end

0 comments on commit a0ccc40

Please sign in to comment.