Permalink
Browse files

Added tests for time extractors and indexers/searchers.

  • Loading branch information...
1 parent bb7b0c6 commit a0ccc40da510d1c53da8960d32f3cfcdaf955c44 @louismullie committed Mar 12, 2012
View
1 .rspec
@@ -1,3 +1,2 @@
---colour
--format s -c
--order rand
@@ -73,7 +73,7 @@ def call_worker(entity, task, worker, group, options)
end
if group.type == :transformer
- self
+ entity
else
result
end
@@ -6,7 +6,7 @@ module Treat::Entities::Abilities::Iterable
# #each. It does not yield the top element being
# recursed.
#
- # This function NEEDS to be ported to C (see source).
+ # This function NEEDS to be ported to C.
def each_entity(*types)
types = [:entity] if types.size == 0
f = false
@@ -24,7 +24,7 @@ def self.time(entity, options = {})
time = ::Chronic.parse(s, {:guess => true})
end
- if remove_time_from_ancestors(entity, time)
+ if entity.has_parent? && remove_time_from_ancestors(entity, time)
nil
else
time
@@ -64,7 +64,8 @@ def self.time(entity, options = {})
return unless start_time
- if remove_time_from_ancestors(entity, start_time)
+ if entity.has_parent? &&
+ remove_time_from_ancestors(entity, start_time)
nil
else
entity.set :time_recurrence,
@@ -16,7 +16,8 @@ def self.time(entity, options = {})
return if s =~ /^[0-9]+$/
begin
time = ::DateTime.parse(s)
- if remove_time_from_ancestors(entity, time)
+ if entity.has_parent? &&
+ remove_time_from_ancestors(entity, time)
nil
else
time
@@ -4,11 +4,10 @@ class Treat::Languages::English
OptionalDependencies = ['uea-stemmer', 'engtagger', 'active_support', 'english']
Extractors = {
- :time => [:nickel],
- :date => [:chronic, :ruby],
+ :time => [:chronic, :ruby, :nickel],
:topics => [:reuters],
:topic_words => [:lda],
- :keywords => [:tf_idf, :topics_tf_idf],
+ :keywords => [:tf_idf],
:name_tag => [:stanford],
:coreferences => [:stanford],
:roles => [:naive]
View
@@ -3,8 +3,8 @@
describe Treat::Entities::Collection do
before :all do
- file = Treat.spec + 'samples/mathematicians'
- @collection = Treat::Entities::Collection.build(file)
+ @file = Treat.spec + 'samples/mathematicians'
+ @collection = Treat::Entities::Collection.build(@file)
end
describe "Buildable" do
@@ -40,16 +40,28 @@
it "indexes the collection and stores the index " +
"in the folder .index inside the collection's folder " do
-
+
+ @collection.index
+ @collection.index.should eql @file + '/.index'
+ FileTest.directory?(@file + '/.index').should eql true
+
end
end
describe "#search" do
it "searches an indexed collection for a query " +
- "and returns a list of documents containing a " +
+ "and returns an array of documents containing a " +
"match for the given query " do
+
+ docs = @collection.search(:q => 'Newton')
+ docs.size.should eql 4
+ # The matcher must start on the same line as `.should`; otherwise Ruby
+ # ends the statement after `.should` and the `eql [...]` on the next
+ # line is built and discarded, so nothing is actually asserted.
+ docs.map { |d| d.chunk.title.to_s }.should eql [
+ "Isaac (Sir) Newton (1642-1727)",
+ "Gottfried Leibniz (1646-1716)",
+ "Leonhard Euler (1707-1783)",
+ "Archimedes of Syracuse (287-212 BC)"]
end
View
@@ -94,39 +94,38 @@
=begin
-module Treat
- module Tests
- class TestFormatters < Test::Unit::TestCase
-
- def setup
- @doc = Treat::Tests::English::ShortDoc
- @sentence = Treat::Tests::English::Sentence
- end
+def test_serializers_and_unserializers
+ # Test roundtrip Ruby -> YAML -> Ruby -> YAML
+ create_temp_file('yml') do |tmp|
+ @doc.serialize(:yaml, :file => tmp)
+ doc = Treat::Entities::Document(tmp)
+ assert_equal File.read(tmp).length,
+ doc.serialize(:yaml).length
+ end
+ # Test roundtrip Ruby -> XML -> Ruby -> XML.
+ create_temp_file('xml') do |tmp|
+ @doc.serialize(:xml, :file => tmp)
+ doc = Treat::Entities::Document(tmp)
+ assert_equal File.read(tmp).length,
+ doc.serialize(:xml).length
+ end
+end
- def test_readers
- # This is done by loading a collection with all types of texts.
- end
- def test_serializers_and_unserializers
- # Test roundtrip Ruby -> YAML -> Ruby -> YAML
- create_temp_file('yml') do |tmp|
- @doc.serialize(:yaml, :file => tmp)
- doc = Treat::Entities::Document(tmp)
- assert_equal File.read(tmp).length,
- doc.serialize(:yaml).length
- end
- # Test roundtrip Ruby -> XML -> Ruby -> XML.
- create_temp_file('xml') do |tmp|
- @doc.serialize(:xml, :file => tmp)
- doc = Treat::Entities::Document(tmp)
- assert_equal File.read(tmp).length,
- doc.serialize(:xml).length
- end
- end
-
- end
+def test_keywords
+ assert_nothing_raised do
+ topics = @col.topic_words(:lda)
+ @doc.keywords(:topics_frequency, :topic_words => topics)
end
end
+def test_statistics
+ @doc.chunk.segment(:tactful).tokenize
+ assert_equal 1, @word.frequency_in(:document)
+ assert_nothing_raised { @word.tf_idf ; puts @word.tf_idf }
+ # assert_nothing_raised { @doc.statistics(:position_in) }
+ # assert_nothing_raised { @doc.statistics(:transition_matrix) }
+ # assert_nothing_raised { @doc.statistics(:transition_probability) }
+end
=end
View
@@ -15,23 +15,18 @@
@det = Treat::Entities::Word.new('The')
@det.set :category, :determiner
@det.set :tag, 'DT'
- @det.set :tag_set, :penn
@adj = Treat::Entities::Word.new('lazy')
@adj.set :category, :adjective
@adj.set :tag, 'JJ'
- @adj.set :tag_set, :penn
@noun = Treat::Entities::Word.new('fox')
@noun.set :category, :noun
@noun.set :tag, 'NN'
- @noun.set :tag_set, :penn
@aux = Treat::Entities::Word.new('is')
@aux.set :category, :verb
@aux.set :tag, 'VBZ'
- @aux.set :tag_set, :penn
@verb = Treat::Entities::Word.new('running')
@verb.set :category, :verb
@verb.set :tag, 'VBG'
- @verb.set :tag_set, :penn
@dot = Treat::Entities::Punctuation.new('.')
@dot.set :tag, '.'
@paragraph << @sentence << [@noun_phrase, @verb_phrase, @dot]
@@ -146,7 +141,7 @@
end
end
- describe "#each_entity(&entity_types) { |entity| ... }" do
+ describe "#each_entity(*entity_types) { |entity| ... }" do
context "when called with no arguments" do
it "recursively yields each element in " +
@@ -282,7 +277,12 @@
end
- describe "Registrable" do
+ describe "Iterable" do
+
+ describe "#each_entity(*types)" do
+
+ it "y"
+ end
end
View
@@ -13,11 +13,13 @@
end
- describe "#describe" do
+ describe "#describe(code)" do
+
it "returns a lowercase identifier representing the " +
"full name of a language, given its ISO-639-1/2 code." do
Treat::Languages.describe(:eng).should eql :english
end
+
end
end
View
@@ -7,31 +7,49 @@
describe "#build" do
context "when supplied with a sentence" do
-
+
it "creates a sentence with the text" do
sentence = "This is a sentence."
s = Treat::Entities::Phrase.build(sentence)
s.type.should eql :sentence
s.to_s.should eql sentence
end
-
+
end
context "when supplied with a phrase" do
-
+
it "creates a phrase with the text" do
phrase = "this is a phrase"
p = Treat::Entities::Phrase.build(phrase)
p.type.should eql :phrase
p.to_s.should eql phrase
end
+
+ end
+
+ end
+
+ end
+
+ describe "Extractable" do
+
+ describe "#named_entity" do
+ it "tags the named entity words in the phrase" do
end
-
end
+ describe "#time" do
+ it "returns a DateTime object representing the time in the phrase" do
+ Treat::Languages::English::Extractors[:time].each do |e|
+ t = 'Tuesday, January 5th 2011'.time(e)
+ t.year.should eql 2011
+ end
+ end
+ end
end
-
+
describe "Processable" do
describe "#tokenize" do
@@ -124,60 +142,5 @@
end
end
-
-end
-
-=begin
-
-# encoding: utf-8
-module Treat
- module Tests
- class TestExtractors < Test::Unit::TestCase
-
- def setup
- @time = Treat::Tests::English::Time
- @date = Treat::Tests::English::Date
- @doc = Treat::Tests::English::LongDoc
- @word = Treat::Tests::English::Word
- @col = Treat::Tests::English::Collection
- end
-
- def test_time
- assert_nothing_raised { @time.time(:nickel) }
- end
-
- def test_date
- assert_equal 2011, @date.date(:chronic).year
- assert_equal 2011, @date.date(:ruby).year
- end
-
- def test_topic_words
- assert_nothing_raised { @col.topic_words(:lda) }
- end
-
- def test_named_entity
- p = 'Angela Merkel and Nicolas Sarkozy were the first ones to board the p'
- assert_nothing_raised { @doc.named_entity(:stanford) }
- end
-
- def test_keywords
- assert_nothing_raised do
- topics = @col.topic_words(:lda)
- @doc.keywords(:topics_frequency, :topic_words => topics)
- end
- end
-
- def test_statistics
- @doc.chunk.segment(:tactful).tokenize
- assert_equal 1, @word.frequency_in(:document)
- assert_nothing_raised { @word.tf_idf ; puts @word.tf_idf }
- # assert_nothing_raised { @doc.statistics(:position_in) }
- # assert_nothing_raised { @doc.statistics(:transition_matrix) }
- # assert_nothing_raised { @doc.statistics(:transition_probability) }
- end
-
- end
- end
-end
-=end
+end

0 comments on commit a0ccc40

Please sign in to comment.