Skip to content

Commit

Permalink
make it actually work
Browse files Browse the repository at this point in the history
  • Loading branch information
Roger Braun committed Aug 20, 2011
1 parent d38fda7 commit f02b780
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 4 deletions.
Binary file modified lib/.maxixe.rb.swp
Binary file not shown.
4 changes: 2 additions & 2 deletions lib/maxixe.rb
Expand Up @@ -106,15 +106,15 @@ def self.generate_and_dump(n, output, *files)
end

def self.generate_training_data(n, *files)
result = n.inject({}){|r, c_n| r[c_n] = Hash.new{0}; r}
result = n.inject({}){|r, c_n| r[c_n.to_s] = Hash.new{0}; r}

files.each do |file|
input = open(file)
input.each_line do |line|
n.each do |c_n|
n_grams = line.each_char.each_cons(c_n).map(&:join).to_a
n_grams.each do |n_gram|
result[c_n][n_gram] += 1
result[c_n.to_s][n_gram] += 1
end
end
end
Expand Down
Binary file modified spec/segmenter/.segmenter_spec.rb.swp
Binary file not shown.
12 changes: 11 additions & 1 deletion spec/segmenter/segmenter_spec.rb
Expand Up @@ -7,7 +7,7 @@
@sentence = "1234567"
@two_grams = @sentence.each_char.each_cons(2).to_a
@three_grams = @sentence.each_char.each_cons(3).to_a
@segmenter = Maxixe::Segmenter.new({"2" => "", "3" => ""}, nil)
@segmenter = Maxixe::Segmenter.new({})
end

it "should give all non_straddling n_grams for a given position" do
Expand Down Expand Up @@ -54,4 +54,14 @@
@segmenter.average_votes(votes).should == [0.5, 0.5, 0.5, 0.5]
end
end

# End-to-end example: a segmenter built from a small hand-written table of
# n-gram counts is asked to split an unsegmented string.
describe "Segmenting Text" do
  before(:each) do
    # The table is keyed by n-gram size given as a string ("2", "3"); each
    # value maps an n-gram to a count. It has the same contents that the
    # trainer spec expects generate_training_data([2,3], ...) to return.
    bigram_counts = {
      "AB"=>2, "BC"=>2, "CD"=>1, "DE"=>1, "EF"=>1, "FG"=>1,
      "G\n"=>1, "CX"=>1, "XY"=>1, "YZ"=>1, "Z\n"=>1
    }
    trigram_counts = {
      "ABC"=>2, "BCD"=>1, "CDE"=>1, "DEF"=>1, "EFG"=>1,
      "FG\n"=>1, "BCX"=>1, "CXY"=>1, "XYZ"=>1, "YZ\n"=>1
    }

    @segmenter = Maxixe::Segmenter.new({"2"=>bigram_counts, "3"=>trigram_counts})
  end

  it "should be able to segment text" do
    segmented = @segmenter.segment("ABCDE")

    # The expected output has a single space inserted after "ABC".
    segmented.should == "ABC DE"
  end
end
end
Binary file modified spec/trainer/.trainer_spec.rb.swp
Binary file not shown.
2 changes: 1 addition & 1 deletion spec/trainer/trainer_spec.rb
Expand Up @@ -6,7 +6,7 @@

pwd = File.dirname(__FILE__)

Maxixe::Trainer.generate_training_data([2,3], File.join(pwd, "first_file"), File.join(pwd,"second_file")).should == {2=>{"AB"=>2, "BC"=>2, "CD"=>1, "DE"=>1, "EF"=>1, "FG"=>1, "G\n"=>1, "CX"=>1, "XY"=>1, "YZ"=>1, "Z\n"=>1}, 3=>{"ABC"=>2, "BCD"=>1, "CDE"=>1, "DEF"=>1, "EFG"=>1, "FG\n"=>1, "BCX"=>1, "CXY"=>1, "XYZ"=>1, "YZ\n"=>1}}
Maxixe::Trainer.generate_training_data([2,3], File.join(pwd, "first_file"), File.join(pwd,"second_file")).should == {"2"=>{"AB"=>2, "BC"=>2, "CD"=>1, "DE"=>1, "EF"=>1, "FG"=>1, "G\n"=>1, "CX"=>1, "XY"=>1, "YZ"=>1, "Z\n"=>1}, "3"=>{"ABC"=>2, "BCD"=>1, "CDE"=>1, "DEF"=>1, "EFG"=>1, "FG\n"=>1, "BCX"=>1, "CXY"=>1, "XYZ"=>1, "YZ\n"=>1}}

end

Expand Down

0 comments on commit f02b780

Please sign in to comment.