Skip to content

Commit

Permalink
Refactor and split nisi, neu, seu, mede, oude etc
Browse files Browse the repository at this point in the history
  • Loading branch information
LFDM committed Aug 18, 2014
1 parent 9c4d66c commit b3ee522
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 15 deletions.
32 changes: 17 additions & 15 deletions lib/llt/tokenizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def find_abbreviations_and_join_strings
# Word list of enclitics (que, ne, ve, c); its consumers are outside this excerpt.
ENCLITICS = %w{ que ne ve c }
# Runs the enclitic-splitting steps one after another, in the order listed.
# Each step is a helper method of this tokenizer; none takes arguments here.
# NOTE(review): this view lists both split_nec_and_oute and
# split_frequent_enclitics; presumably the latter replaces the former and only
# one of the two calls belongs in the final method - confirm.
def split_enklitika_and_change_their_position
split_with_force
split_nec_and_oute
split_frequent_enclitics # like latin c, ve or greek te, de
make_frequent_corrections
end

Expand Down Expand Up @@ -202,22 +202,24 @@ def enclitic(val)
"#{@enclitics_marker}#{val}"
end

def split_nec_and_oute
nec_indices = []
oute_indices = []
@worker.each_with_index do |token, i|
case token
when /^nec$/i
token.slice!(-1)
nec_indices << (i + nec_indices.size + @shift_range)
when /^(οὐ|μή|εἰ)τε$/i
token.slice!(-2, 2)
oute_indices << (i + oute_indices.size + @shift_range)
# Lookup table used by split_frequent_enclitics.
#
# Key:   case-insensitive pattern that must match the whole token.
# Value: the trailing enclitic; split_frequent_enclitics removes exactly
#        value.length characters from the end of a matching token and
#        re-inserts the value as a marked enclitic token.
#
# Frozen so the shared constant cannot be modified by accident at runtime.
ENCLITICS_MAP = {
  /^(nec)$/i => 'c',
  /^(ne|se)u$/i => 'u',
  /^(nisi)$/i => 'si',
  /^(οὐ|μή|εἰ)τε$/i => 'τε',
  /^(οὐ|μή)δε$/i => 'δε',
}.freeze
# Walks ENCLITICS_MAP one pattern at a time. Every token in @worker that
# matches the pattern has its trailing enclitic cut off in place, and a marked
# enclitic token (built by enclitic(encl)) is inserted into @worker afterwards.
def split_frequent_enclitics
ENCLITICS_MAP.each do |regex, encl|
# Insertion indices collected for the current pattern only.
container = []
@worker.each_with_index do |token, i|
if token.match(regex)
# Destructively drop the last encl.length characters of the token.
token.slice!(-encl.length, encl.length)
# Target index for the marker: container.size accounts for the markers of
# earlier hits that will already have been inserted; @shift_range is an
# offset set outside this excerpt.
container << (i + container.size + @shift_range)
end
end
# Insert only after the scan, so @worker is not modified while iterating it.
container.each { |i| @worker.insert(i, enclitic(encl)) }
end

# NOTE(review): nec_indices and oute_indices are not defined anywhere in this
# method; the two lines below look like leftovers of the removed
# split_nec_and_oute shown in this diff view - confirm they are not part of
# the final file.
nec_indices.each { |i| @worker.insert(i, enclitic('c')) }
oute_indices.each { |i| @worker.insert(i, enclitic('τε')) }
end

def make_frequent_corrections
Expand Down
38 changes: 38 additions & 0 deletions spec/lib/llt/tokenizer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,31 @@ def enklitika_test(example)
end
end

# seu/neu: the expected output puts the marked -u in front of the remaining stem.
context "when confronted with -u" do
  { 'seu' => '-u se', 'neu' => '-u ne' }.each_pair do |input, output|
    it "transforms #{input} to #{output}" do
      enklitika_test(input).should be_transformed_to output
    end
  end
end

# nisi: the expected output puts the marked -si in front of the remaining "ni".
context "when confronted with -si" do
  { 'nisi' => '-si ni' }.each_pair do |input, output|
    it "transforms #{input} to #{output}" do
      enklitika_test(input).should be_transformed_to output
    end
  end
end

context "when confronted with -τε" do
examples = {
'οὐτε' => '-τε οὐ',
Expand All @@ -274,6 +299,19 @@ def enklitika_test(example)
end
end
end

# οὐδε/μήδε: the expected output puts the marked -δε in front of the remaining particle.
context "when confronted with -δε" do
  { 'οὐδε' => '-δε οὐ', 'μήδε' => '-δε μή' }.each_pair do |input, output|
    it "transforms #{input} to #{output}" do
      enklitika_test(input).should be_transformed_to output
    end
  end
end
end

describe "#merge_what_needs_merging" do
Expand Down

0 comments on commit b3ee522

Please sign in to comment.