Skip to content

Commit

Permalink
[uk] improve adj/noun inflection suggestions
Browse files Browse the repository at this point in the history
  • Loading branch information
arysin committed Dec 23, 2016
1 parent bd11e23 commit 54d35cf
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 17 deletions.
Expand Up @@ -104,7 +104,10 @@ static List<Inflection> getAdjInflections(List<AnalyzedToken> adjTokenReadings)
animTag = matcher.group(3).substring(2); // :rinanim/:ranim
}

- masterInflections.add(new Inflection(gen, vidm, animTag));
+ Inflection inflection = new Inflection(gen, vidm, animTag);
+ if( ! masterInflections.contains(inflection) ) {
+ masterInflections.add(inflection);
+ }
}
return masterInflections;
}
Expand All @@ -125,7 +128,10 @@ static List<Inflection> getNounInflections(List<AnalyzedToken> nounTokenReadings
String vidm = matcher.group(3);
String animTag = matcher.group(1);

- slaveInflections.add(new Inflection(gen, vidm, animTag));
+ Inflection inflection = new Inflection(gen, vidm, animTag);
+ if( ! slaveInflections.contains(inflection) ) {
+ slaveInflections.add(inflection);
+ }
}
return slaveInflections;
}
Expand Down
Expand Up @@ -206,7 +206,7 @@ else if ( nounPosTag.equals(JLanguageTool.SENTENCE_END_TAGNAME)
slaveTokenReadings.get(0).getToken() + ": " + slaveInflections+ " // " + slaveTokenReadings));
}

- String msg = String.format("Потенційна помилка: прикметник неузгоджений з іменником: \"%s\": [%s] і \"%s\": [%s]",
+ String msg = String.format("Потенційна помилка: прикметник не узгоджений з іменником: \"%s\": [%s] і \"%s\": [%s]",
adjTokenReadings.get(0).getToken(), formatInflections(masterInflections, true),
slaveTokenReadings.get(0).getToken(), formatInflections(slaveInflections, false));

Expand All @@ -230,7 +230,7 @@ else if ( nounPosTag.equals(JLanguageTool.SENTENCE_END_TAGNAME)


if( ! adjInflection._case.equals("v_kly")
- && (adjInflection._case.equals("p")
+ && (adjInflection.gender.equals("p")
|| PosTagHelper.hasPosTagPart(slaveTokenReadings, genderTag)) ) {
for(AnalyzedToken nounToken: slaveTokenReadings) {

Expand All @@ -251,29 +251,28 @@ else if ( nounPosTag.equals(JLanguageTool.SENTENCE_END_TAGNAME)
}
}
}

}

for (Inflection nounInflection : slaveInflections) {
String genderTag = ":"+nounInflection.gender+":";
String vidmTag = nounInflection._case;

if( nounInflection.animMatters() ) {
vidmTag += ":r" + nounInflection.animTag;
}
if( nounInflection.animMatters() ) {
vidmTag += ":r" + nounInflection.animTag;
}

for(AnalyzedToken adjToken: adjTokenReadings) {
String newAdjTag = adjToken.getPOSTag().replaceFirst(":.:v_...(:r(in)?anim)?", genderTag + vidmTag);
for(AnalyzedToken adjToken: adjTokenReadings) {
String newAdjTag = adjToken.getPOSTag().replaceFirst(":.:v_...(:r(in)?anim)?", genderTag + vidmTag);

String[] synthesized = ukrainianSynthesizer.synthesize(adjToken, newAdjTag, false);
String[] synthesized = ukrainianSynthesizer.synthesize(adjToken, newAdjTag, false);

for (String s : synthesized) {
String suggestion = s + " " + tokenReadings.getToken();
if( ! suggestions.contains(suggestion) ) {
suggestions.add(suggestion);
}
}
for (String s : synthesized) {
String suggestion = s + " " + tokenReadings.getToken();
if( ! suggestions.contains(suggestion) ) {
suggestions.add(suggestion);
}
}
}

}

Expand Down
Expand Up @@ -71,6 +71,12 @@ public void testRule() throws IOException {
assertEquals(1, rule.match(langTool.getAnalyzedSentence("кволого тюльпан")).length);
assertEquals(1, rule.match(langTool.getAnalyzedSentence("цинічна винахідливості")).length);

RuleMatch[] matches0 = rule.match(langTool.getAnalyzedSentence("4 російських винищувача"));
assertEquals(1, matches0.length);
assertTrue("Message is wrong: " + matches0[0].getMessage(),
matches0[0].getMessage().contains("[ч.р.: родовий, знахідний]"));
assertEquals(Arrays.asList("російських винищувачів", "російських винищувачах", "російського винищувача"), matches0[0].getSuggestedReplacements());

// from real examples

// і-и
Expand Down

0 comments on commit 54d35cf

Please sign in to comment.