Skip to content

Commit

Permalink
[ca] Dealing with deprecated characters for ela geminada
Browse files Browse the repository at this point in the history
  • Loading branch information
jaumeortola committed Dec 24, 2013
1 parent 36e47a7 commit 44538ec
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,15 @@ public List<AnalyzedToken> additionalTags(String word) {
}
return additionalTaggedTokens;
}
// Interpret deprecated characters of "ela geminada"
// U+013F LATIN CAPITAL LETTER L WITH MIDDLE DOT
// U+0140 LATIN SMALL LETTER L WITH MIDDLE DOT
if (word.contains("\u0140") || word.contains("\u013f")) {
final String lowerWord = word.toLowerCase(conversionLocale);
final String possibleWord = lowerWord.replaceAll("\u0140", "l·");
List<AnalyzedToken> taggerTokens = asAnalyzedTokenList(word,dictLookup.lookup(possibleWord));
return taggerTokens;
}
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2579,6 +2579,7 @@ $Id$
<example type="incorrect">Quan ho <marker>haguem</marker> fet.</example>
<example type="correct">Quan ho hàgiu portat</example>
<example type="correct">Si ho haguéssiu portat</example>
<example type="correct">Hi havien instaŀlat una antena.</example> <!-- Using deprecated char for ela geminada-->
</rule>
<rulegroup id="HAVER_SENSE_HAC" name="Haver sense hac: *eu/feu fet">
<rule>
Expand Down Expand Up @@ -12999,6 +13000,18 @@ $Id$
<example type="correct">en un estat sindical-laborista</example>
<example type="correct">en un estat sindical.La classe obrera</example>
</rule>
<rule id="ELA_GEMINADA_2CHAR" name="Ela geminada: evita codificació de dos caràcters" type="misspelling">
<!-- Flags any token that writes the "ela geminada" with the deprecated single character
     ŀ (U+0140 LATIN SMALL LETTER L WITH MIDDLE DOT) followed by "l", and suggests the
     same token spelled with the three-character sequence "l·l" instead. -->
<pattern case_sensitive="no">
<marker>
<token regexp="yes">.*ŀl.*</token>
</marker>
</pattern>
<message>Error de codificació.</message>
<!-- The replacement targets the bare sequence "ŀl" rather than "(.+)ŀl(.+)": the greedy,
     anchored form rewrote only the last occurrence in a token and required at least one
     character on each side, so some tokens flagged by the pattern got an unchanged suggestion. -->
<!-- NOTE(review): the pattern is case-insensitive but this regex names only the lowercase
     form; confirm that upper-case tokens (ĿL) still receive a useful suggestion. -->
<suggestion><match no="1" regexp_match="ŀl" regexp_replace="l·l"/></suggestion>
<short>Error tipogràfic</short>
<example type="incorrect" correction="sol·licitud">La <marker>soŀlicitud</marker>.</example>
<example type="correct">la sol·licitud</example>
</rule>
<rule id="ESPAI_DESPRES_DE_PUNT" name="Comprova que hi ha espai deprés de punt.">
<pattern>
<token><exception>www</exception></token>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,14 @@ public void testMorfologikSpeller() throws IOException {
assertEquals(1, matches.length);
assertEquals("para", matches[0].getSuggestedReplacements().get(0));
assertEquals("pare", matches[0].getSuggestedReplacements().get(1));

// deprecated characters of "ela geminada"
assertEquals(0, rule.match(langTool.getAnalyzedSentence("S'hi havien instaŀlat.")).length);
assertEquals(0, rule.match(langTool.getAnalyzedSentence("S'HI HAVIEN INSTAĿLAT.")).length);

assertEquals(1, rule.match(langTool.getAnalyzedSentence("aõh")).length);
assertEquals(0, rule.match(langTool.getAnalyzedSentence("a")).length);

}

}

0 comments on commit 44538ec

Please sign in to comment.