Skip to content

Commit

Permalink
lou_backTranslateString.c fix
Browse files Browse the repository at this point in the history
This commit fixes back translation problems when words get split in
unusual places, causing back translation of whole words. For example, K5
back translates to Knowledgeen, M>k back translates to Moreark, and M5
back translates to Moren. This caused over 8400 extra back
translation problems.
  • Loading branch information
krperry committed Jul 1, 2014
1 parent 1629638 commit 643928c
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 19 deletions.
5 changes: 5 additions & 0 deletions NEWS
Expand Up @@ -5,6 +5,11 @@ liblouis NEWS -- history of user-visible changes. -*- org -*-
- Added a callback system for logging purposes.

** Bug fixes
- Fixed back translation problems when a word gets split in unusual
places, causing back translation of whole words. For example, K5 back
translates to Knowledgeen, M>k back translates to Moreark, and M5 back
translates to Moren. This caused over 8400 extra back translation errors in en-us-g2 and 5000 in en-ueb-g2.

- Fixed bug to prevent removal of \xffff between largesign rules. This
solves a LibLouisUTDML bug where \xffff is used as a segment delimiter.
- Fixed a bug in backtranslation, when a letsign was encountered, the
Expand Down
48 changes: 29 additions & 19 deletions liblouis/lou_backTranslateString.c
Expand Up @@ -416,27 +416,37 @@ static int
isEndWord ()
{
/* See if this is really the end of a word.
 *
 * Inspects the input cells starting at index src + currentDotslen and
 * returns 1 when that position is treated as the end of a word, 0 when it
 * is not.
 *
 * NOTE(review): this listing comes from a diff view whose +/- markers are
 * not visible. The statements from "int k = src + currentDotslen;" down to
 * the backRules hash lookup look like the removed implementation, and the
 * statements from the second "int k;" onward look like its replacement
 * (k and dots are declared twice, which would not compile as one body).
 * Confirm against the repository before relying on either half. */

/* ---- First variant: single-position check with a two-cell hash ---- */
int k = src + currentDotslen;
TranslationTableOffset ruleOffset;
unsigned long int makeHash;
/* Note: currentInput[k] is read here, before k is compared with srcmax
 * on the following line. */
const TranslationTableCharacter *dots =
back_findCharOrDots (currentInput[k], 1);
/* Index at or beyond srcmax: report end of word. */
if (k >= srcmax)
return 1;
/* A cell carrying the space attribute ends the word. */
if (dots->attributes & CTC_Space)
return 1;
/* A cell carrying the letter attribute means the word continues. */
if (dots->attributes & CTC_Letter)
return 0;
/* Build a hash from the lowercase value of this cell (shifted left by 8)
 * plus the lowercase value of the next cell, reduced modulo HASHNUM.
 * Note: currentInput[k + 1] is read without a check against srcmax. */
makeHash = (unsigned long int) dots->lowercase << 8;
makeHash += (unsigned long int) (back_findCharOrDots
(currentInput[k + 1], 1))->lowercase;
makeHash %= HASHNUM;
/* A non-zero entry in table->backRules for that hash means: not the end
 * of a word. */
ruleOffset = table->backRules[makeHash];
if (ruleOffset != 0)
return 0;

/* ---- Second variant: scan forward cell by cell up to srcmax ---- */
int k;
const TranslationTableCharacter *dots;
TranslationTableOffset testRuleOffset;
TranslationTableRule *testRule;
/* The loop condition keeps every currentInput[k] read below srcmax. */
for (k = src + currentDotslen; k < srcmax; k++)
{
/* Both flags are reset for every cell examined. */
int postpuncFound = 0;
int TranslationFound = 0;
dots = back_findCharOrDots (currentInput[k], 1);
/* Start of the rule chain attached to this cell. */
testRuleOffset = dots->otherRules;
/* Space attribute: stop scanning; control reaches the final return 1. */
if (dots->attributes & CTC_Space)
break;
/* Letter attribute: the word continues. */
if (dots->attributes & CTC_Letter)
return 0;
/* Walk the chain through dotsnext until the offset becomes 0. */
while (testRuleOffset)
{
testRule =
(TranslationTableRule *) & table->ruleArea[testRuleOffset];
/* Remember whether any rule in the chain has charslen greater than 1. */
if (testRule->charslen > 1)
TranslationFound = 1;
/* Remember whether any rule in the chain has opcode CTO_PostPunc. */
if (testRule->opcode == CTO_PostPunc)
postpuncFound = 1;
/* A rule with opcode CTO_Hyphen ends the word immediately. */
if (testRule->opcode == CTO_Hyphen)
return 1;
testRuleOffset = testRule->dotsnext;
}
/* A rule with charslen > 1 and no CTO_PostPunc rule on this cell:
 * not the end of a word. Otherwise move on to the next cell. */
if (TranslationFound && !postpuncFound)
return 0;
}
/* Reached srcmax or a space without finding a reason to continue. */
return 1;
}

static int
findBrailleIndicatorRule (TranslationTableOffset offset)
{
Expand Down

0 comments on commit 643928c

Please sign in to comment.