Skip to content

Commit

Permalink
Added empmatchbefore and empmatchafter prefix opcodes and updated UEB…
Browse files Browse the repository at this point in the history
… tables to use them.

Allow capitals in checkEmphasisChange.
  • Loading branch information
MikeGray-APH committed Nov 11, 2016
1 parent da5ff78 commit 1f3c08b
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 66 deletions.
16 changes: 15 additions & 1 deletion liblouis/compileTranslationTable.c
Expand Up @@ -279,6 +279,8 @@ static const char *opcodeNames[CTO_None] = {
"before",
"noback",
"nofor",
"empmatchbefore",
"empmatchafter",
"swapcc",
"swapcd",
"swapdd",
Expand Down Expand Up @@ -3836,7 +3838,7 @@ compileRule (FileInfo * nested)
getCharacters(nested, &ptn_after);
getRuleDotsPattern(nested, &ruleDots);

if(!addRule(nested, opcode, &ruleChars, &ruleDots, 0, 0))
if(!addRule(nested, opcode, &ruleChars, &ruleDots, after, before))
ok = 0;

if(ptn_before.chars[0] == '-' && ptn_before.length == 1)
Expand Down Expand Up @@ -4689,6 +4691,18 @@ compileRule (FileInfo * nested)
goto doOpcode;
case CTO_NoFor:
	/* "nofor" prefix: record the flag and jump back to doOpcode
	 * (presumably to read the opcode that follows the prefix).
	 * The explicit goto is required so this case does not fall through
	 * into CTO_EmpMatchBefore and set CTC_EmpMatch by accident. */
	nofor = 1;
	goto doOpcode;

case CTO_EmpMatchBefore:
	/* "empmatchbefore" prefix: set CTC_EmpMatch in `before`, which is
	 * later handed to addRule for the rule being compiled. */
	before |= CTC_EmpMatch;
	goto doOpcode;

case CTO_EmpMatchAfter:
	/* "empmatchafter" prefix: set CTC_EmpMatch in `after`, which is
	 * later handed to addRule for the rule being compiled. */
	after |= CTC_EmpMatch;
	goto doOpcode;

case CTO_SwapCc:
case CTO_SwapCd:
Expand Down
22 changes: 19 additions & 3 deletions liblouis/lou_translateString.c
Expand Up @@ -2070,7 +2070,7 @@ checkEmphasisChange(const int skip)
{
int i;
for(i = src + (skip + 1); i < src + transRule->charslen; i++)
if((emphasisBuffer[i] & ~CAPS_EMPHASIS) || transNoteBuffer[i])
if(emphasisBuffer[i] || transNoteBuffer[i])
return 1;
return 0;
}
Expand Down Expand Up @@ -2219,11 +2219,27 @@ for_selectRule ()
transCharslen = transRule->charslen;
if (tryThis == 1 || ((transCharslen <= length) && validMatch ()))
{
/* check before emphasis match */
if(transRule->before & CTC_EmpMatch)
{
if( emphasisBuffer[src]
|| transNoteBuffer[src])
break;
}

/* check after emphasis match */
if(transRule->after & CTC_EmpMatch)
{
if( emphasisBuffer[src + transCharslen]
|| transNoteBuffer[src + transCharslen])
break;
}

/* check this rule */
setAfter (transCharslen);
if ((!transRule->after || (beforeAttributes
if ((!(transRule->after & ~CTC_EmpMatch) || (beforeAttributes
& transRule->after)) &&
(!transRule->before || (afterAttributes
(!(transRule->before & ~CTC_EmpMatch) || (afterAttributes
& transRule->before)))
switch (transOpcode)
{ /*check validity of this Translation */
Expand Down
5 changes: 4 additions & 1 deletion liblouis/louis.h
Expand Up @@ -110,7 +110,8 @@ typedef enum {
CTC_CapsMode = 0x1000000,
CTC_NumericMode = 0x2000000,
CTC_NumericNoContract = 0x4000000,
CTC_EndOfInput = 0x8000000 // used by pattern matcher
CTC_EndOfInput = 0x8000000, // only used by pattern matcher
CTC_EmpMatch = 0x10000000, // only used in TranslationTableRule->before and TranslationTableRule->after
} TranslationTableCharacterAttribute;

typedef enum {
Expand Down Expand Up @@ -261,6 +262,8 @@ typedef enum { /*Op codes */
CTO_Before, /*only match if before character in class 30 */
CTO_NoBack,
CTO_NoFor,
CTO_EmpMatchBefore,
CTO_EmpMatchAfter,
CTO_SwapCc,
CTO_SwapCd,
CTO_SwapDd,
Expand Down
2 changes: 1 addition & 1 deletion liblouis/pattern.c
Expand Up @@ -1380,7 +1380,7 @@ static int pattern_check_attrs(const widechar input_char, const widechar *expr_d
{
int attrs;

attrs = ((expr_data[0] << 16) | expr_data[1]) & ~CTC_EndOfInput;
attrs = ((expr_data[0] << 16) | expr_data[1]) & ~(CTC_EndOfInput | CTC_EmpMatch);
if(!checkAttr(input_char, attrs, 0))
return 0;
return 1;
Expand Down
86 changes: 36 additions & 50 deletions tables/en-ueb-g2.ctb
Expand Up @@ -535,50 +535,38 @@ sufword subbasement 234-136-12-12-1-234-15-56-2345
sufword subbing 234-136-12-12-346

# be 10.6.1 10.6.2 10.6.3 10.6.4
match %[^_~]%<* be ati!n 23 beati*
match %[^_~]%<* be atr 23 bea*
match %[^_~]%<* be c![hkq] 23 bec*
match %[^_~]%<* be dra 23 bedr*
match %[^_~]%<* be de!(%>*%[_~^]) 23 bede*
match %[^_~]%<* be da 23 bed*
match %[^_~]%<* be elz 23 bee*
match %[^_~]%<* be g![gs] 23 beg*
match %[^_~]%<* be in 23 bei*
match %[^_~]%<* be l[aeiouwy] 23 bel*
match %[^_~]%<* be neficen 23 beneficen*
match %[^_~]%<* be net(['’]s)?%>*%[_~^] 23 benet
match %[^_~]%<* be ne![dft] 23 bene*
match %[^_~]%<* be n[aiu] 23 ben*
match %[^_~]%<* be r[a] 23 bere*
match %[^_~]%<* be re!n 23 ber*
match %[^_~]%<* be stir 23 besti*
match %[^_~]%<* be st[or] 23 best*
match %[^_~]%<* be s![st] 23 bes*
match %[^_~]%<* be t![chst'’] 23 bet*
match %[^_~]%<* be [bfhjmopqwxz] 23

#nofor begword beatif 23-1-2345-24-124
#nofor begword beatit 23-1-2345-24-2345
#nofor begword beb 23-12
#nofor begword becal 23-14-1-123
#nofor begword beco 23-14-135
#nofor begword beda 23-145-1
#nofor begword bede 23-145-15
#nofor begword bedr 23-145-1235
#nofor begword bef 23-124
#nofor begword beg 23-1245
#nofor begword beh 23-125
empmatchafter match %[^_~]%<* be [Aa][Tt][Ii]![Nn] 23 beati*
empmatchafter match %[^_~]%<* be [Aa][Tt][Rr] 23 bea*
empmatchafter match %[^_~]%<* be [Cc]![HKQhkq] 23 bec*
empmatchafter match %[^_~]%<* be [Dd][Rr][Aa] 23 bedr*
empmatchafter match %[^_~]%<* be [Dd][Ee]!(%>*%[_~^]) 23 bede*
empmatchafter match %[^_~]%<* be [Dd][Aa] 23 bed*
empmatchafter match %[^_~]%<* be [Ee][Ll][Zz] 23 bee*
empmatchafter match %[^_~]%<* be [Gg]![GSgs] 23 beg*
empmatchafter match %[^_~]%<* be [Ii][Nn] 23 bei*
empmatchafter match %[^_~]%<* be [Ll][AEIOUWYaeiouwy] 23 bel*
empmatchafter match %[^_~]%<* be [Nn][Ee][Ff][Ii][Cc][Ee][Nn] 23 beneficen*
empmatchafter match %[^_~]%<* be [Nn][Ee][Tt](['’]s)?%>*%[_~^] 23 benet
empmatchafter match %[^_~]%<* be [Nn][Ee]![DFTdft] 23 bene*
empmatchafter match %[^_~]%<* be [Nn][AIUaiu] 23 ben*
empmatchafter match %[^_~]%<* be [Rr][Aa] 23 bere*
empmatchafter match %[^_~]%<* be [Rr][Ee]![Nn] 23 ber*
empmatchafter match %[^_~]%<* be [Ss][Tt][Ii][Rr] 23 besti*
empmatchafter match %[^_~]%<* be [Ss][Tt][ORor] 23 best*
empmatchafter match %[^_~]%<* be [Ss]![STst] 23 bes*
empmatchafter match %[^_~]%<* be [Tt]![CHSTchst'’] 23 bet*
empmatchafter match %[^_~]%<* be [BFHJMOPQWXZbfhjmopqwxz] 23

nofor sufword be 23

# easier if after "be" cause both are dots 23
# bb 10.6.5 10.6.6
match %a bb %a 23
empmatchbefore empmatchafter match %a bb %a 23

nofor always bb 23

# cc 10.6.5 10.6.6
match %a cc %a 25
empmatchbefore empmatchafter match %a cc %a 25

nofor always cc 25

Expand All @@ -591,30 +579,28 @@ sufword saccharin 234-1-14-16-345-35
sufword zucchini 1356-136-14-16-35-24

# con 10.6.1 10.6.2 10.6.3 10.6.4
match %[^_~]%<* con c!h 25
match %[^_~]%<* con est 25
match %[^_~]%<* con s%a 25 cons "mod cons" 10.6.4
sufword conned 14-135-1345-1345-1246
#match %[^_~]%<* con n(!e|%>*%[^_~]) 25 conned
#match %[^_~]%<* con ne!d 25 "
match %[^_~]%<* con [abdfghijlmnopqrtuvwxyz] 25
empmatchafter match %[^_~]%<* con [Cc]![Hh] 25
empmatchafter match %[^_~]%<* con [Ee][Ss][Tt] 25
# accept either case of the "s" that follows "con", like the sibling rules
empmatchafter match %[^_~]%<* con [Ss]%a 25 cons "mod cons" 10.6.4
empmatchafter match %[^_~]%<* con [ABDFGHIJLMNOPQRTUVWXYZabdfghijlmnopqrtuvwxyz] 25
sufword conakry 14-135-1345-1-13-1235-13456
sufword conan 14-135-1345-1-1345
sufword conned 14-135-1345-1345-1246

nofor begword con 25

# dis 10.6.1 10.6.2 10.6.3 10.6.4
word diss 145-24-234-234
match %[^_~]%<* dis c![s'’] 256
match %[^_~]%<* dis he![dsv] 256
match %[^_~]%<* dis h![ceiprtw'’] 256
match %[^_~]%<* dis p!i 256
match %[^_~]%<* dis [abdefgijlmnoqrstuvwxyz] 256
empmatchafter match %[^_~]%<* dis [Cc]![Ss'’] 256
empmatchafter match %[^_~]%<* dis [Hh][Ee]![DSVdsv] 256
empmatchafter match %[^_~]%<* dis [Hh]![CEIPRTWceiprtw'’] 256
empmatchafter match %[^_~]%<* dis [Pp]![Ii] 256
empmatchafter match %[^_~]%<* dis [ABDEFGIJLMNOQRSTUVWXYZabdefgijlmnoqrstuvwxyz] 256

nofor begword dis 256

# ea 10.6.5 10.6.6 10.6.7
match %a ea %a 2
empmatchbefore empmatchafter match %a ea %a 2

# cheap fix, still fails with ,ea,ea,
nofor always , 2
Expand Down Expand Up @@ -673,7 +659,7 @@ sufword toenail 2345-135-15-1345-1-24-123
sufword turtleneck 2345-136-1235-2345-123-15-1345-15-14-13

# ff 10.6.5 10.6.6
match %a ff %a 235
empmatchbefore empmatchafter match %a ff %a 235

nofor always ff 235

Expand All @@ -684,7 +670,7 @@ sufword effort 15-124-123456-2345
sufword stafford 34-1-124-123456-145

# gg 10.6.5 10.6.6
match %a gg %a 2356
empmatchbefore empmatchafter match %a gg %a 2356

nofor always gg 2356

Expand Down
1 change: 0 additions & 1 deletion tests/yaml/en-ueb-08-capitalization.yaml
Expand Up @@ -138,7 +138,6 @@ tests:
- un,,self,'i%
- - XXIInd
- ',,xxii,''nd'
- xfail: true
- - VIIb
- ',,vii,''b'

Expand Down
9 changes: 0 additions & 9 deletions tests/yaml/en-ueb-10-07-contractions.yaml
Expand Up @@ -798,7 +798,6 @@ tests:
- dis.1turb
- typeform:
italic: ' ++++'
xfail: true

# 10.6.3 page 118

Expand All @@ -810,7 +809,6 @@ tests:
- ',dis,,cord'
- - DISlike
- ',,dis,''like'
- xfail: true

# 10.6.4 page 118

Expand Down Expand Up @@ -1015,7 +1013,6 @@ tests:
- /iff.1ly
- typeform:
italic: ' ++'
xfail: true
- - subbasement
- subbase;t
- - tea
Expand All @@ -1027,19 +1024,14 @@ tests:

- - CliffSide
- ',cliff,side'
- xfail: true
- - EggHead
- ',egg,h1d'
- xfail: true
- - MacEACHEN
- ',mac,,ea*5'
- xfail: true
- - SeaWorld
- ',sea,_w'
- xfail: true
- - TEAspoon
- ',,tea,''spoon'
- xfail: true

# 10.6.7 page 121

Expand Down Expand Up @@ -1232,7 +1224,6 @@ tests:
- _!s
- - SeaWorld
- ',sea,_w'
- xfail: true
- - underworld
- '"u_w'
- - world-wide
Expand Down

0 comments on commit 1f3c08b

Please sign in to comment.