Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/opcode-unification' into feature…
Browse files Browse the repository at this point in the history
…/ueb_update
  • Loading branch information
egli committed Jan 13, 2016
2 parents 6d10c38 + b02b171 commit de655be
Show file tree
Hide file tree
Showing 42 changed files with 1,015 additions and 669 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ TAGS
/tests/uplow_with_unicode
/tests/logging
/tests/findTable
/tests/emphclass
/tests/yaml/*.log
/tests/yaml/*.trs

Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ AC_CONFIG_FILES([
tests/Makefile
tests/resolve_table.h
tests/tables/Makefile
tests/tables/emphclass/Makefile
tests/tables/moreTables/Makefile
tests/tables/resolve_table/Makefile
tests/tables/resolve_table/dir_1/Makefile
Expand Down
281 changes: 170 additions & 111 deletions liblouis/compileTranslationTable.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,122 +242,33 @@ static const char *opcodeNames[CTO_None] = {
"noletsign",
"noletsignafter",
"numsign",
// "numericmodechars",
// "numericnocontchars",
"seqdelimiter",
"seqbeforechars",
"seqafterchars",
"seqafterpattern",
"firstwordital",
// "numericmodechars",
// "numericnocontchars",
"seqdelimiter",
"seqbeforechars",
"seqafterchars",
"seqafterpattern",
"italsign",
"lastworditalbefore",
"lastworditalafter",
"begital",
"firstletterital",
"endital",
"lastletterital",
"singleletterital",
"italword",
"italwordstop",
"lenitalphrase",
"firstwordbold",
"boldsign",
"lastwordboldbefore",
"lastwordboldafter",
"begbold",
"firstletterbold",
"endbold",
"lastletterbold",
"singleletterbold",
"boldword",
"boldwordstop",
"lenboldphrase",
"firstwordunder",
"undersign",
"lastwordunderbefore",
"lastwordunderafter",
"begunder",
"firstletterunder",
"endunder",
"lastletterunder",
"singleletterunder",
"underword",
"underwordstop",
"lenunderphrase",

"singleletterscript",
"scriptword",
"scriptwordstop",
"firstletterscript",
"lastletterscript",
"firstwordscript",
"lastwordscriptbefore",
"lastwordscriptafter",
"lenscriptphrase",

"singlelettertrans1",
"trans1word",
"trans1wordstop",
"firstlettertrans1",
"lastlettertrans1",
"firstwordtrans1",
"lastwordtrans1before",
"lastwordtrans1after",
"lentrans1phrase",

"singlelettertrans2",
"trans2word",
"trans2wordstop",
"firstlettertrans2",
"lastlettertrans2",
"firstwordtrans2",
"lastwordtrans2before",
"lastwordtrans2after",
"lentrans2phrase",

"singlelettertrans3",
"trans3word",
"trans3wordstop",
"firstlettertrans3",
"lastlettertrans3",
"firstwordtrans3",
"lastwordtrans3before",
"lastwordtrans3after",
"lentrans3phrase",

"singlelettertrans4",
"trans4word",
"trans4wordstop",
"firstlettertrans4",
"lastlettertrans4",
"firstwordtrans4",
"lastwordtrans4before",
"lastwordtrans4after",
"lentrans4phrase",

"singlelettertrans5",
"trans5word",
"trans5wordstop",
"firstlettertrans5",
"lastlettertrans5",
"firstwordtrans5",
"lastwordtrans5before",
"lastwordtrans5after",
"lentrans5phrase",

"singlelettertransnote",
"transnoteword",
"transnotewordstop",
"firstlettertransnote",
"lastlettertransnote",
"firstwordtransnote",
"lastwordtransnotebefore",
"lastwordtransnoteafter",
"lentransnotephrase",

"capsmodechars",
// "emphmodechars",

"emphclass",
"singleletteremph",
"emphword",
"emphwordstop",
"firstletteremph",
"lastletteremph",
"firstwordemph",
"lastwordemphbefore",
"lastwordemphafter",
"lenemphphrase",
"capsmodechars",
// "emphmodechars",
"begcomp",
"compbegemph1",
"compendemph1",
Expand Down Expand Up @@ -405,7 +316,7 @@ static const char *opcodeNames[CTO_None] = {
"exactdots",
"nocross",
"syllable",
"nocontractsign",
"nocontractsign",
"nocont",
"compbrl",
"literal",
Expand Down Expand Up @@ -433,8 +344,8 @@ static const char *opcodeNames[CTO_None] = {
// "apostrophe",
// "initial",
"nobreak",
"match",
"attribute",
"match",
"attribute",
};
static short opcodeLengths[CTO_None] = { 0 };

Expand Down Expand Up @@ -4087,6 +3998,7 @@ compileRule (FileInfo * nested)
CharsString ruleDots;
CharsString cells;
CharsString scratchPad;
CharsString emphClass;
TranslationTableCharacterAttributes after = 0;
TranslationTableCharacterAttributes before = 0;
TranslationTableCharacter *c = NULL;
Expand All @@ -4107,6 +4019,132 @@ compileRule (FileInfo * nested)
return 1;
}
opcode = getOpcode (nested, &token);

/* these 9 general purpose emphasis opcodes are compiled further down to more specific internal opcodes:
* - singleletteremph
* - emphword
* - emphwordstop
* - firstletteremph
* - lastletteremph
* - firstwordemph
* - lastwordemphbefore
* - lastwordemphafter
* - lenemphphrase
*/
switch (opcode)
{
case CTO_EmphClass:
if (getToken(nested, &token, "emphasis class"))
if (parseChars(nested, &emphClass, &token))
{
char * s = malloc(sizeof(char) * (emphClass.length + 1));
for (k = 0; k < emphClass.length; k++)
s[k] = (char)emphClass.chars[k];
s[k++] = '\0';
for (i = 0; table->emphClasses[i]; i++)
if (strcmp(s, table->emphClasses[i]) == 0)
{
logMessage (LOG_WARN, "Duplicate emphasis class: %s", s);
warningCount++;
free(s);
return 1;
}
if (i < MAX_EMPH_CLASSES)
{
switch (i)
{
case 0:
/* For backwards compatibility (i.e. because programs will assume the first 3
* typeform bits are `italic', `underline' and `bold') we require that the first
* 3 emphclass definitions are (in that order):
*
* emphclass italic
* emphclass underline
* emphclass bold
*
* While it would be possible to use the emphclass opcode only for defining
* _additional_ classes (not allowing for them to be called italic, underline or
* bold), thereby reducing the amount of boilerplate, we deliberately choose not
* to do that in order to not give italic, underline and bold any special
* status. The hope is that eventually all programs will use liblouis for
* emphasis the recommended way (i.e. by looking up the supported typeforms in
* the documentation or API) so that we can drop this restriction.
*/
if (strcmp(s, "italic") != 0)
{
logMessage (LOG_ERROR, "First emphasis class must be \"italic\" but got %s", s);
errorCount++;
return 0;
}
break;
case 1:
if (strcmp(s, "underline") != 0)
{
logMessage (LOG_ERROR, "Second emphasis class must be \"underline\" but got %s", s);
errorCount++;
return 0;
}
break;
case 2:
if (strcmp(s, "bold") != 0)
{
logMessage (LOG_ERROR, "Third emphasis class must be \"bold\" but got %s", s);
errorCount++;
return 0;
}
break;
}
table->emphClasses[i] = s;
table->emphClasses[i+1] = NULL;
return 1;
}
else
{
logMessage (LOG_ERROR, "Max number of emphasis classes (%i) reached", MAX_EMPH_CLASSES);
errorCount++;
free(s);
return 0;
}
}
compileError (nested, "emphclass must be followed by a valid class name.");
return 0;
case CTO_SingleLetterEmph:
case CTO_EmphWord:
case CTO_EmphWordStop:
case CTO_FirstLetterEmph:
case CTO_LastLetterEmph:
case CTO_FirstWordEmph:
case CTO_LastWordEmphBefore:
case CTO_LastWordEmphAfter:
case CTO_LenEmphPhrase:
ok = 0;
if (getToken(nested, &token, "emphasis class"))
if (parseChars(nested, &emphClass, &token))
{
char * s = malloc(sizeof(char) * (emphClass.length + 1));
for (k = 0; k < emphClass.length; k++)
s[k] = (char)emphClass.chars[k];
s[k++] = '\0';
for (i = 0; table->emphClasses[i]; i++)
if (strcmp(s, table->emphClasses[i]) == 0)
{
/* TODO: compileBrailleIndicator could be called directly here which would remove
the need for values CTO_SingleLetterItal to CTO_LenTransNotePhrase.
*/
opcode = opcode + CTO_SingleLetterItal - CTO_SingleLetterEmph + 9 * i;
ok = 1;
break;
}
if (!ok)
{
logMessage (LOG_ERROR, "Emphasis class %s not declared", s);
errorCount++;
}
free(s);
}
if (!ok)
return ok;
}
switch (opcode)
{ /*Carry out operations */
case CTO_None:
Expand Down Expand Up @@ -5707,6 +5745,7 @@ compileTranslationTable (const char *tableList)
{
char **tableFiles;
char **subTable;
int i;
errorCount = warningCount = fileCount = 0;
table = NULL;
characterClasses = NULL;
Expand All @@ -5720,6 +5759,10 @@ compileTranslationTable (const char *tableList)
opcodeLengths[opcode] = (short) strlen (opcodeNames[opcode]);
}
allocateHeader (NULL);

/* Initialize emphClasses array */
table->emphClasses[0] = NULL;

/* Compile things that are necessary for the proper operation of
liblouis or liblouisxml or liblouisutdml */
compileString ("space \\s 0");
Expand Down Expand Up @@ -5834,6 +5877,18 @@ getLastTableList ()
return scratchBuf;
}

/* Get the emphasis classes declared in table.

   tableList is handed to getTable(); when that call returns a false
   value the result is an empty list.  Otherwise every name in
   table->emphClasses is duplicated with strdup() and stored in
   emphClasses, followed by a terminating NULL.  The copies belong to
   the caller, who must free() each of them.

   The emphClasses array must be at least (MAX_EMPH_CLASSES + 1) long.

   If a copy cannot be allocated the list is terminated at that point,
   so that a caller walking the array up to the first NULL sees (and
   can free) every string that was actually allocated. */
void
getEmphClasses(const char* tableList, const char ** emphClasses)
{
  int i = 0;
  if (getTable(tableList))
    for (; table->emphClasses[i]; i++)
      {
        char *copy = strdup(table->emphClasses[i]);
        /* Stop on allocation failure: storing NULL here and carrying
           on would hide (and leak) all later copies behind the
           terminator. */
        if (!copy)
          break;
        emphClasses[i] = copy;
      }
  emphClasses[i] = NULL;
}

void *EXPORT_CALL
lou_getTable (const char *tableList)
{
Expand Down Expand Up @@ -5996,7 +6051,11 @@ lou_free ()
currentEntry = tableChain;
while (currentEntry)
{
free (currentEntry->table);
int i;
TranslationTableHeader *t = (TranslationTableHeader *)currentEntry->table;
for (i = 0; t->emphClasses[i]; i++)
free(t->emphClasses[i]);
free (t);
previousEntry = currentEntry;
currentEntry = currentEntry->next;
free (previousEntry);
Expand Down
20 changes: 10 additions & 10 deletions liblouis/liblouis.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,19 @@ typedef unsigned short formtype;
typedef enum
{
plain_text = 0x0000,
italic = 0x0001,
underline = 0x0002,
bold = 0x0004,
italic = 0x0001, // emph_1
underline = 0x0002, // emph_2
bold = 0x0004, // emph_3
computer_braille = 0x0008,
passage_break = 0x0010,
word_reset = 0x0020,
script = 0x0040,
trans_note = 0x0080,
trans_note_1 = 0x0100,
trans_note_2 = 0x0200,
trans_note_3 = 0x0400,
trans_note_4 = 0x0800,
trans_note_5 = 0x1000,
script = 0x0040, // emph_4
trans_note = 0x0080, // emph_5
trans_note_1 = 0x0100, // emph_6
trans_note_2 = 0x0200, // emph_7
trans_note_3 = 0x0400, // emph_8
trans_note_4 = 0x0800, // emph_9
trans_note_5 = 0x1000, // emph_10
// used by syllable 0x2000,
// used by syllable 0x4000,
} typeforms;
Expand Down

0 comments on commit de655be

Please sign in to comment.