Skip to content

Commit

Permalink
Change how emphasis phrases are determined
Browse files Browse the repository at this point in the history
Also allow non-whole words at the start and end of a phrase.

If the last word of a phrase is not a whole word, whether it counts
toward the length of the phrase depends on whether the "after" or the
"before" variant of endemphphrase is used.

For capitalization the behavior was not changed.
  • Loading branch information
bertfrees committed Nov 24, 2023
1 parent 3206c8c commit 30df4d9
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 64 deletions.
149 changes: 86 additions & 63 deletions liblouis/lou_translateString.c
Original file line number Diff line number Diff line change
Expand Up @@ -2878,12 +2878,11 @@ convertToPassage(const int pass_start, const int pass_end, const int word_start,
const TranslationTableOffset *emphRule = table->emphRules[class->rule];
const TranslationTableRule *indicRule;

for (i = pass_start; i <= pass_end; i++)
if (wordBuffer[i] & WORD_WHOLE) {
buffer[i].symbol &= ~class->value;
buffer[i].word &= ~class->value;
wordBuffer[i] &= ~WORD_WHOLE;
}
for (i = pass_start; i <= pass_end; i++) {
buffer[i].symbol &= ~class->value;
buffer[i].word &= ~class->value;
wordBuffer[i] &= ~WORD_WHOLE;
}

buffer[pass_start].begin |= class->value;
if (brailleIndicatorDefined(emphRule[endOffset], table, &indicRule) ||
Expand Down Expand Up @@ -2912,68 +2911,92 @@ resolveEmphasisPassages(EmphasisInfo *buffer, const EmphasisClass *class,
const TranslationTableHeader *table, const InString *input,
unsigned int *wordBuffer) {
const TranslationTableOffset *emphRule = table->emphRules[class->rule];
unsigned int word_cnt = 0;
int pass_start = -1, pass_end = -1, word_start = -1, in_word = 0, in_pass = 0;
int i;
int in_word = 0, last_word_start = -1, last_word_end = -1;
int in_emph_word = 0, last_emph_symbol = -1;
int in_pass = 0, last_pass_word_start = -1, last_pass_word_end = -1, pass_start = -1;
unsigned int pass_word_cnt = 0;
int endphraseafter_defined = emphRule[endPhraseAfterOffset] || emphRule[endOffset];

for (i = 0; i < input->length; i++) {
/* check if at beginning of word (first character that is not a space) */
if (!in_word)
if (wordBuffer[i] & WORD_CHAR) {
in_word = 1;
/* only whole emphasized words can be part of a passage (in case of caps,
* this also includes words without letters, but only if the next word
* with letters is a whole word) */
if (wordBuffer[i] & WORD_WHOLE) {
if (!in_pass) {
in_pass = 1;
pass_start = i;
pass_end = -1;
word_cnt = 1;
} else
word_cnt++;
word_start = i;
continue;
} else if (in_pass) {
/* it is a passage only if the number of words is greater than or
* equal to the minimum length (lencapsphrase / lenemphphrase) */
if (word_cnt >= emphRule[lenPhraseOffset])
if (pass_end >= 0) {
convertToPassage(pass_start, pass_end, word_start, buffer,
class, table, wordBuffer);
}
in_pass = 0;
}
}
for (int i = 0; i < input->length; i++) {

/* check if at end of word */
if (in_word)
if (!(wordBuffer[i] & WORD_CHAR)) {
in_word = 0;
if (in_pass) pass_end = i;
}
/* check if at beginning of word (words are determined by isEmphSpace() and
* further refined at the beginning and end of words based on isEmphasizable()) */
if (!in_word && wordBuffer[i] & WORD_CHAR) {
in_word = 1;
last_word_start = i;
} else /* check if at end of word */
if (in_word && !(wordBuffer[i] & WORD_CHAR)) {
in_word = 0;
last_word_end = i;
}

if (in_pass)
if ((buffer[i].begin | buffer[i].end | buffer[i].word | buffer[i].symbol) &
class->value) {
if (word_cnt >= emphRule[lenPhraseOffset])
if (pass_end >= 0) {
convertToPassage(pass_start, pass_end, word_start, buffer, class,
table, wordBuffer);
}
in_pass = 0;
/* check for symbol or word indicator */
if (!in_emph_word &&
(buffer[i].symbol & class->value ||
(buffer[i].word & class->value &&
!(buffer[i].end & class->value)))) {
if (buffer[i].symbol & class->value) {
last_emph_symbol = i;
} else {
in_emph_word = 1;
}
}
if (in_pass) {
/* only whole capitalized words (words without lowercase letters) can be
* part of a passage (note that this also includes words without letters
* if the next word with letters is a whole word) */
if (!class->mode || (wordBuffer[i] & WORD_WHOLE)) {
last_pass_word_start = i;
pass_word_cnt++;
} else
goto end_passage;
}
} else /* check for word end indicator or word end */
if ((in_emph_word &&
(buffer[i].word & class->value &&
buffer[i].end & class->value)) ||
last_word_end == i) {
in_emph_word = 0;
if (in_pass) {
/* only whole capitalized words can be part of a passage */
last_pass_word_end = i;
}
}

if (in_pass) {
if (word_cnt >= emphRule[lenPhraseOffset]) {
if (pass_end >= 0) {
if (in_word) {
convertToPassage(
pass_start, i, word_start, buffer, class, table, wordBuffer);
} else {
convertToPassage(pass_start, pass_end, word_start, buffer, class,
table, wordBuffer);
/* check if possibly at beginning of passage */
if (!in_pass && (in_emph_word || last_emph_symbol == i)) {
/* only whole capitalized words can be part of a passage */
if (!class->mode || (wordBuffer[i] & WORD_WHOLE)) {
in_pass = 1;
pass_start = i;
last_pass_word_start = i;
last_pass_word_end = -1;
pass_word_cnt = 1;
}
} else /* check if at end of passage */
if (in_pass) {
if (in_word && !(in_emph_word || last_emph_symbol == i)) {
end_passage:
in_pass = 0;
if (last_pass_word_end < last_pass_word_start) {
last_pass_word_end = i;
}
/* it is a passage only if the number of words is greater than or equal to
* the minimum length (lencapsphrase / lenemphphrase) */
/* if the phrase closing indicator is placed before the last word and it
* was not a whole word, the minimum phrase length is increased */
if (!endphraseafter_defined && last_pass_word_end != last_word_end) {
pass_word_cnt--;
}
if (pass_word_cnt >= emphRule[lenPhraseOffset])
convertToPassage(pass_start, last_pass_word_end, last_pass_word_start,
buffer, class, table, wordBuffer);
} else if (i == input->length - 1) {
if (pass_word_cnt >= emphRule[lenPhraseOffset]) {
if (last_pass_word_end < last_pass_word_start) {
last_pass_word_end = input->length;
}
convertToPassage(pass_start, last_pass_word_end, last_pass_word_start,
buffer, class, table, wordBuffer);
}
}
}
Expand Down
1 change: 0 additions & 1 deletion tests/braille-specs/en-ueb-09-typeforms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,6 @@ tests:
- .7,brev;y is ! s|l ( wit4.',-,%akespe>e
- typeform:
italic: '+++++++++++++++++++++++++++ '
xfail: true

- - Remember Do unto others ...
- ',rememb} .7,d unto o!rs.'' 444'
Expand Down

0 comments on commit 30df4d9

Please sign in to comment.