Change how emphasis phrases are determined

Also allow non-whole words at the start and end of a phrase. Whether the last word contributes to the length of the phrase if it is not a whole word depends on whether endemphphrase after or before is used. For capitalization the behavior was not changed.
liblouis · Nov 24, 2023 · 30df4d9 · 30df4d9
1 parent 3206c8c
commit 30df4d9
Show file tree

Hide file tree

Showing 2 changed files with 86 additions and 64 deletions.
diff --git a/liblouis/lou_translateString.c b/liblouis/lou_translateString.c
@@ -2878,12 +2878,11 @@ convertToPassage(const int pass_start, const int pass_end, const int word_start,
 	const TranslationTableOffset *emphRule = table->emphRules[class->rule];
 	const TranslationTableRule *indicRule;
 
-	for (i = pass_start; i <= pass_end; i++)
-		if (wordBuffer[i] & WORD_WHOLE) {
-			buffer[i].symbol &= ~class->value;
-			buffer[i].word &= ~class->value;
-			wordBuffer[i] &= ~WORD_WHOLE;
-		}
+	for (i = pass_start; i <= pass_end; i++) {
+		buffer[i].symbol &= ~class->value;
+		buffer[i].word &= ~class->value;
+		wordBuffer[i] &= ~WORD_WHOLE;
+	}
 
 	buffer[pass_start].begin |= class->value;
 	if (brailleIndicatorDefined(emphRule[endOffset], table, &indicRule) ||
@@ -2912,68 +2911,92 @@ resolveEmphasisPassages(EmphasisInfo *buffer, const EmphasisClass *class,
 		const TranslationTableHeader *table, const InString *input,
 		unsigned int *wordBuffer) {
 	const TranslationTableOffset *emphRule = table->emphRules[class->rule];
-	unsigned int word_cnt = 0;
-	int pass_start = -1, pass_end = -1, word_start = -1, in_word = 0, in_pass = 0;
-	int i;
+	int in_word = 0, last_word_start = -1, last_word_end = -1;
+	int in_emph_word = 0, last_emph_symbol = -1;
+	int in_pass = 0, last_pass_word_start = -1, last_pass_word_end = -1, pass_start = -1;
+	unsigned int pass_word_cnt = 0;
+	int endphraseafter_defined = emphRule[endPhraseAfterOffset] || emphRule[endOffset];
 
-	for (i = 0; i < input->length; i++) {
-		/* check if at beginning of word (first character that is not a space) */
-		if (!in_word)
-			if (wordBuffer[i] & WORD_CHAR) {
-				in_word = 1;
-				/* only whole emphasized words can be part of a passage (in case of caps,
-				 * this also includes words without letters, but only if the next word
-				 * with letters is a whole word) */
-				if (wordBuffer[i] & WORD_WHOLE) {
-					if (!in_pass) {
-						in_pass = 1;
-						pass_start = i;
-						pass_end = -1;
-						word_cnt = 1;
-					} else
-						word_cnt++;
-					word_start = i;
-					continue;
-				} else if (in_pass) {
-					/* it is a passage only if the number of words is greater than or
-					 * equal to the minimum length (lencapsphrase / lenemphphrase) */
-					if (word_cnt >= emphRule[lenPhraseOffset])
-						if (pass_end >= 0) {
-							convertToPassage(pass_start, pass_end, word_start, buffer,
-									class, table, wordBuffer);
-						}
-					in_pass = 0;
-				}
-			}
+	for (int i = 0; i < input->length; i++) {
 
-		/* check if at end of word */
-		if (in_word)
-			if (!(wordBuffer[i] & WORD_CHAR)) {
-				in_word = 0;
-				if (in_pass) pass_end = i;
-			}
+		/* check if at beginning of word (words are determined by isEmphSpace() and
+		 * further refined at the beginning and end of words based on isEmphasizable()) */
+		if (!in_word && wordBuffer[i] & WORD_CHAR) {
+			in_word = 1;
+			last_word_start = i;
+		} else /* check if at end of word */
+				if (in_word && !(wordBuffer[i] & WORD_CHAR)) {
+			in_word = 0;
+			last_word_end = i;
+		}
 
-		if (in_pass)
-			if ((buffer[i].begin | buffer[i].end | buffer[i].word | buffer[i].symbol) &
-					class->value) {
-				if (word_cnt >= emphRule[lenPhraseOffset])
-					if (pass_end >= 0) {
-						convertToPassage(pass_start, pass_end, word_start, buffer, class,
-								table, wordBuffer);
-					}
-				in_pass = 0;
+		/* check for symbol or word indicator */
+		if (!in_emph_word &&
+				(buffer[i].symbol & class->value ||
+						(buffer[i].word & class->value &&
+								!(buffer[i].end & class->value)))) {
+			if (buffer[i].symbol & class->value) {
+				last_emph_symbol = i;
+			} else {
+				in_emph_word = 1;
 			}
-	}
+			if (in_pass) {
+				/* only whole capitalized words (words without lowercase letters) can be
+				 * part of a passage (note that this also includes words without letters
+				 * if the next word with letters is a whole word) */
+				if (!class->mode || (wordBuffer[i] & WORD_WHOLE)) {
+					last_pass_word_start = i;
+					pass_word_cnt++;
+				} else
+					goto end_passage;
+			}
+		} else /* check for word end indicator or word end */
+				if ((in_emph_word &&
+							(buffer[i].word & class->value &&
+									buffer[i].end & class->value)) ||
+						last_word_end == i) {
+			in_emph_word = 0;
+			if (in_pass) {
+				/* only whole capitalized words can be part of a passage */
+				last_pass_word_end = i;
+			}
+		}
 
-	if (in_pass) {
-		if (word_cnt >= emphRule[lenPhraseOffset]) {
-			if (pass_end >= 0) {
-				if (in_word) {
-					convertToPassage(
-							pass_start, i, word_start, buffer, class, table, wordBuffer);
-				} else {
-					convertToPassage(pass_start, pass_end, word_start, buffer, class,
-							table, wordBuffer);
+		/* check if possibly at beginning of passage */
+		if (!in_pass && (in_emph_word || last_emph_symbol == i)) {
+			/* only whole capitalized words can be part of a passage */
+			if (!class->mode || (wordBuffer[i] & WORD_WHOLE)) {
+				in_pass = 1;
+				pass_start = i;
+				last_pass_word_start = i;
+				last_pass_word_end = -1;
+				pass_word_cnt = 1;
+			}
+		} else /* check if at end of passage */
+				if (in_pass) {
+			if (in_word && !(in_emph_word || last_emph_symbol == i)) {
+			end_passage:
+				in_pass = 0;
+				if (last_pass_word_end < last_pass_word_start) {
+					last_pass_word_end = i;
+				}
+				/* it is a passage only if the number of words is greater than or equal to
+				 * the minimum length (lencapsphrase / lenemphphrase) */
+				/* if the phrase closing indicator is placed before the last word and it
+				 * was not a whole word, the minimum phrase length is increased */
+				if (!endphraseafter_defined && last_pass_word_end != last_word_end) {
+					pass_word_cnt--;
+				}
+				if (pass_word_cnt >= emphRule[lenPhraseOffset])
+					convertToPassage(pass_start, last_pass_word_end, last_pass_word_start,
+							buffer, class, table, wordBuffer);
+			} else if (i == input->length - 1) {
+				if (pass_word_cnt >= emphRule[lenPhraseOffset]) {
+					if (last_pass_word_end < last_pass_word_start) {
+						last_pass_word_end = input->length;
+					}
+					convertToPassage(pass_start, last_pass_word_end, last_pass_word_start,
+							buffer, class, table, wordBuffer);
 				}
 			}
 		}

diff --git a/tests/braille-specs/en-ueb-09-typeforms.yaml b/tests/braille-specs/en-ueb-09-typeforms.yaml
@@ -255,7 +255,6 @@ tests:
   - .7,brev;y is ! s|l ( wit4.',-,%akespe>e
   - typeform:
       italic: '+++++++++++++++++++++++++++            '
-    xfail: true
 
 - - Remember Do unto others ...
   - ',rememb} .7,d unto o!rs.'' 444'