Merge 198250f into 613994a

nvaccess · May 22, 2024 · commit e687938
2 parents: 613994a + 198250f

Showing 5 changed files with 36 additions and 21 deletions.
diff --git a/source/braille.py b/source/braille.py
@@ -499,20 +499,20 @@ def update(self):
 			mode |= louis.compbrlAtCursor
 
 		converter: UnicodeNormalizationOffsetConverter | None = None
-		if config.conf["braille"]["unicodeNormalization"] and not isUnicodeNormalized(self.rawText):
-			converter = UnicodeNormalizationOffsetConverter(self.rawText)
+		textToTranslate = self.rawText
+		textToTranslateTypeforms = self.rawTextTypeforms
+		cursorPos = self.cursorPos
+		if config.conf["braille"]["unicodeNormalization"] and not isUnicodeNormalized(textToTranslate):
+			converter = UnicodeNormalizationOffsetConverter(textToTranslate)
 			textToTranslate = converter.encoded
-			# Typeforms must be adapted to represent normalized characters.
-			textToTranslateTypeforms = [
-				self.rawTextTypeforms[strOffset] for strOffset in converter.computedEncodedToStrOffsets
-			]
-			# Convert the cursor position to a normalized offset.
-			cursorPos = converter.strToEncodedOffsets(self.cursorPos)
-		else:
-			textToTranslate = self.rawText
-			textToTranslateTypeforms = self.rawTextTypeforms
-			cursorPos = self.cursorPos
-
+			if textToTranslateTypeforms is not None:
+				# Typeforms must be adapted to represent normalized characters.
+				textToTranslateTypeforms = [
+					textToTranslateTypeforms[strOffset] for strOffset in converter.computedEncodedToStrOffsets
+				]
+			if cursorPos is not None:
+				# Convert the cursor position to a normalized offset.
+				cursorPos = converter.strToEncodedOffsets(cursorPos)
 		self.brailleCells, brailleToRawPos, rawToBraillePos, self.brailleCursorPos = louisHelper.translate(
 			[handler.table.fileName, "braille-patterns.cti"],
 			textToTranslate,

diff --git a/source/config/configSpec.py b/source/config/configSpec.py
@@ -35,7 +35,7 @@
 	# symbolLevel: One of the characterProcessing.SymbolLevel values.
 	symbolLevel = integer(default=100)
 	trustVoiceLanguage = boolean(default=true)
-	unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="disabled")
+	unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled")
 	includeCLDR = boolean(default=True)
 	beepSpeechModePitch = integer(default=10000,min=50,max=11025)
 	outputDevice = string(default=default)
@@ -83,7 +83,7 @@
 		optionsEnum="ReviewRoutingMovesSystemCaretFlag", behaviorOfDefault="NEVER")
 	readByParagraph = boolean(default=false)
 	wordWrap = boolean(default=true)
-	unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="disabled")
+	unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled")
 	focusContextPresentation = option("changedContext", "fill", "scroll", default="changedContext")
 	interruptSpeechWhileScrolling = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled")
 	showSelection = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled")

diff --git a/source/textUtils.py b/source/textUtils.py
@@ -478,17 +478,24 @@ def _calculateOffsets(self) -> tuple[tuple[int], tuple[int]]:
 					# and still matches the beginning of the normalized buffer.
 					for i in range(len(originBuffer)):
 						originPart = originBuffer[: (i + 1)]
+						originPartLen = len(originPart)
 						normalizedPart = unicodedata.normalize(self.normalizationForm, originPart)
+						normalizedPartLen = len(normalizedPart)
 						if (
 							originPart == normalizedPart
 							or not normalizedBuffer.startswith(normalizedPart)
 						):
 							continue
-						originPartLen = len(originPart)
 						originBuffer = originBuffer[originPartLen:]
-						normalizedPartLen = len(normalizedPart)
 						normalizedBuffer = normalizedBuffer[normalizedPartLen:]
 						break
+					else:
+						# No normalizable characters in originBuffer.
+						# All characters are now copied to originPart and normalizedPart.
+						assert originBuffer == originPart
+						assert normalizedBuffer == normalizedPart
+						# Reset buffers to ensure the while loop doesn't run next time.
+						originBuffer = normalizedBuffer = ""
 					# Map the original indices to the normalized indices.
 					# originMultiplier is used to multiply indices in origin
 					# when a character takes more space in origin than in normalized.

diff --git a/tests/unit/test_textUtils.py b/tests/unit/test_textUtils.py
@@ -268,3 +268,11 @@ def test_normalizedOffsetsDifferentOrder(self):
 		self.assertSequenceEqual(converter.computedStrToEncodedOffsets, expectedStrToEncoded)
 		expectedEncodedToStr = (0, 2, 1, 3, 4, 5, 6, 8, 7, 9, 10)
 		self.assertSequenceEqual(converter.computedEncodedToStrOffsets, expectedEncodedToStr)
+
+	def test_normalizedOffsetsMixedSpaces(self):  # NFKC offsets for a no-break space followed by a space.
+		text = "\xa0 "  # U+00A0 (no-break space) followed by U+0020 (space).
+		converter = UnicodeNormalizationOffsetConverter(text, "NFKC")
+		expectedStrToEncoded = (0, 1)  # Each original character is expected to map one-to-one.
+		self.assertSequenceEqual(converter.computedStrToEncodedOffsets, expectedStrToEncoded)
+		expectedEncodedToStr = (0, 1)  # The reverse mapping is expected to be one-to-one as well.
+		self.assertSequenceEqual(converter.computedEncodedToStrOffsets, expectedEncodedToStr)
diff --git a/user_docs/en/userGuide.md b/user_docs/en/userGuide.md
@@ -1809,8 +1809,8 @@ If you find that NVDA is reading punctuation in the wrong language for a particu
 ##### Unicode normalization {#SpeechUnicodeNormalization}
 | . {.hideHeaderRow} |.|
 |---|---|
-|Options |Default (Disabled), Enabled, Disabled|
-|Default |Disabled|
+|Options |Default (Enabled), Enabled, Disabled|
+|Default |Enabled|
 
 When this option is enabled, unicode normalization is performed on the text that is spoken by NVDA.
 This is beneficial when speaking characters that can be represented in several forms.
@@ -2071,8 +2071,8 @@ Enabling this may allow for more fluent reading, but generally requires you to s
 ##### Unicode normalization {#BrailleUnicodeNormalization}
 | . {.hideHeaderRow} |.|
 |---|---|
-|Options |Default (Disabled), Enabled, Disabled|
-|Default |Disabled|
+|Options |Default (Enabled), Enabled, Disabled|
+|Default |Enabled|
 
 When this option is enabled, unicode normalization is performed on the text that is brailled on the braille display.
 This is beneficial when coming across characters in braille that are unknown in a particular braille table and which have a compatible alternative, like the bold and italic characters commonly used on social media.