Skip to content

Commit

Permalink
Merge 198250f into 613994a
Browse files Browse the repository at this point in the history
  • Loading branch information
LeonarddeR committed May 22, 2024
2 parents 613994a + 198250f commit e687938
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 21 deletions.
26 changes: 13 additions & 13 deletions source/braille.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,20 +499,20 @@ def update(self):
mode |= louis.compbrlAtCursor

converter: UnicodeNormalizationOffsetConverter | None = None
if config.conf["braille"]["unicodeNormalization"] and not isUnicodeNormalized(self.rawText):
converter = UnicodeNormalizationOffsetConverter(self.rawText)
textToTranslate = self.rawText
textToTranslateTypeforms = self.rawTextTypeforms
cursorPos = self.cursorPos
if config.conf["braille"]["unicodeNormalization"] and not isUnicodeNormalized(textToTranslate):
converter = UnicodeNormalizationOffsetConverter(textToTranslate)
textToTranslate = converter.encoded
# Typeforms must be adapted to represent normalized characters.
textToTranslateTypeforms = [
self.rawTextTypeforms[strOffset] for strOffset in converter.computedEncodedToStrOffsets
]
# Convert the cursor position to a normalized offset.
cursorPos = converter.strToEncodedOffsets(self.cursorPos)
else:
textToTranslate = self.rawText
textToTranslateTypeforms = self.rawTextTypeforms
cursorPos = self.cursorPos

if textToTranslateTypeforms is not None:
# Typeforms must be adapted to represent normalized characters.
textToTranslateTypeforms = [
textToTranslateTypeforms[strOffset] for strOffset in converter.computedEncodedToStrOffsets
]
if cursorPos is not None:
# Convert the cursor position to a normalized offset.
cursorPos = converter.strToEncodedOffsets(cursorPos)
self.brailleCells, brailleToRawPos, rawToBraillePos, self.brailleCursorPos = louisHelper.translate(
[handler.table.fileName, "braille-patterns.cti"],
textToTranslate,
Expand Down
4 changes: 2 additions & 2 deletions source/config/configSpec.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
# symbolLevel: One of the characterProcessing.SymbolLevel values.
symbolLevel = integer(default=100)
trustVoiceLanguage = boolean(default=true)
unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="disabled")
unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled")
includeCLDR = boolean(default=True)
beepSpeechModePitch = integer(default=10000,min=50,max=11025)
outputDevice = string(default=default)
Expand Down Expand Up @@ -83,7 +83,7 @@
optionsEnum="ReviewRoutingMovesSystemCaretFlag", behaviorOfDefault="NEVER")
readByParagraph = boolean(default=false)
wordWrap = boolean(default=true)
unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="disabled")
unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled")
focusContextPresentation = option("changedContext", "fill", "scroll", default="changedContext")
interruptSpeechWhileScrolling = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled")
showSelection = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled")
Expand Down
11 changes: 9 additions & 2 deletions source/textUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,17 +478,24 @@ def _calculateOffsets(self) -> tuple[tuple[int], tuple[int]]:
# and still matches the beginning of the normalized buffer.
for i in range(len(originBuffer)):
originPart = originBuffer[: (i + 1)]
originPartLen = len(originPart)
normalizedPart = unicodedata.normalize(self.normalizationForm, originPart)
normalizedPartLen = len(normalizedPart)
if (
originPart == normalizedPart
or not normalizedBuffer.startswith(normalizedPart)
):
continue
originPartLen = len(originPart)
originBuffer = originBuffer[originPartLen:]
normalizedPartLen = len(normalizedPart)
normalizedBuffer = normalizedBuffer[normalizedPartLen:]
break
else:
# No normalizable characters in originBuffer.
# All characters are now copied to originPart and normalizedPart.
assert originBuffer == originPart
assert normalizedBuffer == normalizedPart
# Reset buffers to ensure the while loop doesn't run next time.
originBuffer = normalizedBuffer = ""
# Map the original indices to the normalized indices.
# originMultiplier is used to multiply indices in origin
# when a character takes more space in origin than in normalized.
Expand Down
8 changes: 8 additions & 0 deletions tests/unit/test_textUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,11 @@ def test_normalizedOffsetsDifferentOrder(self):
self.assertSequenceEqual(converter.computedStrToEncodedOffsets, expectedStrToEncoded)
expectedEncodedToStr = (0, 2, 1, 3, 4, 5, 6, 8, 7, 9, 10)
self.assertSequenceEqual(converter.computedEncodedToStrOffsets, expectedEncodedToStr)

def test_normalizedOffsetsMixedSpaces(self):
	"""Check the offset tables for a no-break space followed by a regular space under NFKC."""
	# Both characters are expected to keep their own position,
	# so each offset table should be the identity mapping (0, 1).
	offsetConverter = UnicodeNormalizationOffsetConverter("\xa0 ", "NFKC")
	identityOffsets = (0, 1)
	self.assertSequenceEqual(offsetConverter.computedStrToEncodedOffsets, identityOffsets)
	self.assertSequenceEqual(offsetConverter.computedEncodedToStrOffsets, identityOffsets)
8 changes: 4 additions & 4 deletions user_docs/en/userGuide.md
Original file line number Diff line number Diff line change
Expand Up @@ -1809,8 +1809,8 @@ If you find that NVDA is reading punctuation in the wrong language for a particu
##### Unicode normalization {#SpeechUnicodeNormalization}
| . {.hideHeaderRow} |.|
|---|---|
|Options |Default (Disabled), Enabled, Disabled|
|Default |Disabled|
|Options |Default (Enabled), Enabled, Disabled|
|Default |Enabled|

When this option is enabled, Unicode normalization is performed on the text that is spoken by NVDA.
This is beneficial when speaking characters that can be represented in several forms.
Expand Down Expand Up @@ -2071,8 +2071,8 @@ Enabling this may allow for more fluent reading, but generally requires you to s
##### Unicode normalization {#BrailleUnicodeNormalization}
| . {.hideHeaderRow} |.|
|---|---|
|Options |Default (Disabled), Enabled, Disabled|
|Default |Disabled|
|Options |Default (Enabled), Enabled, Disabled|
|Default |Enabled|

When this option is enabled, Unicode normalization is performed on the text that is brailled on the braille display.
This is beneficial when coming across characters in braille that are unknown in a particular braille table and which have a compatible alternative, like the bold and italic characters commonly used on social media.
Expand Down

0 comments on commit e687938

Please sign in to comment.