Permalink
Browse files

Use Unicode CLDR to create speech symbol dictionaries with emojis (#8758)

* Add emoji dictionaries as a git submodule

* Scons implementation

* Load emoji dictionaries in characterProcessing

* Add speak emoji descriptions entry to settings

* Support multiple emoji sources per locale

* Update emojiDict_sconscript

* Rename emoji dictionaries to cldr

* Cache locales for which no symbols are available

* Clear CLDR data when saving the option is changed

* Handle config profile switches properly for CLDR data

* Update copyright

* Revert unnecessary change to sconstruct

* User guide

* Remove all eSpeak emoji dictsource files before compiling eSpeak

* Update what's new
  • Loading branch information...
leonardder authored and michaelDCurran committed Sep 25, 2018
1 parent 7b133e6 commit 21065faf54c043db4f887845c097e14f7f7a2561
View
@@ -39,3 +39,4 @@ uninstaller/UAC.nsh
*.pyo
*.dmp
tests/unit/nvda.ini
source/locale/*/cldr.dic
View
@@ -10,6 +10,7 @@
[submodule "include/espeak"]
path = include/espeak
url=https://github.com/espeak-ng/espeak-ng
ignore = dirty
[submodule "include/sonic"]
path = include/sonic
url = https://github.com/waywardgeek/sonic.git
@@ -34,3 +35,6 @@
[submodule "include/configobj"]
path = include/configobj
url = https://github.com/DiffSK/configobj.git
[submodule "include/cldr-emoji-annotation"]
path = include/cldr-emoji-annotation
url = https://github.com/fujiwarat/cldr-emoji-annotation
View
@@ -0,0 +1,124 @@
###
#This file is a part of the NVDA project.
#URL: http://www.nvda-project.org/
#Copyright 2018 NV Access Limited, Babbage B.V.
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License version 2.0, as published by
#the Free Software Foundation.
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#This license can be found at:
#http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
###
Import(
'env', 'sourceDir',
)
def createCLDRAnnotationsDict(sources, dest):
	"""Build a speech symbols dictionary file from CLDR annotation XML files.

	Every C{annotation} element with C{type="tts"} contributes one symbol entry:
	the C{cp} attribute is the pattern and the element text, with colons removed,
	is the description. All entries are written with the symbol level "some".

	@param sources: paths of the annotation XML files, parsed in the given order.
		When the same C{cp} occurs more than once, the description read last wins,
		while the entry keeps the position of its first occurrence.
	@param dest: path of the dictionary file to write (UTF-8 with BOM, CRLF line endings).
	@raise AssertionError: if none of the sources yielded a usable tts annotation.
	"""
	import codecs
	from xml.etree import ElementTree
	from collections import OrderedDict
	cldrDict = OrderedDict()
	for source in sources:
		tree = ElementTree.parse(source)
		for element in tree.iter("annotation"):
			if element.attrib.get("type") != "tts":
				continue
			# ElementTree reports the text of an empty element as None;
			# there is nothing to speak for such an annotation, so skip it instead of crashing.
			if not element.text:
				continue
			cldrDict[element.attrib['cp']] = element.text.replace(":","")
	# Raised explicitly rather than asserted, so the check survives python -O.
	if not cldrDict:
		raise AssertionError("cldrDict is empty")
	with codecs.open(dest, "w", "utf_8_sig", errors="replace") as dictFile:
		dictFile.write(u"symbols:\r\n")
		# items() rather than iteritems(): works on both Python 2 and Python 3.
		for pattern, description in cldrDict.items():
			dictFile.write(u"{pattern}\t{description}\tsome\r\n".format(
				pattern=pattern,
				description=description
			))
# Action that runs createCLDRAnnotationsDict for a build step:
# every source node is passed on as a path, and the first target node is the file to write.
cldrDictAction=env.Action(
lambda target,source,env: createCLDRAnnotationsDict([src.path for src in source], target[0].path),
# Second argument: produces the 'Generating <target>' text for this action
# (presumably what SCons prints when the action runs -- confirm against the SCons Action documentation).
lambda target,source,env: 'Generating %s'%target[0],
)
# Builder turning .xml sources into a .dic target using the action above.
cldrDictBuilder=env.Builder(
action=cldrDictAction,
suffix='.dic',
src_suffix='.xml',
)
# Register the builder on the environment so it can be invoked as env.cldrDict(target, sources).
env['BUILDERS']['cldrDict']=cldrDictBuilder
# Maps a destination locale (the directory name under locale/ that receives cldr.dic)
# to a tuple of CLDR annotation file names (without the .xml extension) to build it from.
# The tuple order is the order in which the files are handed to the cldrDict builder.
# Commented-out entries have no sources -- presumably locales without CLDR data; confirm.
NVDAToCLDRLocales = {
"af_ZA":("af",),
"am":("am",),
#"an":(),
"ar":("ar",),
"as":("as",),
"bg":("bg",),
"bn":("bn",),
"ca":("ca",),
#"ckb":(),
"cs":("cs",),
"da":("da",),
"de":("de",),
"de_CH":("de_CH",),
"el":("el",),
"en":("en_001","en"),
"es":("es",),
#"es_CO":(),
"fa":("fa",),
"fi":("fi",),
"fr":("fr",),
"ga":("ga",),
"gl":("gl",),
"gu":("gu",),
"he":("he",),
"hi":("hi",),
"hr":("hr",),
"hu":("hu",),
"id":("id",),
"is":("is",),
"it":("it",),
"ja":("ja",),
"ka":("ka",),
#"kmr":(),
"kn":("kn",),
"ko":("ko",),
#"kok":(),
"ky":("ky",),
"lt":("lt",),
"mk":("mk",),
"ml":("ml",),
"mn":("mn",),
#"mni":(),
"my":("my",),
"nb_NO":("nb",),
"ne":("ne",),
"nl":("nl",),
"nn_NO":("nn",),
"pa":("pa",),
"pl":("pl",),
"pt_BR":("pt",),
# NOTE(review): this key is lower-case, unlike the other region-qualified keys
# (af_ZA, de_CH, nb_NO, nn_NO, pt_BR) -- confirm it matches the locale directory name.
"pt_pt":("pt","pt_PT"),
"ro":("ro",),
"ru":("ru",),
"sk":("sk",),
"sl":("sl",),
"sq":("sq",),
"sr":("sr",),
"sv":("sv",),
"ta":("ta",),
"te":("te",),
"th":("th",),
"tr":("tr",),
"uk":("uk",),
}
# Directories holding the CLDR annotation XML files.
annotationsDir = env.Dir("include/cldr-emoji-annotation/annotations")
annotationsDerivedDir = env.Dir("include/cldr-emoji-annotation/annotationsDerived")
# Declare one cldr.dic target per destination locale.
# items() rather than iteritems(): works on both Python 2 and Python 3.
for destLocale, sourceLocales in NVDAToCLDRLocales.items():
	# First add all annotations, then the derived ones.
	cldrSources = [
		annotationsDir.File("%s.xml" % sourceLocale)
		for sourceLocale in sourceLocales
	]
	cldrSources.extend(
		annotationsDerivedDir.File("%s.xml" % sourceLocale)
		for sourceLocale in sourceLocales
	)
	env.cldrDict(sourceDir.Dir("locale/%s" % destLocale).File("cldr.dic"), cldrSources)
@@ -157,6 +157,13 @@ env.Install(espeakRepo.Dir('dictsource'),env.Glob(os.path.join(espeakRepo.abspat
#Compile all dictionaries
missingDicts=['zhy', ] #'mt','tn','tt']
dictSourcePath=espeakRepo.Dir('dictsource').abspath
# Remove emoji files before compiling dictionaries.
# Currently many of these simply crash eSpeak at runtime.
# Also, our own emoji processing using CLDR data is preferred.
emojiGlob = os.path.join(espeakRepo.abspath,'dictsource','*_emoji')
for f in glob(emojiGlob):
print("Removing emoji file: %s"%f)
os.remove(f)
for f in env.Glob(os.path.join(dictSourcePath,'*_rules')):
lang=f.name.split('_')[0]
if lang in missingDicts: continue
View
@@ -212,6 +212,9 @@ env.SideEffect('_txt2tags',htmlFile)
devGuide=env.Command(devDocsOutputDir.File('developerGuide.html'),htmlFile,Move('$TARGET','$SOURCE'))
env.Alias("developerGuide",devGuide)
# Build unicode CLDR dictionaries
env.SConscript('cldrDict_sconscript',exports=['env', 'sourceDir'])
# A builder to generate an NVDA distribution.
def NVDADistGenerator(target, source, env, for_signature):
buildVersionFn = os.path.join(str(source[0]), "_buildVersion.py")
@@ -1,6 +1,6 @@
#characterProcessing.py
#A part of NonVisual Desktop Access (NVDA)
#Copyright (C) 2010-2011 NV Access Inc, World Light Information Limited, Hong Kong Blind Union
#Copyright (C) 2010-2018 NV Access Limited, World Light Information Limited, Hong Kong Blind Union, Babbage B.V.
#This file is covered by the GNU General Public License.
#See the file COPYING for more details.
@@ -11,6 +11,7 @@
import re
from logHandler import log
import globalVars
import config
class LocaleDataMap(object):
"""Allows access to locale-specific data objects, dynamically loading them if needed on request"""
@@ -59,6 +60,12 @@ def invalidateLocaleData(self, locale):
except KeyError:
pass
def invalidateAllData(self):
	"""Drop every cached data object held by this locale map.
	The backing dictionary is emptied in place, so the data object for any locale
	has to be created again the next time it is requested.
	"""
	cachedData = self._dataMap
	cachedData.clear()
class CharacterDescriptions(object):
"""
Represents a map of characters to one or more descriptions (examples) for that character.
@@ -116,7 +123,7 @@ def getCharacterDescription(locale,character):
if not desc and not locale.startswith('en'):
desc=getCharacterDescription('en',character)
return desc
# Speech symbol levels
SYMLVL_NONE = 0
SYMLVL_SOME = 100
@@ -350,11 +357,24 @@ def _saveSymbol(self, symbol):
fields.append("# %s" % symbol.displayName)
return u"\t".join(fields)
_noSymbolLocalesCache = set()
def _getSpeechSymbolsForLocale(locale):
if locale in _noSymbolLocalesCache:
raise LookupError
builtin = SpeechSymbols()
if config.conf['speech']['includeCLDR']:
# Try to load CLDR data when processing is on.
# Load the data before loading other symbols,
# in order to allow translators to override them.
try:
builtin.load(os.path.join("locale", locale, "cldr.dic"),
allowComplexSymbols=False)
except IOError:
log.debugWarning("No CLDR data for locale %s" % locale)
try:
builtin.load(os.path.join("locale", locale, "symbols.dic"))
except IOError:
_noSymbolLocalesCache.add(locale)
raise LookupError("No symbol information for locale %s" % locale)
user = SpeechSymbols()
try:
@@ -632,3 +652,19 @@ def processSpeechSymbol(locale, symbol):
except KeyError:
pass
return symbol
def clearSpeechSymbols():
	"""Throw away all cached speech symbol data.
	Both the per-locale symbol data and the per-locale symbol processors are invalidated,
	so the next request to pronounce symbols has to fetch the data again.
	"""
	# Same order as before: first the symbol data, then the processors built on top of it.
	for localeDataMap in (SpeechSymbolProcessor.localeSymbols, _localeSpeechSymbolProcessors):
		localeDataMap.invalidateAllData()
def handlePostConfigProfileSwitch(prevConf=None):
	"""Clear the cached speech symbols when a profile switch changed the includeCLDR setting.

	@param prevConf: the configuration as it was before the profile switch,
		indexable as C{prevConf["speech"]["includeCLDR"]}.
		When it is C{None} or empty there is nothing to compare against, so nothing is done.
	"""
	if not prevConf:
		return
	# Compare by value, not identity: "is not" only gives the right answer
	# as long as both sides happen to be the very same bool objects.
	if prevConf["speech"]["includeCLDR"] != config.conf["speech"]["includeCLDR"]:
		# Either included or excluded CLDR data, so clear the cache.
		clearSpeechSymbols()
config.post_configProfileSwitch.register(handlePostConfigProfileSwitch)
View
@@ -364,13 +364,14 @@ def _handleProfileSwitch(self):
if not self._shouldHandleProfileSwitch:
self._pendingHandleProfileSwitch = True
return
init = self.rootSection is None
currentRootSection = self.rootSection
init = currentRootSection is None
# Reset the cache.
self.rootSection = AggregatedSection(self, (), self.spec, self.profiles)
if init:
# We're still initialising, so don't notify anyone about this change.
return
post_configProfileSwitch.notify()
post_configProfileSwitch.notify(prevConf=currentRootSection.dict())
def _initBaseConf(self, factoryDefaults=False):
fn = os.path.join(globalVars.appArgs.configPath, "nvda.ini")
@@ -440,6 +441,9 @@ def get(self, key, default=None):
def __setitem__(self, key, val):
self.rootSection[key] = val
def dict(self):
	"""Return the current root section as a dictionary.
	This simply delegates to the C{dict} method of C{self.rootSection}.
	"""
	rootSection = self.rootSection
	return rootSection.dict()
def listProfiles(self):
for name in os.listdir(os.path.join(globalVars.appArgs.configPath, "profiles")):
name, ext = os.path.splitext(name)
@@ -900,13 +904,13 @@ def _cacheLeaf(self, key, spec, val):
self._cache[key] = val
return val
def iteritems(self):
def __iter__(self):
keys = set()
# Start with the cached items.
for key, val in self._cache.iteritems():
keys.add(key)
if val is not KeyError:
yield key, val
yield key
# Walk through the profiles and spec looking for items not yet cached.
for profile in itertools.chain(reversed(self.profiles), (self._spec,)):
if not profile:
@@ -915,16 +919,37 @@ def iteritems(self):
if key in keys:
continue
keys.add(key)
# Use __getitem__ so caching, AggregatedSections, etc. are handled.
try:
yield key, self[key]
except KeyError:
# This could happen if the item is in the spec but there's no default.
pass
yield key
def iteritems(self):
	"""Generate C{(key, value)} pairs for every key of this section that has a value.
	Keys are taken from iterating over self and values are fetched by indexing self;
	a key whose lookup raises C{KeyError} is left out.
	"""
	for key in self:
		try:
			val = self[key]
		except KeyError:
			# This could happen if the item is in the spec but there's no default.
			continue
		yield key, val
def copy(self):
	"""Return a new, shallow dictionary built from the pairs produced by L{iteritems}."""
	return {key: val for key, val in self.iteritems()}
def dict(self):
	"""Return a copy of self as a plain dictionary.
	Adapted from L{configobj.Section.dict}.
	Nested L{AggregatedSection}s are converted recursively, and list and tuple values are copied
	rather than referenced. Any other value is stored as is.
	A key whose value cannot be retrieved (the lookup raises C{KeyError}) is left out,
	just like L{iteritems} leaves it out.
	@return: a new dictionary holding the keys of this section that have a value.
	"""
	newdict = {}
	for entry in self:
		try:
			this_entry = self[entry]
		except KeyError:
			# This could happen if the item is in the spec but there's no default.
			continue
		if isinstance(this_entry, AggregatedSection):
			this_entry = this_entry.dict()
		elif isinstance(this_entry, list):
			# create a copy rather than a reference
			this_entry = list(this_entry)
		elif isinstance(this_entry, tuple):
			# create a copy rather than a reference
			this_entry = tuple(this_entry)
		newdict[entry] = this_entry
	return newdict
def __setitem__(self, key, val):
spec = self._spec.get(key) if self.spec else None
if isinstance(spec, dict) and not isinstance(val, dict):
@@ -32,6 +32,7 @@
synth = string(default=auto)
symbolLevel = integer(default=100)
trustVoiceLanguage = boolean(default=true)
includeCLDR = boolean(default=True)
beepSpeechModePitch = integer(default=10000,min=50,max=11025)
outputDevice = string(default=default)
autoLanguageSwitching = boolean(default=true)
@@ -1095,6 +1095,13 @@ def makeSettings(self, settingsSizer):
self.trustVoiceLanguageCheckbox = settingsSizerHelper.addItem(wx.CheckBox(self,label=trustVoiceLanguageText))
self.trustVoiceLanguageCheckbox.SetValue(config.conf["speech"]["trustVoiceLanguage"])
# Translators: This is the label for a checkbox in the
# voice settings panel (if checked, data from the unicode CLDR will be used
# to speak emoji descriptions).
includeCLDRText = _("Include Unicode Consortium data (including emoji) when processing characters and symbols")
self.includeCLDRCheckbox = settingsSizerHelper.addItem(wx.CheckBox(self,label=includeCLDRText))
self.includeCLDRCheckbox.SetValue(config.conf["speech"]["includeCLDR"])
# Translators: This is a label for a setting in voice settings (an edit box to change voice pitch for capital letters; the higher the value, the pitch will be higher).
capPitchChangeLabelText=_("Capital pitch change percentage")
self.capPitchChangeEdit=settingsSizerHelper.addLabeledControl(capPitchChangeLabelText, nvdaControls.SelectOnFocusSpinCtrl,
@@ -1179,6 +1186,11 @@ def onSave(self):
config.conf["speech"]["autoDialectSwitching"]=self.autoDialectSwitchingCheckbox.IsChecked()
config.conf["speech"]["symbolLevel"]=characterProcessing.CONFIGURABLE_SPEECH_SYMBOL_LEVELS[self.symbolLevelList.GetSelection()]
config.conf["speech"]["trustVoiceLanguage"]=self.trustVoiceLanguageCheckbox.IsChecked()
currentIncludeCLDR = config.conf["speech"]["includeCLDR"]
config.conf["speech"]["includeCLDR"] = newIncludeCldr = self.includeCLDRCheckbox.IsChecked()
if currentIncludeCLDR is not newIncludeCldr:
# Either included or excluded CLDR data, so clear the cache.
characterProcessing.clearSpeechSymbols()
config.conf["speech"][synth.name]["capPitchChange"]=self.capPitchChangeEdit.Value
config.conf["speech"][synth.name]["sayCapForCapitals"]=self.sayCapForCapsCheckBox.IsChecked()
config.conf["speech"][synth.name]["beepForCapitals"]=self.beepForCapsCheckBox.IsChecked()
View
@@ -8,6 +8,7 @@ What's New in NVDA
== New Features ==
- New braille tables: Chinese (China, Mandarin) grade 1 and grade 2. (#5553)
- Replied / Forwarded status is now reported on mail items in the Microsoft Outlook message list. (#6911)
- NVDA is now able to read descriptions for emoji as well as other characters that are part of the Unicode Common Locale Data Repository. (#6523)
== Changes ==
@@ -32,6 +33,7 @@ What's New in NVDA
- If you need to make a wx widget accessible which isn't already, it is possible to do so by using an instance of gui.accPropServer.IAccPropServer_impl. (#7491)
- See the implementation of gui.nvdaControls.ListCtrlAccPropServer for more info.
- Updated configobj to 5.1.0dev commit 5b5de48a. (#4470)
- The config.post_configProfileSwitch action now takes the optional prevConf keyword argument, allowing handlers to take action based on differences between configuration before and after the profile switch. (#8758)
= 2018.3.1 =
Oops, something went wrong.

0 comments on commit 21065fa

Please sign in to comment.