Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
416 changes: 294 additions & 122 deletions python/AzureSpeechDetection/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,19 @@ class AcsSpeechDetectionProcessor(object):
def __init__(self):
self.acs = AzureConnection()

@staticmethod
def _convert_case_bcp(bcp:str)->str:
if not bcp:
return bcp
sep = '-'
if '_' in bcp:
sep = '_'
elif '-' not in bcp:
return bcp

lang, script = bcp.split(sep)
return f'{lang.lower()}{sep}{script.upper()}'

@staticmethod
def convert_word_timing(
recognized_phrases: Iterable[Mapping[str, Any]],
Expand Down Expand Up @@ -209,10 +222,10 @@ def process_audio(self, job_config: AzureJobConfig) -> List[mpf.AudioTrack]:
)

missing_models = set()
default_locale = job_config.language
default_locale = self._convert_case_bcp(job_config.language)
if (lang := job_config.override_default_language) is not None:
if lang in ISO6393_TO_BCP47:
for locale in ISO6393_TO_BCP47[lang]:
if lang.lower() in ISO6393_TO_BCP47:
for locale in ISO6393_TO_BCP47[lang.lower()]:
if locale in self.acs.supported_locales:
logger.debug(
f"Override default language ('{lang}') detected, "
Expand Down Expand Up @@ -241,18 +254,19 @@ def process_audio(self, job_config: AzureJobConfig) -> List[mpf.AudioTrack]:
locale = default_locale
if job_config.speaker is not None:
speaker_language_valid = False
if (lang := job_config.speaker.language) in ISO6393_TO_BCP47:
for locale in ISO6393_TO_BCP47[lang]:
if locale in self.acs.supported_locales:
speaker_language_valid = True
break
if (lang := job_config.speaker.language):
if lang.lower() in ISO6393_TO_BCP47:
for locale in ISO6393_TO_BCP47[lang.lower()]:
if locale in self.acs.supported_locales:
speaker_language_valid = True
break

if not speaker_language_valid:
missing_models.add(job_config.speaker.language)
ldict = job_config.speaker.language_scores
for lang in sorted(ldict.keys(), key=ldict.get, reverse=True):
if lang in ISO6393_TO_BCP47:
for locale in ISO6393_TO_BCP47[lang]:
if lang.lower() in ISO6393_TO_BCP47:
for locale in ISO6393_TO_BCP47[lang.lower()]:
if locale in self.acs.supported_locales:
logger.warning(
f"Language supplied in feed-forward track "
Expand Down Expand Up @@ -283,6 +297,7 @@ def process_audio(self, job_config: AzureJobConfig) -> List[mpf.AudioTrack]:
)
locale = default_locale

locale = self._convert_case_bcp(locale)
if locale not in self.acs.supported_locales:
raise mpf.DetectionException(
f"Selected locale ('{locale}') is not supported by Azure "
Expand Down
97 changes: 82 additions & 15 deletions python/AzureSpeechDetection/acs_speech_component/azure_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,74 +27,141 @@
# Dict of conversions from ISO639-3 language codes to BCP-47 codes. The first
# BCP-47 code in each value list will be used (the rest are primarily for
# reference in case of later changes or customization)

# Supported languages can be found here:
# https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt
ISO6393_TO_BCP47 = dict(
afr=["af-ZA"],
amh=["am-ET"],
ara=["ar-EG", "ar-SA", "ar-IQ", "ar-IL", "ar-AE", "ar-SY", "ar-LY", "ar-DZ",
"ar-BH", "ar-JO", "ar-KW", "ar-LB", "ar-MA", "ar-OM", "ar-PS", "ar-QA",
"ar-TN", "ar-YE"],
aze=["az-AZ"],
bel=["be-BY"],
ben=["bn-BD", "bn-IN"],
azj=["az-AZ"], # North Azerbaijani
azb=["az-AZ"], # South Azerbaijani
# bel=["be-BY"], # Deprecated
ben=["bn-IN"], # "bn-BD" Bengali-Bangladesh has been deprecated
# bod=["bo"], # Deprecated
bul=["bg-BG"],
bos=["bs-BA"],
cat=["ca-ES"],
# ceb=["ceb"],
ces=["cs-CZ"],
cmn=["zh-CN"],
cze=["cs-CZ"],# ISO-639-2 Variant
cym=["cy-GB"],
wel=["cy-GB"],# ISO-639-2 Variant
dan=["da-DK"], # Note: There is a related dialect JUT - Jutlandic
jut=["da-DK"], # Upon further research, Jutlandic is present in Denmark
# but declining over time.
deu=["de-DE", "de-AT", "de-CH"],
# Many other forms of German exist
gsw=["de-CH"], # Swiss German
bar=["de-AT"], # Bavarian / Upper German variant common in most of Austria
ell=["el-GR"],
eng=["en-US", "en-CA", "en-GB", "en-AU", "en-GH", "en-HK", "en-IN", "en-IE",
"en-KE", "en-NZ", "en-NG", "en-PH", "en-SG", "en-ZA", "en-TZ"],
fra=["fr-FR", "fr-CA", "fr-CH"],
est=["et-EE"], # Estonian (Inclusive)
ekk=["et-EE"], # Standard Estonian
# vro=["et-EE"], Voro, doesn't seem to be direct match
eus=["eu-ES"],
fas=["fa-IR"],
fin=["fi-FI"],
fil=["fil-PH"],
fra=["fr-FR", "fr-BE", "fr-CA", "fr-CH"],
gle=["ga-IE"],
glg=["gl-ES"],
guj=["gu-IN"],
heb=["he-IL"],
hin=["hi-IN"],
hrv=["hr-HR"],
hun=["hu-HU"],
# ohu=["hu-HU"], # Note: Old-Hungarian, might not fully work with modern "hu-HU"
# gug=["gn"], # Deprecated
# hat=[],
# hau=["ha"], # Deprecated
# hbs=["sh"], # Deprecated
hin=["hi-IN"],
# hye=["hy"],
ita=["it-IT", "it-CH"],
ind=["id-ID"],
ice=["is-IS"],
isl=["is-IS"],
jav=["jv-ID"],
jpn=["ja-JP"],
kat=["ka-GE"],
kaz=["kk-KZ"],
kir=["ky-KG"],
khm=["km-KH"],
kxm=["km-KH"], # Northern Khmer, might not work as well.
kan=["kn-IN"],
# kir=["ky-KG"], # Deprecated
kor=["ko-KR"],
# kur=["ku"], # Deprecated
lao=["lo-LA"],
lit=["lt-LT"],
lav=["lv-LV"],
lvs=["lv-LV"], # Standard Latvian
# luo=[],
mkd=["mk-MK"],
mya=["my-MM"],
nan=["zh-TW", "nan-TW"],
mal=["ml-IN"],
mon=["mn-MN"], # Mongolian (Inclusive)
khk=["mn-MN"], # Khalkha Mongolian (Predominant)
mvf=["mn-MN"], # Peripheral Mongolian (Part)
mar=["mr-IN"],
zsm=["ms-MY"],
mlt=["mt-MT"],
nob=["nb-NO"],
nep=["ne-NP"], # Nepali (Macrolanguage)
npi=["ne-NP"], # Nepali
nld=["nl-NL", "nl-BE"], # Netherlands and Belgium
# omr=["mr-IN"], # Old Marathi, might not work
# nde=["nd"],
# orm=["om"],
pan=["pa-IN"],
pes=["fa-IR"],
pol=["pl-PL"],
por=["pt-BR", "pt-PT"],
prs=["prs-AF"],
pus=["pa-AF"],
ron=["ro-RO", "ro-MD"],
por=["pt-BR", "pt-PT"], # pt-BR = Portuguese Brazil, pt-PT = Portuguese Portugal
pus=["ps-AF"], # Pashto, Pushto (Inclusive)
pbu=["ps-AF"], # Northern Pashto
pst=["ps-AF"], # Central Pashto
pbt=["ps-AF"], # Southern Pashto
sin=["si-LK"],
# prs=["prs-AF"], # Deprecated
# pus=["pa-AF"], # Deprecated
ron=["ro-RO"], # ro-MD deprecated
# run=[],
rus=["ru-RU"],
slk=["sk-SK"],
slv=["sl-SI"],
# sna=["sn"],
som=["so-SO"],
spa=["es-MX", "es-US", "es-AR", "es-BO", "es-CL", "es-CO", "es-CR", "es-CU",
"es-DO", "es-EC", "es-SV", "es-GQ", "es-GT", "es-HN", "es-NI", "es-PA",
"es-PY", "es-PE", "es-PR", "es-ES", "es-UY", "es-VE"],

sqi=["sq-AL"],
swa=["sw-KE", "sw-TZ"],
swe=["sv-SE"],
srp=["sr-RS"],
tam=["ta-IN"],
tel=["te-IN"],
# wbq = ["te-IN"], Waddar/Vadari is related to Telugu.
# tat=[],
tgk=["tg-TJ"],
tgl=["fil-PH", "tl-PH"],
# tgk=["tg-TJ"], # Deprecated
tgl=["fil-PH"], # "tl-PH" deprecated
tha=["th-TH"],
# tir=[],
tpi=["tpi-PG"],
# tpi=["tpi-PG"], # Deprecated
tur=["tr-TR"],
ukr=["uk-UA"],
urd=["ur-IN"],
uzb=["uz-UZ"],
vie=["vi-VN"],
yue=["zh-HK", "yue-CN"],
cmn=["zh-CN", "zh-CN-shandong", "zh-CN-sichuan", "zh-HK", "zh-TW"],
zho=["zh-CN", "zh-CN-shandong", "zh-CN-sichuan", "zh-HK", "zh-TW"],
yue=["yue-CN", "zh-HK"], # Cantonese
wuu=["wuu-CN"],
nan=["zh-TW"], # nan-TW deprecated
# Note, Taiwanese has one standard + one major dialect,
# not sure which is covered better by Azure.
zul=["zu-ZA"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"description": "Uses Azure Cognitive Services to perform speech-to-text.",
"actionType": "DETECTION",
"trackType": "SPEECH",
"outputChangedCounter" : 2,
"outputChangedCounter": 2,
"requiresCollection": {
"states": []
},
Expand Down Expand Up @@ -59,7 +59,7 @@
},
{
"name": "LANGUAGE",
"description": "The language/locale to use for transcription.",
"description": "The language/locale, in BCP-47 format, to use for transcription. Please consult README to review Azure's supported list of BCP-47 codes.",
"type": "STRING",
"defaultValue": "en-US"
},
Expand Down Expand Up @@ -193,4 +193,4 @@
]
}
]
}
}
6 changes: 3 additions & 3 deletions python/AzureSpeechDetection/tests/test_acs_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def test_audio_file(self):
stop_time=-1,
job_properties=get_test_properties(
DIARIZE='FALSE',
LANGUAGE='en-US',
LANGUAGE='EN-us',
USE_SAS_AUTH='TRUE'
),
media_properties={},
Expand All @@ -137,7 +137,7 @@ def test_video_file(self):
stop_frame=-1,
job_properties=get_test_properties(
DIARIZE='FALSE',
LANGUAGE='en-US'
LANGUAGE='En-Us'
),
media_properties=dict(
FPS='24'
Expand Down Expand Up @@ -204,7 +204,7 @@ def test_language(self):
stop_time=-1,
job_properties=get_test_properties(
DIARIZE='TRUE',
LANGUAGE='en-US'
LANGUAGE='en-us'
),
media_properties={},
feed_forward_track=None
Expand Down
Loading