openmpf · jrobble · Apr 8, 2024 · Apr 8, 2024 · Apr 8, 2024
diff --git a/python/AzureSpeechDetection/README.md b/python/AzureSpeechDetection/README.md
diff --git a/python/AzureSpeechDetection/acs_speech_component/acs_speech_processor.py b/python/AzureSpeechDetection/acs_speech_component/acs_speech_processor.py
@@ -70,6 +70,46 @@ class AcsSpeechDetectionProcessor(object):
     def __init__(self):
         self.acs = AzureConnection()
 
+    @staticmethod
+    def _convert_case_bcp(bcp: str) -> str:
+        """Normalize the letter case of a BCP-47 style locale code.
+
+        Follows the RFC 5646 formatting convention: the first (language)
+        subtag is lowercased, later two-letter subtags (regions) are
+        uppercased, four-letter subtags (scripts) are title-cased, and all
+        other subtags (variants such as 'shandong') are lowercased. For
+        example 'EN-us' becomes 'en-US' and 'ZH-cn-SHANDONG' becomes
+        'zh-CN-shandong'.
+
+        :param bcp: Locale code using '-' or '_' as the subtag separator.
+        :return: The re-cased code, joined with the separator it came in
+            with. Empty or None input, and input without any separator,
+            is returned unchanged.
+        """
+        if not bcp:
+            return bcp
+        # An underscore anywhere in the code takes precedence over '-'.
+        if '_' in bcp:
+            sep = '_'
+        elif '-' in bcp:
+            sep = '-'
+        else:
+            return bcp
+
+        # A code may carry more than two subtags (e.g. 'zh-CN-shandong'),
+        # so take a head plus an arbitrary-length tail instead of unpacking
+        # exactly two values, which raised ValueError on such input.
+        lang, *subtags = bcp.split(sep)
+        cased = [lang.lower()]
+        for subtag in subtags:
+            if len(subtag) == 2:
+                cased.append(subtag.upper())
+            elif len(subtag) == 4:
+                cased.append(subtag.capitalize())
+            else:
+                cased.append(subtag.lower())
+        return sep.join(cased)
+
     @staticmethod
     def convert_word_timing(
                 recognized_phrases: Iterable[Mapping[str, Any]],
@@ -209,10 +222,10 @@ def process_audio(self, job_config: AzureJobConfig) -> List[mpf.AudioTrack]:
             )
 
         missing_models = set()
-        default_locale = job_config.language
+        default_locale = self._convert_case_bcp(job_config.language)
         if (lang := job_config.override_default_language) is not None:
-            if lang in ISO6393_TO_BCP47:
-                for locale in ISO6393_TO_BCP47[lang]:
+            if lang.lower() in ISO6393_TO_BCP47:
+                for locale in ISO6393_TO_BCP47[lang.lower()]:
                     if locale in self.acs.supported_locales:
                         logger.debug(
                             f"Override default language ('{lang}') detected, "
@@ -241,18 +254,19 @@ def process_audio(self, job_config: AzureJobConfig) -> List[mpf.AudioTrack]:
         locale = default_locale
         if job_config.speaker is not None:
             speaker_language_valid = False
-            if (lang := job_config.speaker.language) in ISO6393_TO_BCP47:
-                for locale in ISO6393_TO_BCP47[lang]:
-                    if locale in self.acs.supported_locales:
-                        speaker_language_valid = True
-                        break
+            if (lang := job_config.speaker.language):
+                if lang.lower() in ISO6393_TO_BCP47:
+                    for locale in ISO6393_TO_BCP47[lang.lower()]:
+                        if locale in self.acs.supported_locales:
+                            speaker_language_valid = True
+                            break
 
             if not speaker_language_valid:
                 missing_models.add(job_config.speaker.language)
                 ldict = job_config.speaker.language_scores
                 for lang in sorted(ldict.keys(), key=ldict.get, reverse=True):
-                    if lang in ISO6393_TO_BCP47:
-                        for locale in ISO6393_TO_BCP47[lang]:
+                    if lang.lower() in ISO6393_TO_BCP47:
+                        for locale in ISO6393_TO_BCP47[lang.lower()]:
                             if locale in self.acs.supported_locales:
                                 logger.warning(
                                     f"Language supplied in feed-forward track "
@@ -283,6 +297,7 @@ def process_audio(self, job_config: AzureJobConfig) -> List[mpf.AudioTrack]:
                     )
                     locale = default_locale
 
+        locale = self._convert_case_bcp(locale)
         if locale not in self.acs.supported_locales:
             raise mpf.DetectionException(
                 f"Selected locale ('{locale}') is not supported by Azure "

diff --git a/python/AzureSpeechDetection/acs_speech_component/azure_utils.py b/python/AzureSpeechDetection/acs_speech_component/azure_utils.py
@@ -27,74 +27,141 @@
 # Dict of conversions from ISO639-3 language codes to BCP-47 codes. The first
 #  BCP-47 code in each value list will be used (the rest are primarily for
 #  reference in case of later changes or customization)
+
+# Supported languages can be found here:
+# https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt
 ISO6393_TO_BCP47 = dict(
+    afr=["af-ZA"],
     amh=["am-ET"],
     ara=["ar-EG", "ar-SA", "ar-IQ", "ar-IL", "ar-AE", "ar-SY", "ar-LY", "ar-DZ",
          "ar-BH", "ar-JO", "ar-KW", "ar-LB", "ar-MA", "ar-OM", "ar-PS", "ar-QA",
          "ar-TN", "ar-YE"],
     aze=["az-AZ"],
-    bel=["be-BY"],
-    ben=["bn-BD", "bn-IN"],
+    azj=["az-AZ"], # North Azerbaijani
+    azb=["az-AZ"], # South Azerbaijani
+    # bel=["be-BY"], # Deprecated
+    ben=["bn-IN"], # "bn-BD" Bengali-Bangladesh has been deprecated
     # bod=["bo"], # Deprecated
     bul=["bg-BG"],
+    bos=["bs-BA"],
+    cat=["ca-ES"],
     # ceb=["ceb"],
     ces=["cs-CZ"],
-    cmn=["zh-CN"],
+    cze=["cs-CZ"],# ISO-639-2 Variant
+    cym=["cy-GB"],
+    wel=["cy-GB"],# ISO-639-2 Variant
+    dan=["da-DK"], # Note: There is a related dialect JUT - Jutlandic
+    jut=["da-DK"], # Upon further research, Jutlandic is present in Denmark
+                   # but declining over time.
+    deu=["de-DE", "de-AT", "de-CH"],
+    # Many other forms of German exist
+    gsw=["de-CH"], # Swiss German
+    bar=["de-AT"], # Bavarian / Upper German variant common in most of Austria
     ell=["el-GR"],
     eng=["en-US", "en-CA", "en-GB", "en-AU", "en-GH", "en-HK", "en-IN", "en-IE",
          "en-KE", "en-NZ", "en-NG", "en-PH", "en-SG", "en-ZA", "en-TZ"],
-    fra=["fr-FR", "fr-CA", "fr-CH"],
+    est=["et-EE"], # Estonian (Inclusive)
+    ekk=["et-EE"], # Standard Estonian
+    # vro=["et-EE"], Voro, doesn't seem to be direct match
+    eus=["eu-ES"],
+    fas=["fa-IR"],
+    fin=["fi-FI"],
+    fil=["fil-PH"],
+    fra=["fr-FR", "fr-BE", "fr-CA", "fr-CH"],
+    gle=["ga-IE"],
+    glg=["gl-ES"],
+    guj=["gu-IN"],
+    heb=["he-IL"],
+    hin=["hi-IN"],
+    hrv=["hr-HR"],
+    hun=["hu-HU"],
+    # ohu=["hu-HU"], # Note: Old-Hungarian, might not fully work with modern "hu-HU"
     # gug=["gn"], # Deprecated
     # hat=[],
     # hau=["ha"], # Deprecated
     # hbs=["sh"], # Deprecated
-    hin=["hi-IN"],
     # hye=["hy"],
+    ita=["it-IT", "it-CH"],
     ind=["id-ID"],
+    ice=["is-IS"],
+    isl=["is-IS"],
     jav=["jv-ID"],
     jpn=["ja-JP"],
     kat=["ka-GE"],
     kaz=["kk-KZ"],
-    kir=["ky-KG"],
+    khm=["km-KH"],
+    kxm=["km-KH"], # Northern Khmer, might not work as well.
+    kan=["kn-IN"],
+    # kir=["ky-KG"], # Deprecated
     kor=["ko-KR"],
     # kur=["ku"], # Deprecated
     lao=["lo-LA"],
     lit=["lt-LT"],
+    lav=["lv-LV"],
+    lvs=["lv-LV"], # Standard Latvian
     # luo=[],
     mkd=["mk-MK"],
     mya=["my-MM"],
-    nan=["zh-TW", "nan-TW"],
+    mal=["ml-IN"],
+    mon=["mn-MN"], # Mongolian (Inclusive)
+    khk=["mn-MN"], # Khalkha Mongolian (Predominant)
+    mvf=["mn-MN"], # Peripheral Mongolian (Part)
+    mar=["mr-IN"],
+    zsm=["ms-MY"],
+    mlt=["mt-MT"],
+    nob=["nb-NO"],
+    nep=["ne-NP"], # Nepali (Macrolanguage)
+    npi=["ne-NP"], # Nepali
+    nld=["nl-NL", "nl-BE"], # Netherlands and Belgium
+    # omr=["mr-IN"], # Old Marathi, might not work
     # nde=["nd"],
     # orm=["om"],
     pan=["pa-IN"],
     pes=["fa-IR"],
     pol=["pl-PL"],
-    por=["pt-BR", "pt-PT"],
-    prs=["prs-AF"],
-    pus=["pa-AF"],
-    ron=["ro-RO", "ro-MD"],
+    por=["pt-BR", "pt-PT"], # pt-BR = Portuguese Brazil, pt-PT = Portuguese Portugal
+    pus=["ps-AF"], # Pashto, Pushto (Inclusive)
+    pbu=["ps-AF"], # Northern Pashto
+    pst=["ps-AF"], # Central Pashto
+    pbt=["ps-AF"], # Southern Pashto
+    sin=["si-LK"],
+    # prs=["prs-AF"], # Deprecated
+    # pus=["pa-AF"], # Replaced above: "pa" is the Punjabi prefix; Pashto is "ps"
+    ron=["ro-RO"],  # ro-MD deprecated
     # run=[],
     rus=["ru-RU"],
     slk=["sk-SK"],
+    slv=["sl-SI"],
     # sna=["sn"],
     som=["so-SO"],
     spa=["es-MX", "es-US", "es-AR", "es-BO", "es-CL", "es-CO", "es-CR", "es-CU",
          "es-DO", "es-EC", "es-SV", "es-GQ", "es-GT", "es-HN", "es-NI", "es-PA",
          "es-PY", "es-PE", "es-PR", "es-ES", "es-UY", "es-VE"],
+
     sqi=["sq-AL"],
     swa=["sw-KE", "sw-TZ"],
+    swe=["sv-SE"],
+    srp=["sr-RS"],
     tam=["ta-IN"],
+    tel=["te-IN"],
+    # wbq = ["te-IN"], Waddar/Vadari is related to Telugu.
     # tat=[],
-    tgk=["tg-TJ"],
-    tgl=["fil-PH", "tl-PH"],
+    # tgk=["tg-TJ"], # Deprecated
+    tgl=["fil-PH"], # "tl-PH" deprecated
     tha=["th-TH"],
     # tir=[],
-    tpi=["tpi-PG"],
+    # tpi=["tpi-PG"], # Deprecated
     tur=["tr-TR"],
     ukr=["uk-UA"],
     urd=["ur-IN"],
     uzb=["uz-UZ"],
     vie=["vi-VN"],
-    yue=["zh-HK", "yue-CN"],
+    cmn=["zh-CN", "zh-CN-shandong", "zh-CN-sichuan", "zh-HK", "zh-TW"],
+    zho=["zh-CN", "zh-CN-shandong", "zh-CN-sichuan", "zh-HK", "zh-TW"],
+    yue=["yue-CN", "zh-HK"], # Cantonese
+    wuu=["wuu-CN"],
+    nan=["zh-TW"], # nan-TW deprecated
+    # Note, Taiwanese has one standard + one major dialect,
+    # not sure which is covered better by Azure.
     zul=["zu-ZA"]
 )
diff --git a/python/AzureSpeechDetection/plugin-files/descriptor/descriptor.json b/python/AzureSpeechDetection/plugin-files/descriptor/descriptor.json
@@ -10,7 +10,7 @@
     "description": "Uses Azure Cognitive Services to perform speech-to-text.",
     "actionType": "DETECTION",
     "trackType": "SPEECH",
-    "outputChangedCounter" : 2,
+    "outputChangedCounter": 2,
     "requiresCollection": {
       "states": []
     },
@@ -59,7 +59,7 @@
         },
         {
           "name": "LANGUAGE",
-          "description": "The language/locale to use for transcription.",
+          "description": "The language/locale, in BCP-47 format, to use for transcription. Please consult the README for the list of BCP-47 codes supported by Azure.",
           "type": "STRING",
           "defaultValue": "en-US"
         },
@@ -193,4 +193,4 @@
       ]
     }
   ]
-}
+}
diff --git a/python/AzureSpeechDetection/tests/test_acs_speech.py b/python/AzureSpeechDetection/tests/test_acs_speech.py
@@ -112,7 +112,7 @@ def test_audio_file(self):
             stop_time=-1,
             job_properties=get_test_properties(
                 DIARIZE='FALSE',
-                LANGUAGE='en-US',
+                LANGUAGE='EN-us',
                 USE_SAS_AUTH='TRUE'
             ),
             media_properties={},
@@ -137,7 +137,7 @@ def test_video_file(self):
             stop_frame=-1,
             job_properties=get_test_properties(
                 DIARIZE='FALSE',
-                LANGUAGE='en-US'
+                LANGUAGE='En-Us'
             ),
             media_properties=dict(
                 FPS='24'
@@ -204,7 +204,7 @@ def test_language(self):
             stop_time=-1,
             job_properties=get_test_properties(
                 DIARIZE='TRUE',
-                LANGUAGE='en-US'
+                LANGUAGE='en-us'
             ),
             media_properties={},
             feed_forward_track=None