# Voice translator with Azure AI

<img src="https://github.com/retkowsky/azure-ai-translator/blob/main/webapp3.jpg?raw=true">

In [1]:
# %pip install azure-ai-translation-text==1.0.0b1

In [37]:
import datetime
import pandas as pd
import gradio as gr
import openai
import os
import sys

from azure.ai.translation.text import TextTranslationClient, TranslatorCredential
from azure.ai.translation.text.models import InputTextItem
from dotenv import load_dotenv
from IPython.display import Audio
from openai import AzureOpenAI

In [3]:
sys.version

'3.10.11 (main, May 16 2023, 00:28:57) [GCC 11.2.0]'

In [4]:
print(f"Today is {datetime.datetime.today().strftime('%d-%b-%Y %H:%M:%S')}")

Today is 16-Apr-2024 09:32:29


## 1. Settings

In [5]:
def check_openai_version():
    """
    Print the installed OpenAI package version and tell whether its
    major version number is at least 1
    """
    try:
        current = openai.__version__
        # Only the part before the first dot (major number) is compared
        major = float(current.partition(".")[0])
    except (AttributeError, ValueError):
        print("Invalid OpenAI version format")
        return

    print(f"Installed OpenAI version: {current}\n")

    if major < 1.0:
        print("[Warning] You should upgrade OpenAI to have version >= 1.0.0")
        print("To upgrade, run: %pip install openai --upgrade")
        return

    print(f"[OK] OpenAI version {current} is greater than version 1.0.0")

In [6]:
check_openai_version()

Installed OpenAI version: 1.12.0

[OK] OpenAI version 1.12.0 is greater than version 1.0.0


In [7]:
# Load the settings of the "azure.env" file so that os.getenv can read them below
load_dotenv("azure.env")

# Azure AI Translator
# Key, endpoint and region are used to create the TextTranslationClient
azure_ai_translator_key = os.getenv("AZURE_AI_TRANSLATION_KEY")
azure_ai_translator_endpoint = os.getenv("AZURE_AI_TRANSLATION_ENDPOINT")
azure_ai_translator_region = os.getenv("AZURE_AI_TRANSLATION_REGION")

# Azure OpenAI Whisper model
# Endpoint, key and API version are stored as attributes of the openai module
# and reused when the AzureOpenAI client is created
openai.api_base = os.getenv("AOAI_ENDPOINT")
openai.api_key = os.getenv("AOAI_KEY")
# NOTE(review): the API version is read from the AOAI_REGION variable - confirm
# that this variable holds an API version string and not a region name
openai.api_version = os.getenv("AOAI_REGION")
# NOTE(review): api_type is not passed to the AzureOpenAI client in the visible code
openai.api_type = os.getenv("AOAI_TYPE")
deployment_id = os.getenv("AOAI_DEPLOYMENT")  # Whisper deployment name, given as `model` to the transcription call
model = os.getenv("AOAI_MODEL")  # Whisper model; NOTE(review): not referenced in the visible code

In [8]:
# Azure OpenAI Whisper client
# Built from the key, API version and endpoint stored on the openai module
# in the settings cell; used by get_speech_to_text for the transcription call
client = AzureOpenAI(
    api_key=openai.api_key,
    api_version=openai.api_version,
    azure_endpoint=openai.api_base,
)

## 2. Speech to text with Azure OpenAI Whisper

In [9]:
def get_speech_to_text(audio_filename):
    """
    Transcribes speech from an audio file using the Azure OpenAI Whisper deployment.
    Input: audio file (path of the file to transcribe)
    Output: text transcript, or None when no file is given or when the
            transcription fails (the error is printed, not raised)
    """
    # No path supplied (None or empty string): report it explicitly instead of
    # letting open() fail with an unrelated TypeError / FileNotFoundError message
    if not audio_filename:
        print("[Error] Speech to text error: no audio file provided")
        return None

    try:
        with open(audio_filename, "rb") as audio_file:
            result = client.audio.transcriptions.create(
                file=audio_file,
                model=deployment_id,  # name of the Whisper deployment
            )
        return result.text

    except Exception as e:
        # Best effort: callers receive None and the reason is printed
        print(f"[Error] Speech to text error: {e}")
        return None

In [10]:
audio_filename = "sample.wav"

In [11]:
Audio(audio_filename, autoplay=False)

### 2.1 Test

In [12]:
# Test
results = get_speech_to_text(audio_filename)
print(results)

Hello, Good Morning, Welcome to this presentation.


### 2.2 Speech to Text webapp

In [13]:
title = "Speech to Text with Azure OpenAI Whisper"
image_url = "https://github.com/retkowsky/azure-ai-translator/blob/main/logotranslate.png?raw=true"
logo = f"<center> <img src= {image_url} width=200px></center>"

# Web form: microphone recording in, transcript text out
stt_webapp = gr.Interface(
    fn=get_speech_to_text,
    inputs=gr.Audio(sources="microphone", type="filepath", label="Audio Recording"),
    outputs=gr.Text(label="Text"),
    description=logo,
    title=title,
)

In [14]:
stt_webapp.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://dbc2b729850c101b88.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




## 3. Languages supported

In [15]:
# Translation client
# Authenticated with the Azure AI Translator key and region read in the settings cell;
# used below to list the supported languages and to translate text
text_translator = TextTranslationClient(
    endpoint=azure_ai_translator_endpoint,
    credential=TranslatorCredential(
        azure_ai_translator_key, azure_ai_translator_region
    ),
)

In [16]:
%%javascript Python 
OutputArea.auto_scroll_threshold = 9999

<IPython.core.display.Javascript object>

In [17]:
def print_languages(label, languages):
    """
    Print the languages supported for one feature
    Input: label used in the messages, mapping of language code to
           language details (name, native_name) or None
    Output: nothing is returned, the list is printed
    """
    # ANSI escape sequence, always emitted before any other line
    print("\033[1;31;34m")

    if languages is None:
        print(f"No supported {label} languages.")
        return

    print(f"Number of supported {label} languages = {len(languages)}\n")
    print(f"{label.capitalize()} languages:")

    # One numbered line per language: rank, code, name and native name
    for rank, (code, details) in enumerate(languages.items(), 1):
        print(f"{rank:03}\t{code:10} {details.name} ({details.native_name})")

In [18]:
response = text_translator.get_languages()

In [19]:
print_languages("translation", response.translation)

[1;31;34m
Number of supported translation languages = 135

Translation languages:
001	af         Afrikaans (Afrikaans)
002	am         Amharic (አማርኛ)
003	ar         Arabic (العربية)
004	as         Assamese (অসমীয়া)
005	az         Azerbaijani (Azərbaycan)
006	ba         Bashkir (Bashkir)
007	bg         Bulgarian (Български)
008	bho        Bhojpuri (Bhojpuri)
009	bn         Bangla (বাংলা)
010	bo         Tibetan (བོད་སྐད་)
011	brx        Bodo (बड़ो)
012	bs         Bosnian (Bosnian)
013	ca         Catalan (Català)
014	cs         Czech (Čeština)
015	cy         Welsh (Cymraeg)
016	da         Danish (Dansk)
017	de         German (Deutsch)
018	doi        Dogri (Dogri)
019	dsb        Lower Sorbian (Dolnoserbšćina)
020	dv         Divehi (ދިވެހިބަސް)
021	el         Greek (Ελληνικά)
022	en         English (English)
023	es         Spanish (Español)
024	et         Estonian (Eesti)
025	eu         Basque (Euskara)
026	fa         Persian (فارسی)
027	fi         Finnish (Suomi)
028	fil        Filipino (

In [20]:
print_languages("transliteration", response.transliteration)

[1;31;34m
Number of supported transliteration languages = 42

Transliteration languages:
001	ar         Arabic (العربية)
002	as         Assamese (অসমীয়া)
003	be         Belarusian (беларуская)
004	bg         Bulgarian (Български)
005	bn         Bangla (বাংলা)
006	brx        Bodo (बड़ो)
007	el         Greek (Ελληνικά)
008	fa         Persian (فارسی)
009	gom        Konkani (Konkani)
010	gu         Gujarati (ગુજરાતી)
011	he         Hebrew (עברית)
012	hi         Hindi (हिन्दी)
013	ja         Japanese (日本語)
014	kk         Kazakh (Қазақ Тілі)
015	kn         Kannada (ಕನ್ನಡ)
016	ko         Korean (한국어)
017	ks         Kashmiri (کٲشُر)
018	ky         Kyrgyz (Кыргызча)
019	mai        Maithili (Maithili)
020	mk         Macedonian (Македонски)
021	ml         Malayalam (മലയാളം)
022	mn-Cyrl    Mongolian (Cyrillic) (Mongolian (Cyrillic))
023	mni        Manipuri (ꯃꯩꯇꯩꯂꯣꯟ)
024	mr         Marathi (मराठी)
025	ne         Nepali (नेपाली)
026	or         Odia (ଓଡ଼ିଆ)
027	pa         Punjabi (ਪੰਜਾਬੀ)
028	ru   

In [21]:
print_languages("dictionary", response.dictionary)

[1;31;34m
Number of supported dictionary languages = 50

Dictionary languages:
001	af         Afrikaans (Afrikaans)
002	ar         Arabic (العربية)
003	bg         Bulgarian (Български)
004	bn         Bangla (বাংলা)
005	bs         Bosnian (Bosnian)
006	ca         Catalan (Català)
007	cs         Czech (Čeština)
008	cy         Welsh (Cymraeg)
009	da         Danish (Dansk)
010	de         German (Deutsch)
011	el         Greek (Ελληνικά)
012	en         English (English)
013	es         Spanish (Español)
014	et         Estonian (Eesti)
015	fa         Persian (فارسی)
016	fi         Finnish (Suomi)
017	fr         French (Français)
018	he         Hebrew (עברית)
019	hi         Hindi (हिन्दी)
020	hr         Croatian (Hrvatski)
021	hu         Hungarian (Magyar)
022	id         Indonesian (Indonesia)
023	is         Icelandic (Íslenska)
024	it         Italian (Italiano)
025	ja         Japanese (日本語)
026	ko         Korean (한국어)
027	lt         Lithuanian (Lietuvių)
028	lv         Latvian (Latviešu)
029	

In [22]:
# Loading the supported languages into a dataframe:
# one record per translation language (no record when the list is missing)
translation_items = (
    response.translation.items() if response.translation is not None else ()
)
langlist = [
    {
        "Language_Code": code,
        "Language_Name": details.name,
        "Native_Name": details.native_name,
    }
    for code, details in translation_items
]

df_languages = pd.DataFrame(langlist)
df_languages

Unnamed: 0,Language_Code,Language_Name,Native_Name
0,af,Afrikaans,Afrikaans
1,am,Amharic,አማርኛ
2,ar,Arabic,العربية
3,as,Assamese,অসমীয়া
4,az,Azerbaijani,Azərbaycan
...,...,...,...
130,yua,Yucatec Maya,Yucatec Maya
131,yue,Cantonese (Traditional),粵語 (繁體)
132,zh-Hans,Chinese Simplified,中文 (简体)
133,zh-Hant,Chinese Traditional,繁體中文 (繁體)


In [23]:
print("Azure AI Translator supported languages")
print("\033[1;31;34m")
print(df_languages.to_string())

Azure AI Translator supported languages
[1;31;34m
    Language_Code            Language_Name           Native_Name
0              af                Afrikaans             Afrikaans
1              am                  Amharic                  አማርኛ
2              ar                   Arabic               العربية
3              as                 Assamese               অসমীয়া
4              az              Azerbaijani            Azərbaycan
5              ba                  Bashkir               Bashkir
6              bg                Bulgarian             Български
7             bho                 Bhojpuri              Bhojpuri
8              bn                   Bangla                 বাংলা
9              bo                  Tibetan              བོད་སྐད་
10            brx                     Bodo                  बड़ो
11             bs                  Bosnian               Bosnian
12             ca                  Catalan                Català
13             cs                    Cz

In [24]:
df_languages.to_excel("azure_ai_translator_languages.xlsx")

In [25]:
# Language code -> language name, taken from the dataframe
language_dict = df_languages.set_index("Language_Code")["Language_Name"].to_dict()
# Sorted list of the language full names
language_full_names = sorted(language_dict.values())
# Sorted list of the language codes
language_codes = sorted(language_dict.keys())
# Reverse lookup: language name -> language code
reverse_language_names = dict(zip(language_dict.values(), language_dict.keys()))

In [26]:
print(language_codes)

['af', 'am', 'ar', 'as', 'az', 'ba', 'bg', 'bho', 'bn', 'bo', 'brx', 'bs', 'ca', 'cs', 'cy', 'da', 'de', 'doi', 'dsb', 'dv', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fil', 'fj', 'fo', 'fr', 'fr-CA', 'ga', 'gl', 'gom', 'gu', 'ha', 'he', 'hi', 'hne', 'hr', 'hsb', 'ht', 'hu', 'hy', 'id', 'ig', 'ikt', 'is', 'it', 'iu', 'iu-Latn', 'ja', 'ka', 'kk', 'km', 'kmr', 'kn', 'ko', 'ks', 'ku', 'ky', 'ln', 'lo', 'lt', 'lug', 'lv', 'lzh', 'mai', 'mg', 'mi', 'mk', 'ml', 'mn-Cyrl', 'mn-Mong', 'mni', 'mr', 'ms', 'mt', 'mww', 'my', 'nb', 'ne', 'nl', 'nso', 'nya', 'or', 'otq', 'pa', 'pl', 'prs', 'ps', 'pt', 'pt-PT', 'ro', 'ru', 'run', 'rw', 'sd', 'si', 'sk', 'sl', 'sm', 'sn', 'so', 'sq', 'sr-Cyrl', 'sr-Latn', 'st', 'sv', 'sw', 'ta', 'te', 'th', 'ti', 'tk', 'tlh-Latn', 'tlh-Piqd', 'tn', 'to', 'tr', 'tt', 'ty', 'ug', 'uk', 'ur', 'uz', 'vi', 'xh', 'yo', 'yua', 'yue', 'zh-Hans', 'zh-Hant', 'zu']


In [27]:
print(reverse_language_names)

{'Afrikaans': 'af', 'Amharic': 'am', 'Arabic': 'ar', 'Assamese': 'as', 'Azerbaijani': 'az', 'Bashkir': 'ba', 'Bulgarian': 'bg', 'Bhojpuri': 'bho', 'Bangla': 'bn', 'Tibetan': 'bo', 'Bodo': 'brx', 'Bosnian': 'bs', 'Catalan': 'ca', 'Czech': 'cs', 'Welsh': 'cy', 'Danish': 'da', 'German': 'de', 'Dogri': 'doi', 'Lower Sorbian': 'dsb', 'Divehi': 'dv', 'Greek': 'el', 'English': 'en', 'Spanish': 'es', 'Estonian': 'et', 'Basque': 'eu', 'Persian': 'fa', 'Finnish': 'fi', 'Filipino': 'fil', 'Fijian': 'fj', 'Faroese': 'fo', 'French': 'fr', 'French (Canada)': 'fr-CA', 'Irish': 'ga', 'Galician': 'gl', 'Konkani': 'gom', 'Gujarati': 'gu', 'Hausa': 'ha', 'Hebrew': 'he', 'Hindi': 'hi', 'Chhattisgarhi': 'hne', 'Croatian': 'hr', 'Upper Sorbian': 'hsb', 'Haitian Creole': 'ht', 'Hungarian': 'hu', 'Armenian': 'hy', 'Indonesian': 'id', 'Igbo': 'ig', 'Inuinnaqtun': 'ikt', 'Icelandic': 'is', 'Italian': 'it', 'Inuktitut': 'iu', 'Inuktitut (Latin)': 'iu-Latn', 'Japanese': 'ja', 'Georgian': 'ka', 'Kazakh': 'kk', 'Kh

In [28]:
print(f"Number of supported languages with Azure AI Translator = {len(reverse_language_names)}")

Number of supported languages with Azure AI Translator = 135


In [29]:
def get_language_code(language_name):
    """
    Look up the code of a language from its full name
    Input: language full name
    Output: language code, or None when the name is not a known language
    """
    code = reverse_language_names.get(language_name, None)
    return code

### 3.1 Test

In [30]:
# Test
print(get_language_code("English"))

en


## 4. Translation

In [31]:
def get_translation(mytext, source_lang, target_lang):
    """
    Translate a text from one language to another one
    Input: text to translate, full name of the source language,
           full name of the target language
    Output: translated text, or None when there is nothing to translate, the
            target language is unknown, the service returns no translation or
            the call fails (the reason is printed, not raised)
    """
    # Nothing to translate: do not call the service
    if mytext is None:
        print("[Error] Translation error: no text to translate")
        return None

    # Get source language code from its fullname
    # (an unknown name gives None, which is passed to the service unchanged)
    source_code = get_language_code(source_lang)
    # Get target language code from its fullname
    target_code = get_language_code(target_lang)

    # Without a target code the request cannot succeed
    if target_code is None:
        print(f"[Error] Translation error: unknown target language '{target_lang}'")
        return None

    try:
        # Calling the Azure AI translator API
        response = text_translator.translate(
            content=[InputTextItem(text=mytext)],
            to=[target_code],
            from_parameter=source_code,
        )

        # One input text was sent, so the result is the first response item
        translations = response[0].translations if response else None

        if not translations:
            print("[Error] Translation error: the service returned no translation")
            return None

        # A single target language is requested, hence a single translation
        return translations[0].text

    except Exception as e:
        print(f"[Error] Translation error: {e}")
        return None

### 4.1 Test

In [32]:
# Quick Test
result = get_translation("Welcome to this presentation", "English", "German")
print(result)

Willkommen zu dieser Präsentation


## 5. STT and Translation

In [33]:
def stt_and_translation(audio_filename, source_lang, target_lang):
    """
    Speech to Text and translation using the STT transcript
    Input: audio file, full name of the source language,
           full name of the target language
    Output: (transcript, translation); both are None when the transcription
            gives no text or when an unexpected error occurs
    """
    try:
        # Speech to text result
        transcript = get_speech_to_text(audio_filename)

        # get_speech_to_text returns None on failure: do not send a
        # missing transcript to the translation service
        if transcript is None:
            return None, None

        # Translation of the Speech to text result
        translation = get_translation(transcript, source_lang, target_lang)

        return transcript, translation

    except Exception as e:
        print(f"[Error] Speech to text and translation error: {e}")

        return None, None

### 5.1 Test

In [34]:
# Test
transcript, translation = stt_and_translation(
    audio_filename, "English", "Spanish")

transcript, translation

('Hello, Good Morning, Welcome to this presentation.',
 'Hola, buenos días, bienvenidos a esta presentación.')

### 5.2 Speech to Text and Translation using a webapp

In [35]:
image_url = "https://github.com/retkowsky/azure-ai-translator/blob/main/logotranslate.png?raw=true"
logo = f"<center> <img src= {image_url} width=200px></center>"
title = "Vocal translator using Azure AI services"
theme = "JohnSmith9982/small_and_pretty"  # https://huggingface.co/spaces/gradio/theme-gallery

# Web form: microphone recording plus source and target languages in,
# transcript and translated text out
translator_webapp = gr.Interface(
    fn=stt_and_translation,
    inputs=[
        gr.components.Audio(
            label="Audio recording",
            sources="microphone",
            type="filepath",
        ),
        gr.components.Dropdown(
            choices=language_full_names,
            value="French",
            label="Audio source language",
        ),
        gr.components.Dropdown(
            choices=language_full_names,
            value="English",
            label="Text target language",
        ),
    ],
    outputs=[
        gr.Text(label="Source text"),
        gr.Text(label="Translated text"),
    ],
    title=title,
    description=logo,
    theme=theme,
    cache_examples=False,
)

In [36]:
translator_webapp.launch(share=True)

Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://3f4b7420bc5e6072a8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


