In [3]:
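# One-time setup: uncomment and run to install the extra packages into the kernel's environment.
# %pip install langdetect
# %pip install sentencepiece
# %pip install boto3
# %pip install awscli
# %pip install sacremoses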

In [10]:
import gradio as gr
from transformers import pipeline, AutoTokenizer, TFAutoModelForSeq2SeqLM
from dotenv import load_dotenv
import os
import subprocess
import torch
#Google Text to Speech
from gtts import gTTS
import tempfile
from langdetect import detect
from transformers import MarianMTModel, MarianTokenizer

import boto3

In [29]:
# import functions from functions file

from functions_mm import handle_query, transcribe_audio_original, polly_text_to_speech, voice_map, language_map



In [14]:
# Whisper pipeline shared by every call; built on first use so the model is
# loaded once per kernel instead of once per transcription request.
_transcription_pipeline = None


def _get_transcription_pipeline():
    """Return the cached speech-recognition pipeline, creating it on first use."""
    global _transcription_pipeline
    if _transcription_pipeline is None:
        _transcription_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large")
    return _transcription_pipeline


# This function transcribes audio to text using Whisper in the original language it was spoken
def transcribe_audio_original(audio_filepath):
    """Transcribe an audio file to text with the ``openai/whisper-large`` pipeline.

    Args:
        audio_filepath: Path of the audio file; passed unchanged to the pipeline.

    Returns:
        The ``'text'`` entry of the pipeline result, or the string
        ``"Error in transcription"`` if building or running the pipeline
        raises (the exception is printed, not re-raised).
    """
    try:
        # Building the pipeline stays inside the try so a failed model load
        # still produces the error string; a failed build is not cached, so
        # the next call retries.
        transcription_result = _get_transcription_pipeline()(audio_filepath)
        return transcription_result['text']
    except Exception as e:
        print(f"an error occured: {e}")
        return "Error in transcription"

In [28]:
# This function translates already-transcribed text into the specified language
def translate(transcribed_text, target_lang="es"):
    """Translate text into ``target_lang`` with a Helsinki-NLP MarianMT model.

    The source language is detected from the text itself and combined with
    ``target_lang`` to select the ``Helsinki-NLP/opus-mt-{src}-{tgt}`` model.

    Args:
        transcribed_text: Text to translate.
        target_lang: Language code used as the target half of the model name
            (default ``"es"``).

    Returns:
        The translated text, or the string
        ``"Error in transcription or translation"`` if any step raises
        (the exception is printed, not re-raised).
    """
    try:
        #Define the model and tokenizer
        src_lang = detect(transcribed_text)
        model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{target_lang}"
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)

        # Tokenize WITHOUT truncation. With truncation=True the ids were
        # already clipped before the loop below, so it only ever saw one
        # chunk and everything past the limit was silently dropped.
        tokens = tokenizer.encode(transcribed_text, return_tensors="pt", truncation=False)
        max_length = tokenizer.model_max_length
        total_length = tokens.size(1)

        #Process text in chunks of at most max_length tokens
        # NOTE(review): chunks are cut on token boundaries, not sentence
        # boundaries, so wording around a seam may translate less cleanly.
        translated_segments = []
        for start_index in range(0, total_length, max_length):
            # Slicing past the end is safe; the last chunk is simply shorter.
            segment_ids = tokens[:, start_index:start_index + max_length]

            #generate translation for the segment and decode it
            translated_tokens = model.generate(segment_ids)
            translated_segments.append(
                tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
            )

        return " ".join(translated_segments).strip()

    except Exception as e:
        print(f"An error occurred: {e}")
        return "Error in transcription or translation"

In [23]:
default_language = "English"


def _answer_query(query_text):
    """Answer ``query_text`` and synthesize speech in the query's detected language."""
    detected_lang_code = detect(query_text)
    response_text = handle_query(query_text)
    response_speech = polly_text_to_speech(response_text, detected_lang_code)
    return response_text, response_speech


#Define combined function to feed into Gradio app
def combined_function(audio_filepath=None, typed_text=None, target_lang=default_language):
    """Answer a typed or spoken question and return the answer as text and speech.

    Exactly one of ``audio_filepath`` / ``typed_text`` must be provided.

    Args:
        audio_filepath: Path to a recorded question; transcribed before answering.
        typed_text: Question typed by the user.
        target_lang: Not used here; accepted because the Submit button passes
            the language dropdown value as a third input.

    Returns:
        A ``(response_text, response_speech)`` tuple. ``response_speech`` is
        whatever ``polly_text_to_speech`` returned, or ``None`` when there is
        no audio. For invalid input or a failed transcription the first item
        is a message for the user and the second is ``None``.
    """
    #Determine source of text: audio transcription or direct text input
    if audio_filepath and typed_text:
        return "Please use only one input method at a time", None

    if not audio_filepath and not typed_text:
        return "Please provide input by typing or speaking", None

    if typed_text:
        query_text = typed_text
    else:
        #transcribe audio to text in background
        query_text = transcribe_audio_original(audio_filepath)
        # The transcriber defined in this notebook reports failure by returning
        # this string; do not forward it to the chatbot as if it were a question.
        if not query_text or query_text == "Error in transcription":
            return "Error in transcription", None

    response_text, response_speech = _answer_query(query_text)

    # The second value feeds a gr.Audio output, so "nothing to play" is None
    # (as in the guard branches above) rather than a placeholder string.
    return response_text, (response_speech or None)


In [26]:
def translate_and_speech(response_text=None, target_lang=default_language):
    """Translate the chatbot response into the selected language and voice it.

    Args:
        response_text: Text to translate (the chatbot response textbox value).
        target_lang: Language name looked up in ``language_map``; names that
            are not in the map fall back to the code ``"en"``.

    Returns:
        A ``(translated_response, translated_speech)`` tuple. When there is no
        text to translate, a message for the user and ``None`` are returned.
    """
    # Nothing to translate yet (e.g. Translate clicked before a response
    # exists); language detection would raise on None or empty text.
    if not response_text or not response_text.strip():
        return "No response text to translate", None

    #Detect language of input text
    detected_lang_code = detect(response_text)

    #Check if the language is specified. Default to English if not.
    target_lang_code = language_map.get(target_lang, "en")

    #Process text: translate
    # Compare language codes directly. Mapping the detected code back to a
    # language name raised IndexError for any code missing from language_map.
    if detected_lang_code == target_lang_code:
        translated_response = response_text
    else:
        translated_response = translate(response_text, target_lang_code)

    #convert to speech
    translated_speech = polly_text_to_speech(translated_response, target_lang_code)

    return translated_response, translated_speech

In [17]:
# Load environment variables (python-dotenv).
load_dotenv()

# Model name for our LLMs.
OPENAI_MODEL = "gpt-3.5-turbo"

# API key comes from the environment; the value is None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [18]:
# Dropdown listing the language names (the keys of language_map).
# NOTE(review): `languages` is not referenced elsewhere in the visible notebook;
# the app builds its own `language_dropdown` - confirm whether this is still needed.
languages = gr.Dropdown(
    choices=[*language_map.keys()],
    label="Click in the middle of the dropdown bar to select translation language",
)

In [8]:
# Function to clear out all inputs

def clear_inputs():
    """Return six ``None`` values, one per component listed as an output of the Clear button."""
    return (None,) * 6

In [19]:
# Initial value of the language dropdown in the app.
# NOTE(review): the same value is also assigned in an earlier cell of this notebook.
default_language = "English"

In [30]:
# Markdown text rendered at the top of the app via gr.Markdown.
instructions = """
# Diabetes Chatbot
### Step 1: Record your audio OR input text (NOT both!)
### Step 2: Would you like response in new language? Choose your language  
### Step 3: Submit question
### Step 4: Translate response
### Step 5: Clear inputs and start fresh
"""


# Build the Gradio UI: instructions, inputs, action buttons, three result
# columns, and the click handlers that connect them.
with gr.Blocks() as app2:

    # Instructions banner
    with gr.Row():
        gr.Markdown(instructions)

    # Spoken-question input and the target language for translation
    with gr.Row():
        input_audio = gr.Audio(
            label="Ask a question about Diabetes",
            type="filepath")
        language_dropdown = gr.Dropdown(label="Click the middle of the dropdown bar to select translation language",
                                        choices=list(language_map.keys()), value=default_language, type='value')

    # Action buttons
    with gr.Row():
        submit_button = gr.Button("Submit your question")
        translate_button = gr.Button("Translate the response")
        clear_button = gr.Button("Clear All")

    #Divide the screen horizontally into 3 columns
    with gr.Row():
            #First column: typed question
            with gr.Column():
                query_text = gr.Textbox(label="Type your question here")
                # output_original_speech = gr.Audio(label="Text to speech here")

            #Second column: chatbot response as text and speech
            with gr.Column():
                response_text = gr.Textbox(label="Chatbot response")
                response_speech = gr.Audio(label="Chatbot response speech")

            #Third column: translated response as text and speech
            with gr.Column():
                output_translated = gr.Textbox(label="Translated text")
                output_translated_speech = gr.Audio(label="Translated speech")

    # Submit: answer the spoken or typed question.
    # combined_function receives (audio, typed text, language) and returns (text, speech).
    submit_button.click(
        fn=combined_function,
        inputs=[input_audio, query_text, language_dropdown],
        outputs=[response_text, response_speech]
    )

    # Translation: translate the current chatbot response into the selected language
    translate_button.click(
        fn=translate_and_speech,
        inputs=[response_text, language_dropdown],
        outputs=[output_translated, output_translated_speech]
    )

    #Clearing all inputs and outputs (the language dropdown is not listed, so it is left as is)
    clear_button.click(
    fn=clear_inputs,
    inputs=[],
    outputs=[input_audio, query_text, response_text, response_speech, output_translated, output_translated_speech]
    )

# share=True also requests a public share URL in addition to the local one
# (see the "Running on public URL" line in the cell output).
app2.launch(show_error=True, share=True)






Running on local URL:  http://127.0.0.1:7867
Running on public URL: https://937f7cf3331dde3fa5.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


