# Simple Voice Chatbot with OpenAI API, Gradio and Whisper

## 🚀 Demo 

In [64]:
from pathlib import Path
import gradio as gr
from openai import OpenAI
import assemblyai as aai
from tempfile import NamedTemporaryFile
import os
from dotenv import load_dotenv

load_dotenv()

# Initialize OpenAI and AssemblyAI APIs
client = OpenAI()
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
transcriber = aai.Transcriber()

class Chat:
    def __init__(self, system=None):
        self.system = system
        self.messages = []
        
        if system is not None:
            self.messages.append({
                "role": "system",
                "content": system
            })

    def prompt(self, content: str) -> str:
        self.messages.append({
            "role": "user",
            "content": content
        })
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=self.messages
        )
        response_content = response.choices[0].message.content
        self.messages.append({
            "role": "assistant",
            "content": response_content
        })
        return response_content

chat = Chat(system="You are a helpful assistant.")

def run_text_prompt(message, chat_history):
    # Get AI-generated response
    bot_message = chat.prompt(content=message)

    # Generate and save the voice response using OpenAI's TTS
    speech_file_path = NamedTemporaryFile(suffix=".mp3", delete=False).name
    
    try:
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=bot_message
        )
        if "audio" in response:
            with open(speech_file_path, "wb") as audio_file:
                audio_file.write(response["audio"])
        else:
            print("Audio generation failed: ", response)
            return "", chat_history, None
    except Exception as e:
        print(f"Error generating audio: {e}")
        return "", chat_history, None

    # Append the conversation history
    chat_history.append((message, bot_message))

    # Return the conversation history along with the audio path
    return "", chat_history, speech_file_path

def run_audio_prompt(audio, chat_history):
    if audio is None:
        return None, chat_history, None

    # Transcribe audio using AssemblyAI
    config = aai.TranscriptionConfig(speaker_labels=True)
    transcript = transcriber.transcribe(audio, config)

    if transcript.status == aai.TranscriptStatus.error:
        return f"Transcription failed: {transcript.error}", chat_history, None
    
    message_transcription = transcript.text

    # Run the text through the chatbot
    _, chat_history, speech_file_path = run_text_prompt(message_transcription, chat_history)
    
    return None, chat_history, speech_file_path

""" # Gradio UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()

    # Textbox for user input
    msg = gr.Textbox()
    # The output needs to be properly structured, returning valid Gradio components (chatbot and audio output)
    msg.submit(run_text_prompt, [msg, chatbot], [msg, chatbot])

    # Row for audio input
    with gr.Row():
        audio = gr.Audio(type="filepath")
        send_audio_button = gr.Button("Send Audio", interactive=True)
        send_audio_button.click(run_audio_prompt, [audio, chatbot], [None, chatbot, None])

    # Play the generated speech audio
    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio")

    # Link audio generation output
    send_audio_button.click(fn=run_audio_prompt, inputs=[audio, chatbot], outputs=[None, chatbot, audio_output])

# Launch the Gradio interface
demo.launch(debug=True) """


' # Gradio UI\nwith gr.Blocks() as demo:\n    chatbot = gr.Chatbot()\n\n    # Textbox for user input\n    msg = gr.Textbox()\n    # The output needs to be properly structured, returning valid Gradio components (chatbot and audio output)\n    msg.submit(run_text_prompt, [msg, chatbot], [msg, chatbot])\n\n    # Row for audio input\n    with gr.Row():\n        audio = gr.Audio(type="filepath")\n        send_audio_button = gr.Button("Send Audio", interactive=True)\n        send_audio_button.click(run_audio_prompt, [audio, chatbot], [None, chatbot, None])\n\n    # Play the generated speech audio\n    with gr.Row():\n        audio_output = gr.Audio(label="Generated Audio")\n\n    # Link audio generation output\n    send_audio_button.click(fn=run_audio_prompt, inputs=[audio, chatbot], outputs=[None, chatbot, audio_output])\n\n# Launch the Gradio interface\ndemo.launch(debug=True) '

In [73]:
from openai import OpenAI
import assemblyai as aai
from tempfile import NamedTemporaryFile
import os
from dotenv import load_dotenv

# Load API keys from environment variables
load_dotenv()
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
transcriber = aai.Transcriber()


# Test the Chat class
def test_chat():
    chat = Chat(system="You are a helpful assistant.")
    user_input = "What's the weather like today?"
    response = chat.prompt(content=user_input)
    print("Chat response:", response)


# Test OpenAI TTS
def test_tts(test_text):
    #test_text = "Saucotec is gonna take over the world."
    speech_file_path = "test.mp3"  # NamedTemporaryFile(suffix=".mp3", delete=False).name

    try:
        response = client.audio.speech.create(
            model="tts-1",  # Make sure you have access to the TTS model
            voice="onyx",  # Ensure the voice exists
            input=test_text
        )
        response.stream_to_file(speech_file_path)
        #print(f"Audio saved to {speech_file_path}")
        return response
    except Exception as e:
        print(f"Error generating audio: {e}")
        
"""

# Test AssemblyAI transcription
def test_transcription(audio_file_path):
    config = aai.TranscriptionConfig(speaker_labels=True)

    try:
        transcript = transcriber.transcribe(audio_file_path, config)

        if transcript.status == aai.TranscriptStatus.completed:
            print("Transcription:", transcript.text)
        else:
            print("Transcription failed:", transcript.error)
    except Exception as e:
        print(f"Error transcribing audio: {e}")


# Run tests
if __name__ == "__main__":
    print("Testing Chat functionality:")
    #test_chat()

    print("\nTesting OpenAI TTS:")
    test_tts()

    print("\nTesting AssemblyAI Transcription:")
    # Ensure you provide a valid audio file path here for testing transcription
    test_audio_file_path = "test.mp3"  # Replace with actual path
    test_transcription(test_audio_file_path)
 """

'\n\n# Test AssemblyAI transcription\ndef test_transcription(audio_file_path):\n    config = aai.TranscriptionConfig(speaker_labels=True)\n\n    try:\n        transcript = transcriber.transcribe(audio_file_path, config)\n\n        if transcript.status == aai.TranscriptStatus.completed:\n            print("Transcription:", transcript.text)\n        else:\n            print("Transcription failed:", transcript.error)\n    except Exception as e:\n        print(f"Error transcribing audio: {e}")\n\n\n# Run tests\nif __name__ == "__main__":\n    print("Testing Chat functionality:")\n    #test_chat()\n\n    print("\nTesting OpenAI TTS:")\n    test_tts()\n\n    print("\nTesting AssemblyAI Transcription:")\n    # Ensure you provide a valid audio file path here for testing transcription\n    test_audio_file_path = "test.mp3"  # Replace with actual path\n    test_transcription(test_audio_file_path)\n '

In [74]:
import gradio as gr
import openai
from dotenv import load_dotenv
import os

# Load the environment variables (API key) from the .env file
load_dotenv()

# Set the OpenAI API key from environment variables
openai.api_key = os.getenv('OPENAI_API_KEY')

# Initial prompt for the AI system
messages = [
    {"role": "system", "content": "You are a Therapist, act as caring and understanding as possible"}
]

# Test AssemblyAI transcription
def transcribe(audio_file_path):
    config = aai.TranscriptionConfig(speaker_labels=True, language_code="es")



    try:
        transcript = transcriber.transcribe(audio_file_path, config)

        if transcript.status == aai.TranscriptStatus.completed:
            test_tts(transcript.text)
            with open("test.mp3", "rb") as audio_file:
                audio_data = audio_file.read()
            return audio_data
        
    except Exception as e:
        print(f"Error transcribing audio: {e}")

# Create the Gradio interface
# Removed the 'source' parameter from gr.Audio
ui = gr.Interface(fn=transcribe, inputs=gr.Audio(type="filepath"), outputs="audio")

# Launch the interface and share it publicly
ui.launch(share=True)


* Running on local URL:  http://127.0.0.1:7877
* Running on public URL: https://b574543ee540055cab.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




  response.stream_to_file(speech_file_path)
