<a href="https://colab.research.google.com/github/sudarshan-koirala/youtube-stuffs/blob/main/voiceGPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# OpenAI Whisper, ChatGPT, TTS and Gradio Web UI




[![](https://mermaid.ink/img/pako:eNo1jkELgkAUhP_K450M9A94CEohgg5SQlDb4eGuKemurG-pUP97a-JtZr5hmAELIxXGWDbmXVRkGU5noXf3kuKSItcrCzsna_OAKNqO16ruuznKjiPsg1x9eCP0fmYwJhXxIcsXmASrNY47x0A98FJP_lN5fhkhvS_jGGKrbEu19FcGoQEEcqVaJTD2UpJ9CRR68j1ybC5fXWDM1qkQXSeJVVrT01KL_nTT-7QjfTNm9dMPgGJNAQ?type=png)](https://mermaid.live/edit#pako:eNo1jkELgkAUhP_K450M9A94CEohgg5SQlDb4eGuKemurG-pUP97a-JtZr5hmAELIxXGWDbmXVRkGU5noXf3kuKSItcrCzsna_OAKNqO16ruuznKjiPsg1x9eCP0fmYwJhXxIcsXmASrNY47x0A98FJP_lN5fhkhvS_jGGKrbEu19FcGoQEEcqVaJTD2UpJ9CRR68j1ybC5fXWDM1qkQXSeJVVrT01KL_nTT-7QjfTNm9dMPgGJNAQ)

In [None]:
!pip install -U openai-whisper -q  # https://github.com/openai/whisper
!pip install gradio -q             # https://gradio.app/quickstart/
!pip install openai -q             # https://github.com/openai/openai-python
!pip install TTS -q                # https://github.com/coqui-ai/TTS
!pip install python-dotenv -q

In [None]:
!pip install numpy==1.21.6 -q

In [None]:
import whisper
import gradio as gr 
import openai 
from TTS.api import TTS 

import warnings
warnings.filterwarnings('ignore')

#### Let's first see what the Gradio UI looks like

In [None]:
#import gradio as gr
def greet(name):
    """Return the greeting ``Hello <name>!`` for the given name string."""
    pieces = ("Hello ", name, "!")
    return "".join(pieces)

# Wrap greet in a minimal Gradio interface: one text input, one text output.
demo = gr.Interface(fn=greet, inputs="text", outputs="text")

# Two ways to start the UI: the commented-out call is the plain launch; the
# active one passes share=True, which asks Gradio for a shareable link as well.
#demo.launch()
demo.launch(share=True)   

## Text to Speech (TTS) part

In [None]:
#from TTS.api import TTS
#TTS??

In [None]:
# Display the models the TTS library knows about (shown as the cell result).
TTS.list_models()

In [None]:
# Pick one model from the catalogue by position and load it.
# NOTE(review): the index 9 is hard-coded, so which model this selects depends
# on the order list_models() returns -- the original note says it is an
# English ("en") model; confirm against the catalogue printed above.
model_name = TTS.list_models()[9]
# Module-level synthesizer reused by the cells below.
tts = TTS(model_name)

In [None]:
# Smoke test: synthesize one short sentence and write the audio to output.wav.
tts.tts_to_file(text="OpenAI is not open anymore", file_path="output.wav")

In [None]:
# Check that TTS is working properly by playing the generated output.wav
# inline in the notebook (autoplay starts playback without a click).
from IPython.display import Audio, display
display(Audio('output.wav', autoplay=True))

## Whisper part

In [None]:
# Display the Whisper model names available (shown as the cell result).
whisper.available_models()

In [None]:
# Load the Whisper model once at module level so transcribe() can reuse it.
# "tiny.en" is fixed here; the model name could instead be chosen through a
# dropdown in the Gradio UI.
model = whisper.load_model("tiny.en")
# Display the device the loaded model lives on.
model.device

## ChatGPT part
### for api key --> https://platform.openai.com

In [None]:
#from dotenv import load_dotenv
# Load the environment variables from the .env file
#load_dotenv()
#openai.api_key = os.getenv("OPENAI_API_KEY")


#openai.api_key=""
# Read the OpenAI API key from a local JSON file rather than hard-coding it
# in the notebook.
import json
with open('env_vars.json', 'r') as f:
    # The file must hold a JSON object with an "OPENAI_API_KEY" entry.
    env_vars = json.load(f)

# Set the key on the openai module itself.
openai.api_key = env_vars["OPENAI_API_KEY"]
#print(openai.api_key)

In [None]:
# Running conversation history shared by every call to chatgpt_api. It starts
# with the system prompt and grows by one user/assistant pair per answered turn.
messages = [
    {"role": "system", "content": "You are a helpful and kind AI Assistant."},
]


def chatgpt_api(input):
    """Send one user turn to ChatGPT and return the assistant's reply.

    The conversation so far (``messages``) is sent together with the new turn
    so the model keeps context between calls.

    Args:
        input: Text of the user's message. The name shadows the builtin but is
            kept so existing callers keep working.

    Returns:
        The reply text, or ``None`` when ``input`` is empty or ``None`` (no
        request is made in that case).

    Raises:
        Whatever the OpenAI call raises. ``messages`` is left unchanged when
        the request fails.
    """
    if not input:
        return None

    user_message = {"role": "user", "content": input}
    # Send history + new turn without touching the shared history yet: if the
    # request fails, no unanswered user message is left behind to be re-sent
    # on every later call.
    # NOTE(review): openai.ChatCompletion is the pre-1.0 client interface --
    # confirm the installed openai version still provides it.
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo", messages=[*messages, user_message]
    )
    reply = chat.choices[0].message.content

    # Commit both sides of the exchange only once a reply has been received.
    messages.extend((user_message, {"role": "assistant", "content": reply}))
    return reply

## Main function to transcribe

Takes in audio; Whisper converts it to text using the model we loaded.  
The text is then passed to ChatGPT.  
ChatGPT's response is then converted back to audio using TTS.
    

In [None]:
#def transcribe(audio, model_size):
def transcribe(audio):
    """Run one voice round-trip: speech -> text -> ChatGPT reply -> speech.

    Args:
        audio: Path of the recorded audio file (the Gradio input is declared
            with ``type="filepath"``). ``None`` is treated as "nothing
            recorded".

    Returns:
        A 3-tuple ``(transcript, reply, audio_path)``. ``audio_path`` is
        ``"output.wav"`` when a reply was synthesized, otherwise ``None``;
        ``reply`` is ``""`` when there was nothing to answer.
    """
    # Nothing recorded: skip the pipeline rather than handing None to Whisper.
    if audio is None:
        return ("", "", None)

    # Uses the module-level Whisper model; it could instead be loaded per call
    # from a model size chosen in the UI.
    audio_to_text = model.transcribe(audio)["text"]

    reply = chatgpt_api(audio_to_text)
    # chatgpt_api returns None for an empty transcript; there is then no text
    # to synthesize, so do not pass it on to TTS.
    if not reply:
        return (audio_to_text, "", None)

    tts.tts_to_file(text=reply, file_path="output.wav")

    return (audio_to_text, reply, "output.wav")

## Gradio UI

In [None]:
# Output widgets, in the same order as the tuple returned by transcribe:
# transcript text, ChatGPT reply text, synthesized reply audio.
output_1 = gr.Textbox(label="Speech to Text (Whisper)")
output_2 = gr.Textbox(label="ChatGPT Output")
# The audio to play is the file path transcribe returns, so the component only
# needs a label ("upload" is not a gr.Audio parameter and has been dropped).
output_3 = gr.Audio(label="ChatGPT output to audio via TTS")

gr.Interface(
    title='AI Voice Assistant Using Whisper, ChatGPT, TTS, Gradio',
    fn=transcribe,
    inputs=[
        # Record from the microphone and hand transcribe a file path.
        gr.Audio(source="microphone", type="filepath"),
        # A model-size selector could be added as a second input:
        #gr.Dropdown(choices=whisper.available_models(), value='medium')
    ],
    outputs=[output_1, output_2, output_3],
).launch(share=True)

# EXTRA

## Just the chatGPT part (personalized)

In [None]:
import openai
import gradio as gr
import json
import os

In [None]:
# Read the OpenAI API key from a local JSON file; the file must hold a JSON
# object with an "OPENAI_API_KEY" entry.
with open('env_vars.json', 'r') as f:
    env_vars = json.load(f)
# Set the key on the openai module itself.
openai.api_key = env_vars["OPENAI_API_KEY"]

In [None]:
# Running conversation history shared by every call to chatgpt_api. The system
# prompt restricts the assistant to food-related questions.
messages = [
    {"role": "system", "content": "You are an AI specialized in Food. Do not answer anything other than food-related queries."},
]


def chatgpt_api(input):
    """Send one user turn to ChatGPT and return the assistant's reply.

    The conversation so far (``messages``) is sent together with the new turn
    so the model keeps context between calls.

    Args:
        input: Text of the user's message. The name shadows the builtin but is
            kept so existing callers keep working.

    Returns:
        The reply text, or ``None`` when ``input`` is empty or ``None`` (no
        request is made in that case).

    Raises:
        Whatever the OpenAI call raises. ``messages`` is left unchanged when
        the request fails.
    """
    if not input:
        return None

    user_message = {"role": "user", "content": input}
    # Send history + new turn without touching the shared history yet: if the
    # request fails, no unanswered user message is left behind to be re-sent
    # on every later call.
    # NOTE(review): openai.ChatCompletion is the pre-1.0 client interface --
    # confirm the installed openai version still provides it.
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo", messages=[*messages, user_message]
    )
    reply = chat.choices[0].message.content

    # Commit both sides of the exchange only once a reply has been received.
    messages.extend((user_message, {"role": "assistant", "content": reply}))
    return reply

# Text-only chat UI around chatgpt_api. Both widgets use the top-level
# gr.Textbox component (as the voice UI earlier in this notebook does) rather
# than the deprecated gr.inputs / gr.outputs namespaces.
inputs = gr.Textbox(lines=7, label="Chat with AI")
outputs = gr.Textbox(label="Reply")

gr.Interface(
    fn=chatgpt_api,
    inputs=inputs,
    outputs=outputs,
    title="AI Chatbot using OpenAI & Gradio",
    description="Ask anything you want",
    theme="compact",
    allow_flagging='never',
).launch(share=True)

## What you build depends on your use case. Try different models and have fun. It's all about learning new things and applying them in real use cases when needed.
**Not to progress is to go back**