In [None]:
import base64
import json
import os
import wave
from io import BytesIO

import gradio as gr
from dotenv import load_dotenv
from IPython.display import Markdown, Audio, display, update_display
from openai import OpenAI
from PIL import Image
from pydub import AudioSegment
from pydub.playback import play

In [None]:
# Load variables from a local .env file, letting them override the existing environment.
load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')

# Report whether a key was found; only its first eight characters are printed.
if not openai_api_key:
    print("OpenAI API Key not set")
else:
    print(f"Openai API Key exists and begins {openai_api_key[:8]}")

# Shared client used by every helper below.
openai = OpenAI()

In [None]:
# System message that chat() places at the start of every request.
system_prompt = (
    "You are a helpful technical tutor who answers questions about "
    "python code, software engineering, data science and LLMs"
)

In [None]:
# Price label for each provider, keyed by the lower-case provider name.
subscription_prices = dict(
    gemini="free",
    openai="minimum 5$",
    claude="20$",
)

In [None]:
def get_subscription_price(name):
    """Look up the subscription price label for an AI provider.

    Args:
        name: Provider name. Matching ignores case and surrounding whitespace.

    Returns:
        The price string stored in ``subscription_prices``, or ``"Unknown"``
        when the provider is not listed or ``name`` is not a string.
    """
    if not isinstance(name, str):
        # The name comes from model-generated tool arguments and may be absent.
        return "Unknown"
    return subscription_prices.get(name.strip().lower(), "Unknown")

In [None]:
# Function-calling schema that describes get_subscription_price to the model.
subscription_function = dict(
    name="get_subscription_price",
    description="Get the price of an AI producer plan. Call this whenever you need to know the AI price, for example when a customer asks 'How much is for gemini'",
    parameters=dict(
        type="object",
        properties=dict(
            name=dict(
                type="string",
                description="AI environment producer name whose AI customer want to buy",
            ),
        ),
        required=["name"],
        additionalProperties=False,
    ),
)

# Tool list passed to the chat completion request.
tools = [dict(type="function", function=subscription_function)]

In [None]:
def artist(model_name):
    """Generate a pop-art illustration for the given AI provider.

    Args:
        model_name: Provider name inserted into the image prompt.

    Returns:
        A ``PIL.Image`` decoded from the base64 payload, or ``None`` when no
        name was supplied or the image request fails.
    """
    if not model_name:
        # Without a subject the prompt would describe the literal "None";
        # skip the image request entirely.
        return None
    try:
        image_response = openai.images.generate(
            model="dall-e-3",
            prompt=f"An image representing creative vision of {model_name}, showing people surrounding main object. Try to place everywhere everything unique about {model_name}, in a vibrant pop-art style",
            size="1024x1024",
            n=1,
            response_format="b64_json",
        )
        image_base64 = image_response.data[0].b64_json
        image_data = base64.b64decode(image_base64)
        return Image.open(BytesIO(image_data))
    except Exception as e:
        # Best effort: the chat still works without an illustration.
        print(f"Error generating image: {e}")
        return None

In [None]:
def talker(message):
    """Convert text to speech with the OpenAI TTS endpoint.

    Args:
        message: Text to speak.

    Returns:
        The encoded audio as ``bytes``, or ``None`` when there is no text to
        speak or the speech request fails.
    """
    if not message or not message.strip():
        # Nothing to say, so do not send a request.
        return None
    try:
        response = openai.audio.speech.create(
            model="tts-1",
            voice="onyx",
            input=message)
        return bytes(response.content)
    except Exception as e:
        # Best effort: the text reply is still shown without audio.
        print(f"Error generating audio: {e}")
        return None

In [None]:
def chat(history, model_choice):
    """Stream an assistant reply for the conversation, plus optional image and audio.

    A first, non-streamed request (with ``tools``) is used only to detect a
    tool call; the visible reply always comes from a second, streamed request.

    Args:
        history: Chat messages as dicts with ``role`` and ``content`` keys;
            the last one is expected to be the user's new message.
        model_choice: Model name passed to the chat completion requests.

    Yields:
        ``(history, image, audio_data)`` tuples. ``history`` grows by exactly
        one assistant message whose content is extended as chunks arrive,
        ``image`` is the result of ``artist`` when a tool call named a
        provider (otherwise ``None``), and ``audio_data`` is ``None`` until
        the final tuple, which carries the result of ``talker``.
    """
    # Work on a copy so the caller's list is not mutated while streaming.
    history = list(history or [])
    if not history or history[-1].get("role") != "user":
        # No new user message to answer: leave the conversation untouched.
        yield history, None, None
        return

    # Forward only role/content so any extra keys on chatbot messages are not
    # sent to the API.
    messages = [{"role": "system", "content": system_prompt}]
    messages += [{"role": m.get("role"), "content": m.get("content")} for m in history]
    image = None
    audio_data = None

    response = openai.chat.completions.create(model=model_choice, messages=messages, tools=tools)
    first_choice = response.choices[0]
    if first_choice.finish_reason == "tool_calls":
        message = first_choice.message
        tool_response, model_name = handle_tool_call(message)
        messages.append(message)
        messages.append(tool_response)
        if model_name:
            image = artist(model_name)

    stream_response = openai.chat.completions.create(model=model_choice, messages=messages, stream=True)

    # Add the assistant turn once, then replace it as text arrives. The user's
    # message stays in place and the reply is never duplicated.
    partial_reply = ""
    history.append({"role": "assistant", "content": partial_reply})
    for chunk in stream_response:
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if not delta:
            continue
        partial_reply += delta
        history[-1] = {"role": "assistant", "content": partial_reply}
        yield history, image, audio_data

    # Speech is produced once, from the complete reply.
    audio_data = talker(partial_reply) if partial_reply else None
    yield history, image, audio_data

In [None]:
def handle_tool_call(message):
    """Execute the first tool call of an assistant message.

    Args:
        message: Assistant message whose ``tool_calls[0]`` holds the call id
            and JSON-encoded arguments.

    Returns:
        ``(response, model_name)``. ``response`` is a ``role="tool"`` message
        with the JSON result, and ``model_name`` is the requested provider
        name. On failure ``model_name`` is ``None`` and the content is an
        error payload; the ``tool_call_id`` is still included whenever it
        could be read, so the reply can be matched to the call.
    """
    tool_call_id = None
    try:
        tool_call = message.tool_calls[0]
        tool_call_id = tool_call.id
        arguments = json.loads(tool_call.function.arguments)
        model_name = arguments.get('name')
        price = get_subscription_price(model_name)
        response = {
            "role": "tool",
            "content": json.dumps({"name": model_name, "price": price}),
            "tool_call_id": tool_call_id
        }
        return response, model_name
    except Exception as e:
        print(f"Tool call error: {e}")
        error_response = {"role": "tool", "content": json.dumps({"error": "Invalid tool call"})}
        if tool_call_id is not None:
            error_response["tool_call_id"] = tool_call_id
        return error_response, None

In [None]:
def transcribe_audio(audio):
    """Transcribe recorded speech to text with the ``whisper-1`` model.

    Args:
        audio: Either a ``(sample_rate, samples)`` tuple, which is what the
            ``gr.Audio(type="numpy")`` input in this notebook delivers, or
            raw bytes of an already-encoded audio file.

    Returns:
        The transcribed text, or ``""`` when there is no audio or the
        request fails.
    """
    if audio is None:
        return ""
    try:
        if isinstance(audio, (bytes, bytearray)):
            # Already-encoded audio is uploaded as-is under the original file name.
            upload = ("input_audio.mp3", bytes(audio))
        else:
            sample_rate, samples = audio
            upload = ("input_audio.wav", _pcm_to_wav_bytes(sample_rate, samples))
        # The audio is uploaded from memory, so no file is written to disk.
        transcription = openai.audio.transcriptions.create(model="whisper-1", file=upload)
        return transcription.text
    except Exception as e:
        print(f"Transcription error: {e}")
        return ""


def _pcm_to_wav_bytes(sample_rate, samples):
    """Encode a NumPy sample array as 16-bit PCM WAV bytes."""
    if samples.dtype.kind == "f":
        # Assumes float audio is normalised to [-1.0, 1.0] - TODO confirm for other sources.
        samples = samples.clip(-1.0, 1.0) * 32767
    pcm = samples.astype("<i2")
    # A 1-D array is mono; a 2-D array is treated as (frames, channels).
    channels = 1 if pcm.ndim == 1 else pcm.shape[1]
    buffer = BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(2)
        wav_file.setframerate(int(sample_rate))
        wav_file.writeframes(pcm.tobytes())
    return buffer.getvalue()

In [None]:
# Gradio interface: model picker, chat transcript with an image panel, spoken
# reply, and text / microphone inputs.
with gr.Blocks() as ui:
    model_choice = gr.Dropdown(
        label="Select Model",
        choices=["gpt-4o-mini", "gpt-3.5-turbo"],
        value="gpt-4o-mini"
    )
    with gr.Row():
        chatbot = gr.Chatbot(height=500, type="messages")
        image_output = gr.Image(height=500)
    audio_output = gr.Audio(label="AI Response Audio", autoplay=True)
    with gr.Row():
        entry = gr.Textbox(label="Chat with our AI Assistant:")
        audio_input = gr.Audio(sources=["microphone"], type="numpy", label="Speak your question")
        submit = gr.Button("Send")
    clear = gr.Button("Clear")

    def do_entry(message, history):
        """Append the typed message to the history and clear the textbox.

        Raises ``gr.Error`` for blank input so the chained ``chat`` step,
        registered with ``.success``, does not run on an empty user turn.
        """
        if not message or not message.strip():
            raise gr.Error("Please type a question before sending.")
        history = history or []
        history.append({"role": "user", "content": message})
        return "", history

    def do_audio_input(audio, history, model_choice):
        """Transcribe a recording and append it to the history as a user message.

        ``model_choice`` is accepted because the event passes it, but it is
        not used here. Raises ``gr.Error`` when there is no recording or no
        text could be transcribed, so ``chat`` is not run.
        """
        if audio is None:
            raise gr.Error("No audio was recorded.")
        transcribed = transcribe_audio(audio)
        if not transcribed or not transcribed.strip():
            raise gr.Error("Could not transcribe the recording. Please try again.")
        history = history or []
        history.append({"role": "user", "content": transcribed})
        return history, None, None

    chat_inputs = [chatbot, model_choice]
    chat_outputs = [chatbot, image_output, audio_output]

    # Pressing Enter in the textbox and clicking "Send" go through the same
    # pipeline; chat only runs when the first step finished without an error.
    entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot]).success(
        chat, inputs=chat_inputs, outputs=chat_outputs
    )
    submit.click(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot]).success(
        chat, inputs=chat_inputs, outputs=chat_outputs
    )
    audio_input.stop_recording(do_audio_input, inputs=[audio_input, chatbot, model_choice], outputs=chat_outputs).success(
        chat, inputs=chat_inputs, outputs=chat_outputs
    )
    clear.click(lambda: (None, None, None), inputs=None, outputs=[chatbot, image_output, audio_output], queue=False)

ui.launch(inbrowser=True)