# Additional End of week Exercise - week 2

Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in the Week 1 Exercise.

This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!

If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.

I will publish a full solution here soon - unless someone beats me to it...

There are so many commercial applications for this: from a language tutor, to a company onboarding solution, to a companion AI for a course (like this one!). I can't wait to see your results.

In [None]:
# Imports

import os, json, random, time, tempfile, subprocess
from io import BytesIO
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr


In [None]:
# OpenAI API initialization
# Reads settings from a .env file (values there override the current environment),
# reports whether an API key was found, and creates the shared client.

load_dotenv(override=True)

openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    print("OpenAI API Key not set")
else:
    # Only the first few characters are shown, as a sanity check.
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")

# Client used for every OpenAI request in this notebook, and the chat model name.
openai = OpenAI()
MODEL = "gpt-4o-mini"

In [None]:
# System message
# Prompt text describing the assistant's persona: an advice-giver that asks
# clarifying questions, stays positive, avoids blame, and introduces itself by name.
# The text is sent to the model verbatim, so its wording and whitespace are runtime data.

system_message = """
You are a helpful assistant that can provide helpful suggestions on how to deal with problems in life.
When someone asks for advice, ask clarifying questions about their situation 
to understand their problem deeply, including all people involved.
When providing advice, try to stay positive. Do not put blame on anyone.
Try to suggest a way for your interlocutor to assess the situation objectively 
and find a common ground with all people involved.
You can build better connection with the person you are talking to by introducing yourself 
and giving them your name. Let them refer to you by your name.
"""

In [None]:
# AI tool definitions and helper functions.

# Seed the module-level random generator (no explicit seed value is supplied).
random.seed()


def get_assistant_name():
    """Pick a first name for the assistant at random.

    Returns:
        One of the predefined names, selected with ``random.choice``.
    """
    candidate_names = ["Alan", "Beth", "Charlie", "David", "Elanor", "Gerald", "Helen"]
    picked = random.choice(candidate_names)
    return picked

# Registry entry for the name tool: the schema fields advertised to the model
# ("name", "description") plus "impl", the local callable that carries it out.
get_assistant_name_tool = dict(
    name="get_assistant_name",
    description=(
        "Gets the name of the assistant (you) for the purpose of introducing yourself \n"
        "       to the person you are talking to."
    ),
    impl=get_assistant_name,
)

# Full registry, including implementations; used to dispatch tool calls locally.
tools_impl = [dict(type="function", function=get_assistant_name_tool)]

# Copy of the registry without the "impl" entries: a Python callable is not
# JSON-serializable, so it must not be part of the payload sent to OpenAI.
tools = [
    dict(entry, function={key: value for key, value in entry["function"].items() if key != "impl"})
    for entry in tools_impl
]


def call_tool(call):
    """Dispatch one model-requested tool call to its local implementation.

    Args:
        call: Tool-call object exposing ``function.name`` (the tool to run) and
            ``function.arguments`` (a JSON-encoded object of keyword arguments).

    Returns:
        Whatever the registered implementation returns.

    Raises:
        ValueError: If no tool with that name is registered in ``tools_impl``,
            or the registered entry has a type other than "function".
    """
    name = call.function.name
    tool = next((t for t in tools_impl if t["function"]["name"] == name), None)
    if tool is None:
        raise ValueError(f"Unknown tool call: {name}")
    if tool["type"] != "function":
        # Registry entries are plain dicts, so the type must be read by key.
        raise ValueError(f"Unknown tool type: {tool['type']}")

    # Treat a missing or empty argument string as "no arguments".
    args = json.loads(call.function.arguments or "{}")
    return tool["function"]["impl"](**args)


def handle_tool_calls(message):
    """Run every tool call attached to an assistant message.

    Args:
        message: Assistant message whose ``tool_calls`` attribute lists the calls
            to execute.

    Returns:
        A list with one ``{"role": "tool", ...}`` message per call, in order.
        String results are passed through as the content; other results are
        JSON-encoded. A call that fails yields ``{"error": "<text>"}`` as JSON
        content instead of raising, so the model can see what went wrong.
    """
    def answer(call):
        try:
            outcome = call_tool(call)
            payload = outcome if isinstance(outcome, str) else json.dumps(outcome)
        except Exception as exc:
            payload = json.dumps({"error": str(exc)})
        return {"role": "tool", "tool_call_id": call.id, "content": payload}

    return [answer(call) for call in message.tool_calls]

In [None]:
# Text-to-speech function
# Requires ffmpeg software to be installed on the system, see https://ffmpeg.org/ for details.

def say(message):
    """Speak ``message`` aloud using OpenAI text-to-speech and ffplay.

    The synthesized MP3 is written to a temporary file, played with the
    ``ffplay`` command-line player, and the file is removed afterwards.

    Args:
        message: Text to speak. Nothing happens when it is empty or None.
    """
    if not message:
        return

    response = openai.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="shimmer",
        input=message,
        instructions="Please read this text in a friendly, relaxed, unhurried, and conversational tone.",
        response_format="mp3"
    )

    # delete=False keeps the file on disk after it is closed, so ffplay can open
    # it by name. (delete_on_close is deliberately not passed: it only exists on
    # Python 3.12+ and is ignored anyway when delete=False.)
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3", mode="wb")
    try:
        with temp_file:
            temp_file.write(response.content)
        subprocess.run(["ffplay", "-nodisp", "-autoexit", "-hide_banner", temp_file.name])
    finally:
        # Clean up even if writing or playback fails.
        os.remove(temp_file.name)

In [None]:
# Chat function, handling the single turn of the LLM conversation loop

def chat(user_message, history, use_voice):
    """Run a single turn of the conversation and return the extended history.

    Args:
        user_message: The text the user just entered (typed or transcribed).
        history: Previous turns as a list of ``{"role", "content"}`` dicts; it is
            not modified, a new list is returned.
        use_voice: When true, the assistant's reply is also spoken via ``say``.

    Returns:
        A new history list: the old turns, the user message, and the reply.
    """
    user_turn = {"role": "user", "content": user_message}
    conversation = history + [user_turn]
    messages = [{"role": "system", "content": system_message}] + conversation

    completion = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)
    first_choice = completion.choices[0]

    if first_choice.finish_reason == "tool_calls":
        # The model asked for tools: run them, append the request and the results,
        # then ask again (without tools) for the final answer.
        request = first_choice.message
        tool_results = handle_tool_calls(request)
        messages.append(request)
        messages.extend(tool_results)
        completion = openai.chat.completions.create(model=MODEL, messages=messages)

    reply = completion.choices[0].message.content
    conversation.append({"role": "assistant", "content": reply})
    if use_voice:
        say(reply)
    return conversation

In [None]:
# Set up Gradio UI
# Layout: a header row (title, "speak" checkbox, "End chat" button), the chat
# transcript, a text box, and a microphone recorder. Text and audio input both
# go through chat() and update the transcript.

# Polled by the loop at the bottom of this cell; set to False by the "End chat" button.
ui_running = True

with gr.Blocks() as appUI:
    # Header row: title, voice-output toggle and the stop button.
    with gr.Row():
        with gr.Column(scale=4):
            gr.Markdown("## OpenAI Chat with Voice Input and Output\nPress End Chat button to stop the conversation.")
        with gr.Column(scale=1):
            # Passed to the handlers as `use_voice`.
            speech_enabled = gr.Checkbox(label="Make assistant speak", value=False)
        with gr.Column(scale=1):
            end_chat = gr.Button("End chat", variant="stop")

    # Conversation transcript; its value is the history passed to and returned by chat().
    with gr.Row():
        chatbot = gr.Chatbot(height=500, type="messages")

    with gr.Row():
        text_input = gr.Textbox(label="Type your question")

    with gr.Row():
        # The handler receives the recording as a file path (type="filepath").
        audio_input = gr.Audio(
            sources=["microphone"], 
            type="filepath", 
            label="Press Record to speak your question"
        )
    
    def on_text_input(user_message, history, use_voice):
        """Handle a submitted text message.

        Returns an empty string (written back to the text box, clearing it)
        and the updated history for the chatbot.
        """
        return "", chat(user_message, history, use_voice)
    
    text_input.submit(
        on_text_input, 
        inputs=[text_input, chatbot, speech_enabled], 
        outputs=[text_input, chatbot]
    )

    def on_audio_input(audio_file, history, use_voice):
        """Handle a change of the audio component.

        With no recording (None) the history is returned unchanged. Otherwise the
        file is transcribed with the OpenAI transcription API and the text is
        passed to chat(). In both cases None is written back to the audio
        component, which resets it.
        """
        # NOTE(review): this guard presumably also covers the change event raised
        # when the handler itself clears the component — confirm against Gradio docs.
        if audio_file is None:
            return None, history
        with open(audio_file, "rb") as file:
            transcription = openai.audio.transcriptions.create(
                model="gpt-4o-mini-transcribe",
                file=file
            )
        return None, chat(transcription.text, history, use_voice)
    
    audio_input.change(
        on_audio_input,
        inputs=[audio_input, chatbot, speech_enabled],
        outputs=[audio_input, chatbot]
    )

    def on_end_chat():
        """Clear the `ui_running` flag so the wait loop below exits and closes the UI."""
        global ui_running
        ui_running = False
    
    end_chat.click(fn=on_end_chat)

appUI.launch()

# The following is useful for debugging (keeps the cell running, so breakpoints in other cells will work)
# To debug, open notebook in VS Code and use "debug cell" command. Comment out the code below if not needed.
# NOTE(review): this loop is only reached once launch() returns — presumably
# immediately when run inside a notebook; confirm for other environments.
while ui_running:
    time.sleep(1)
appUI.close()
