# Importing requirements

In [1]:
import os
from dotenv import load_dotenv
import gradio as gr
from openai import OpenAI
import json
import base64
from io import BytesIO
from PIL import Image
from pydub import AudioSegment
from pydub.playback import play

In [2]:
# loading api key

load_dotenv(override =True)
api_key = os.getenv("OPENAI_API_KEY")
openai = OpenAI()
chatModel = "gpt-4o-mini"
speechModel = "tts-1"
imageModel = "dall-e-2"

# Creating a tools for our Airline assistant

In [3]:
# creating tools function
ticket_prices = {"london": "$799", "paris": "$899", "tokyo": "$1400", "berlin": "$499"}

def get_ticket_price(city):
    city = city.lower()
    return ticket_prices.get(city,"unknown")
    

In [4]:
#creating a dictionary to describe our tool function

price_function = {
    "name": "get_ticket_price",
    "description": "Get the price of a return ticket to the destination city. Call this whenever you need to know the ticket price",
    "parameters" : {
        "type" : "object",
        "properties" : {
            "destination_city" :{
                "type" : "string",
                "description": "The city that the customer wants to travel to",
            },
        },
        "required": ["destination_city"],
        "additionalProperties": False
    }
}

In [5]:
# create a keyword pointing tools function
tools = [{"type": "function", "function": price_function}]

In [6]:
#  writing a function to handle_tool_call:

def handle_tool_call(message):
    tool_call = message.tool_calls[0]
    arguments = json.loads(tool_call.function.arguments)
    city = arguments.get('destination_city')
    price = get_ticket_price(city)
    response = {
        "role": "tool",
        "content": json.dumps({"destination_city": city,"price": price}),
        "tool_call_id": tool_call.id
    }
    return response, city

# Creating a talker function to talk(text to speech)

In [7]:
def talker(msg):
    response = openai.audio.speech.create(
        model = speechModel,
        voice = "onyx",
        input = msg)

    audio_stream = BytesIO(response.content)
    audio = AudioSegment.from_file(audio_stream, format ="mp3")
    play(audio)

# Creating a function for image generation

In [8]:
def artist(city):
    image_resp = openai.images.generate(
        model = imageModel,
        prompt = f"An image representing a vacation in a {city} and tourist spots and everything unique about the {city}",
        size = "1024x1024",
        n=1,
        response_format = "b64_json")

    image_base64 = image_resp.data[0].b64_json
    image_data = base64.b64decode(image_base64)
    return Image.open(BytesIO(image_data))


# Creating a chat function to integrate Image and Speech multimodal

In [9]:
sys_prompt = "You are a helpful assistant for an Airline called FlightAI.\
Give short, courteous answers, no more than 1 sentence. \
Always be accurate. If you don't know the answer, say so."

def chat(message, history):
    print("called chat function")
    image=None
    convo = [{"role": "system", "content": sys_prompt}]
    
    for human, assistant in history:
        convo.append({"role":"user", "content": human})
        convo.append({"role":"assistant", "content": assistant})
    convo.append({"role":"user", "content": message})
    print("appended messages")
    
    resp = openai.chat.completions.create( model= chatModel, messages=convo, tools = tools)
    print("called gpt 1st time")
          
    if resp.choices[0].finish_reason == "tool_calls":
        print("Handling tool calls")
        message = tool_call = resp.choices[0].message
        response, city = handle_tool_call(message)
        print("tool call success")
        convo.append(message)
        convo.append(response)
        image = artist(city)
        print("image success")
        resp = openai.chat.completions.create(model = chatModel, messages = convo)

    reply = resp.choices[0].message.content
    talker(reply)
    return reply, image

# Creating an UI using Gradio

In [10]:
with gr.Blocks() as ui:
    with gr.Row():
        chatbot = gr.Chatbot(height = 500)
        Imagebox = gr.Image(height = 500)
    with gr.Row():
        msg = gr.Textbox(label = "Chat with our AI assistant")
    with gr.Row():
        clear = gr.Button("clear")

    def user(user_message, history):
        return "", history + [[user_message, None]]

    def bot(history):
        user_message = history[-1][0]
        bot_msg, image = chat(user_message, history[:-1])
        history[-1][1] = bot_msg
        return history, image

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, [chatbot,Imagebox])

    clear.click(lambda: None, None, chatbot, queue = False)

ui.launch()
        



* Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.




called chat function
appended messages
called gpt 1st time


Input #0, wav, from '/var/folders/dp/5vg_3lhn6tz7w33s1288bclr0000gn/T/tmpd338tero.wav':
  Duration: 00:00:02.11, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   1.98 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 


called chat function
appended messages
called gpt 1st time
Handling tool calls
tool call success
image success


Input #0, wav, from '/var/folders/dp/5vg_3lhn6tz7w33s1288bclr0000gn/T/tmpei_e15j2.wav':
  Duration: 00:00:03.46, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   3.37 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 


