In [123]:
# Multimodal with DALL-E 3, TTS and Gradio
# Agentic AI

# Our Agent Framework

The terms 'Agentic AI' and 'Agentization' are umbrella terms that refer to a number of techniques, such as:

1. Breaking a complex problem into smaller steps, with multiple LLMs carrying out specialized tasks
2. The ability for LLMs to use Tools to give them additional capabilities
3. The 'Agent Environment' which allows Agents to collaborate
4. An LLM can act as the Planner, dividing bigger tasks into smaller ones for the specialists
5. The concept of an Agent having autonomy / agency, beyond just responding to a prompt - such as Memory

We're showing 1 and 2 here, and to a lesser extent 3 and 5. In week 8 we will do the lot!

In [124]:
# imports

import os
import requests
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
from IPython.display import display, Markdown, Audio
import json
import sqlite3

In [125]:
# Some imports for handling images

import base64
from io import BytesIO
from PIL import Image

In [126]:
# Load environment variables from a .env file

load_dotenv()
openai_key=os.getenv("OPENAI_API_KEY")

# Sanity-check the key. Whitespace is tested before the prefix because a key
# with leading whitespace would otherwise be misreported as having the wrong prefix.
if not openai_key:
    print("An API key was not found")
elif openai_key.strip() != openai_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them")
elif not openai_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start with sk-proj-")
else:
    print("An API key was found and looks good")

An API key was found and looks good


In [127]:
# Create an instance of openai
# No key is passed explicitly here.
# NOTE(review): presumably the client picks up OPENAI_API_KEY from the environment - confirm.
openai = OpenAI()
# Chat model name used for the chat completion calls in this notebook
openai_model = "gpt-4o-mini"

In [128]:
# System prompt for the assistant. Adjacent string literals are concatenated
# verbatim, so every sentence except the last ends with an explicit space;
# without it the sentences run together ("FlightAI.Warmly welcome ...").
system_message = (
    "You are a helpful assistant for an Airline called FlightAI. "
    "Warmly welcome the user to FlightAI assistant. "
    "Give short, courteous answers, no more than 1 sentence. "
    "Always be accurate. If you don't know the answer, say so."
)


In [129]:
def artist(city):
    """Generate a pop-art style vacation image for ``city``.

    Asks the images endpoint for a single 1024x1024 picture returned as
    base64-encoded JSON, decodes it and opens it with PIL.

    Args:
        city: Name of the destination, interpolated into the image prompt.

    Returns:
        The object returned by ``Image.open`` for the decoded image bytes.
    """
    prompt = f"An image representing a vacation in {city}, showing tourist spots and everything unique about {city}, in a vibrant pop-art style"
    generated = openai.images.generate(
        model="dall-e-3",
        prompt=prompt,
        size="1024x1024",
        n=1,
        response_format="b64_json",
    )
    # Only one image is requested (n=1), so take the first entry.
    raw_bytes = base64.b64decode(generated.data[0].b64_json)
    return Image.open(BytesIO(raw_bytes))

In [130]:
# image = artist("New York City")
# display(image)

In [131]:
def talker(message):
    """Convert ``message`` to speech, save it to disk and play it in the notebook.

    The bytes returned by the speech endpoint are written to
    ``output_audio.mp3`` (relative to the current working directory),
    overwriting any previous file, and then shown through an autoplaying
    ``Audio`` widget.

    Args:
        message: Text to be spoken.

    Returns:
        None.
    """
    speech = openai.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="onyx",
        input=message,
    )

    output_filename = "output_audio.mp3"
    audio_bytes = BytesIO(speech.content).getvalue()
    with open(output_filename, "wb") as audio_file:
        audio_file.write(audio_bytes)

    # Play the generated audio
    display(Audio(output_filename, autoplay=True))

In [132]:
# talker('Hi there')

In [133]:
DB = "prices.db"

def get_ticket_price(city, db_path=None):
    """Look up the ticket price for ``city`` in the prices database.

    Args:
        city: Destination city name. It is lower-cased before being matched
            against the ``city`` column of the ``prices`` table.
        db_path: Optional path to the SQLite database file. Defaults to the
            module-level ``DB``.

    Returns:
        A sentence with the price when a row is found, otherwise
        "No price data available for this city". The same fallback is
        returned when ``city`` is missing, blank or not a string.

    Raises:
        sqlite3.Error: If the database cannot be queried (for example the
            ``prices`` table does not exist).
    """
    print (f"Database Tool Called: Getting price for {city}", flush=True)

    # The tool argument comes from the model and may be absent or malformed.
    if not isinstance(city, str) or not city.strip():
        return "No price data available for this city"

    # `with sqlite3.connect(...)` only manages the transaction; it does not
    # close the connection, so close it explicitly.
    conn = sqlite3.connect(DB if db_path is None else db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT price FROM prices WHERE city = ?", (city.lower(),))
        result = cursor.fetchone()
    finally:
        conn.close()
    return f"Ticket price to {city} is ${result[0]}" if result else "No price data available for this city"

In [134]:
# We have to write that function handle_tool_call:

def handle_tool_call(message):
    """Run the tool calls requested in an assistant message.

    Every tool call receives exactly one ``role: "tool"`` response carrying
    its ``tool_call_id``, including calls to unknown tools and calls whose
    arguments are unusable, so no requested call is left unanswered.

    Args:
        message: Assistant message object exposing ``tool_calls``; each call
            has ``id``, ``function.name`` and ``function.arguments`` (a JSON
            string).

    Returns:
        A ``(responses, cities)`` tuple: the list of tool response dicts in
        call order, and the list of destination cities that were looked up.
    """
    responses = []
    cities = []
    # tool_calls may be None when the message carries no tool requests.
    for tool_call in message.tool_calls or []:
        if tool_call.function.name == "get_ticket_price":
            try:
                arguments = json.loads(tool_call.function.arguments or "{}")
            except json.JSONDecodeError:
                arguments = {}
            city = arguments.get('destination_city') if isinstance(arguments, dict) else None
            if city:
                cities.append(city)
                content = get_ticket_price(city)
            else:
                # Do not record a missing city: callers use `cities` to pick an image subject.
                content = "No destination city was provided"
        else:
            content = f"Unknown tool: {tool_call.function.name}"
        responses.append({
            "role": "tool",
            "content": content,
            "tool_call_id": tool_call.id
        })
    return responses, cities

In [135]:
# Function schema describing the get_ticket_price tool: its name, a
# description of when to call it, and a JSON-schema style "parameters" object
# with a single required string argument, destination_city.

price_function = {
    "name": "get_ticket_price",
    "description": (
        "Get the price of a return ticket to the destination city. "
        "Call this whenever you need to know the ticket price, "
        "for example when a customer asks 'How much is a ticket to this city'"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "destination_city": {"type": "string", "description": "The city that the customer wants to travel to"},
        },
        "required": ["destination_city"],
        "additionalProperties": False,
    },
}

In [136]:
# And this is included in a list of tools:
# Each entry wraps one function schema as {"type": "function", "function": ...}.
# This list is what chat() passes as the `tools` argument of its first completion call.

tools = [{"type": "function", "function": price_function}]

In [137]:
# def chat(history):
#     history = [{"role": h["role"], "content": h["content"]} for h in history]
#     messages = [{"role": "system", "content": system_message}] + history
#     response = openai.chat.completions.create(model=openai_model, messages=messages, tools=tools)
#     image = None
#     cities = []
    
#     while response.choices[0].finish_reason=="tool_calls":
#         message = response.choices[0].message
#         response, cities = handle_tool_call(message)
#         messages.append(message)
#         messages.append(response)
#         image = artist(cities)
#         response = openai.chat.completions.create(model=openai_model, messages=messages, tools=tools)
        
#     reply = response.choices[0].message.content
#     history += [{"role":"assistant", "content":reply}]

#     voice = talker(reply)
#     if cities:
#         image = artist(cities[0])
    
#     return history, voice, image

In [138]:
def chat(history):
    """Produce the assistant's next turn, plus audio and an optional image.

    Sends the conversation to the chat model with the ticket-price tool
    available. If the model requests tool calls, they are executed once and a
    second completion (without tools) produces the final reply. The reply is
    spoken via ``talker`` and, when a destination city was looked up, an image
    is generated via ``artist``.

    Args:
        history: List of chat messages as dicts with at least ``role`` and
            ``content`` keys.

    Returns:
        A ``(history, audio_path, image)`` tuple: the history extended with
        the assistant reply, the path of the generated audio file (``None``
        when there was no reply text to speak) and the generated image
        (``None`` when no city was looked up).
    """
    # Forward only role/content: the UI may attach extra keys to each message
    # that do not belong in the API request.
    conversation = [{"role": msg["role"], "content": msg["content"]} for msg in history]
    messages = [{"role": "system", "content": system_message}] + conversation

    response = openai.chat.completions.create(model=openai_model, messages=messages, tools=tools)
    cities = []

    # Handle tool calls if present
    if response.choices[0].finish_reason == "tool_calls":
        message = response.choices[0].message
        tool_responses, cities = handle_tool_call(message)

        # The assistant message with its tool calls must precede the tool responses.
        messages.append(message)
        messages.extend(tool_responses)

        # Get final response after tool calls
        response = openai.chat.completions.create(model=openai_model, messages=messages)

    # content can be None (e.g. when the model returns no text); normalise to "".
    reply = response.choices[0].message.content or ""
    history = history + [{"role": "assistant", "content": reply}]

    # Generate audio only when there is something to say. talker() writes the
    # clip to "output_audio.mp3", which is the path handed back to the UI.
    audio_path = None
    if reply:
        talker(reply)
        audio_path = "output_audio.mp3"

    # Generate an image for the first usable city, if any.
    city = next((c for c in cities if c), None)
    image = artist(city) if city else None

    return history, audio_path, image

In [139]:
# Callback
def put_message_in_chatbot(message,history):
    """Append the user's message to the chat history and clear the textbox.

    Args:
        message: Text the user just submitted.
        history: Current list of chat message dicts; it is not modified.

    Returns:
        A tuple of an empty string (the new textbox value) and a new history
        list ending with the user's message.
    """
    user_turn = {"role": "user", "content": message}
    updated_history = history + [user_turn]
    return "", updated_history

Tools

In [140]:
# Gradio UI blocks implementation
# Layout: chatbot and generated image side by side, then the audio player,
# then the text input.
with gr.Blocks() as ui:
    with gr.Row():
        chatbot = gr.Chatbot(height=500)
        image_output = gr.Image(height=500, interactive=False)
    with gr.Row():
        audio_output = gr.Audio(autoplay=True)
    with gr.Row():
        message = gr.Textbox(label="Chat with our AI Assistant")
    # Hooking up events to callbacks: first echo the user's message into the
    # chatbot and clear the textbox, then run chat() to fill in the reply,
    # audio and image.
    message.submit(put_message_in_chatbot, inputs=[message,chatbot], outputs=[message,chatbot]).then(
        chat, inputs=chatbot, outputs=[chatbot,audio_output, image_output]
    )

# Launch once the Blocks context has closed, i.e. after the layout is fully defined.
ui.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7876
* To create a public link, set `share=True` in `launch()`.


Database Tool Called: Getting price for London
