In [1]:
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr

In [2]:
# Pull settings from the local .env file (override=True is passed so .env values
# take precedence over variables already present in the environment).
load_dotenv(override=True)

# Report whether a key was found; only its first 8 characters are echoed.
openai_api_key = os.getenv('OPENAI_API_KEY')
print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)

# Chat model used for every completion request, and the shared API client.
MODEL = "gpt-4.1-mini"
openai = OpenAI()

OpenAI API Key exists and begins sk-proj-


In [3]:
# System prompt sent as the first message of every conversation. It is assembled
# from one literal per line so the leading newline and trailing whitespace are explicit.
system_prompt = (
    "\n"
    "You are a helpful smart home voice controller assistant. You can perform actions like turning lights on and off, adjusting the thermostat temperature. \n"
    'You can also answer questions about the current state of the home, such as "What is the current temperature?" or "Are the lights on in the living room?".\n'
)

# Develop tools

In [4]:
def toggle_light(room, new_state):
    """Simulate switching the light in ``room`` to ``new_state``.

    Prints a trace line and returns a ``(message, room, new_state)`` tuple,
    where ``message`` is a human-readable confirmation.
    """
    print(f"Toggling light in {room} to {new_state}")
    confirmation = f"Light in {room} turned {new_state}."
    return confirmation, room, new_state

def set_thermostat(temp):
    """Simulate setting the whole-house thermostat to ``temp``.

    Prints the confirmation message and returns it together with the
    temperature as a ``(message, temp)`` tuple.
    """
    message = f"The temperature for the whole house is set to {temp}"
    print(message)
    return message, temp

In [5]:
# Quick manual check of toggle_light: show each element of the returned tuple on its own line.
log, room, state = toggle_light('bedroom', 'on')
for value in (log, room, state):
    print(value)

Toggling light in bedroom to on
Light in bedroom turned on.
bedroom
on


In [6]:
# Quick manual check of set_thermostat: show the message and the returned temperature.
log, temp = set_thermostat(77)
print(log, temp, sep="\n")

The temperature for the whole house is set to 77
The temperature for the whole house is set to 77
77


In [7]:
# Build the dictionary structure that describes the toggle_light function
# (name, description and a JSON-schema style "parameters" object).
toggle_light_function = dict(
    name="toggle_light",
    description="Turns the light on or off in a specified room",
    parameters=dict(
        type="object",
        properties=dict(
            room=dict(
                type="string",
                description="The room in which the customer wants to toggle the light (e.g., 'living room', 'bedroom')",
            ),
            new_state=dict(
                type="string",
                description="The new state for the light, either 'on' or 'off'",
                enum=["on", "off"],
            ),
        ),
        required=["room", "new_state"],
        additionalProperties=False,
    ),
)
# Description of the set_thermostat function in the same structure:
# a single required integer argument, "temp".
set_temp_function = dict(
    name="set_thermostat",
    description="Sets the thermostat temperature for the whole house",
    parameters=dict(
        type="object",
        properties=dict(
            temp=dict(
                type="integer",
                description="The desired temperature to set the thermostat to",
            ),
        ),
        required=["temp"],
        additionalProperties=False,
    ),
)

In [8]:
# Tool list passed to the chat completion calls: each function description
# is wrapped in a {"type": "function", ...} entry.
tools = [
    {"type": "function", "function": function_spec}
    for function_spec in (toggle_light_function, set_temp_function)
]

In [18]:
def _format_light_label(state):
    """Build the text shown in the "Lights" status box from a home-state dict."""
    return f"Living Room: {state['living_room_light']} | Bedroom: {state['bedroom_light']}"


def chat(text_message, audio_input, history, current_state):
    """Run one conversation turn and return the values for the Gradio outputs.

    Args:
        text_message: Typed command; takes priority over audio when non-blank.
        audio_input: Path of a recorded audio file, transcribed when no text is given.
        history: Previous chat messages as dicts with "role" and "content" keys.
        current_state: Dict with "living_room_light", "bedroom_light" and
            "temperature"; it is copied, never mutated in place.

    Returns:
        A 6-tuple ``(cleared_text, history, state, light_label, temperature, voice)``.
        When there is nothing to process (no text, no audio, or an empty
        transcription) the history and state come back unchanged and ``voice`` is None.
    """
    current_state = current_state.copy()

    # Use text input if provided, otherwise transcribe audio
    if text_message and text_message.strip():
        message = text_message
    elif audio_input:
        message = transcribe(audio_input)
        print(f"Transcribed audio: {message}")
    else:
        message = None

    # Nothing usable (including a silent recording): leave everything as it was
    # instead of sending an empty user message to the model.
    if not message or not message.strip():
        return "", history, current_state, _format_light_label(current_state), current_state["temperature"], None

    # The system prompt promises answers about the home's current state, so the
    # model has to be told what that state is at the start of the turn.
    state_note = (
        "Current home state: "
        f"living room light is {current_state['living_room_light']}, "
        f"bedroom light is {current_state['bedroom_light']}, "
        f"thermostat is set to {current_state['temperature']}."
    )

    history_formatted = [{"role": h["role"], "content": h["content"]} for h in history]
    messages = (
        [{"role": "system", "content": f"{system_prompt}\n{state_note}"}]
        + history_formatted
        + [{"role": "user", "content": message}]
    )

    def request_completion():
        # parallel_tool_calls=False: handle_tool_calls answers exactly one tool
        # call per assistant message, and the API rejects a follow-up request in
        # which any tool_call_id is left without a tool reply.
        return openai.chat.completions.create(
            model=MODEL,
            messages=messages,
            tools=tools,
            parallel_tool_calls=False,
        )

    response = request_completion()

    while response.choices[0].finish_reason == "tool_calls":
        print("Tool call detected. Executing tool...")
        msg = response.choices[0].message
        messages.append(msg)
        tool_response, current_state = handle_tool_calls(msg, current_state)
        messages.append(tool_response)
        response = request_completion()

    # content can be None (e.g. a truncated or filtered reply); normalise to text
    ai_reply = response.choices[0].message.content or ""

    # Update chat history for Gradio Chatbot
    new_history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": ai_reply},
    ]

    # Generate voice output only when there is something to say
    voice = talker(ai_reply) if ai_reply.strip() else None

    return "", new_history, current_state, _format_light_label(current_state), current_state["temperature"], voice

In [19]:
def transcribe(audio_path):
    """Convert a recorded audio file to text with the "whisper-1" model.

    Returns None when ``audio_path`` is empty or None; otherwise the file is
    opened in binary mode, sent for transcription, and the resulting text returned.
    """
    if audio_path:
        with open(audio_path, "rb") as recording:
            result = openai.audio.transcriptions.create(model="whisper-1", file=recording)
        return result.text
    return None

def talker(message):
    """Synthesize ``message`` as speech and return the response's ``content``.

    Uses the "gpt-4o-mini-tts" model with the "onyx" voice.
    """
    speech_request = {
        "model": "gpt-4o-mini-tts",
        "voice": "onyx",
        "input": message,
    }
    return openai.audio.speech.create(**speech_request).content

In [20]:
def _light_state_key(room):
    """Map a free-form room name to its key in the home-state dict, or None if unsupported."""
    if not isinstance(room, str):
        return None
    lowered = room.lower()
    if "living" in lowered:
        return "living_room_light"
    if "bedroom" in lowered:
        return "bedroom_light"
    return None


def handle_tool_calls(message, current_state):
    """Execute the first tool call of an assistant message.

    Only ``message.tool_calls[0]`` is processed, so the caller must make sure
    each assistant message carries a single tool call.

    Args:
        message: Assistant message object exposing ``tool_calls``; each call has
            ``id``, ``function.name`` and ``function.arguments`` (a JSON string).
        current_state: Home-state dict; a copy is updated and returned, the
            argument itself is left untouched.

    Returns:
        ``(tool_message, new_state)`` where ``tool_message`` is the
        ``{"role": "tool", "content": ..., "tool_call_id": ...}`` reply.
        Invalid or unsupported requests produce an explanatory ``content`` and
        leave the state unchanged, so the model never reports a change that
        did not happen.
    """
    print("Handling tool call...")

    current_state = current_state.copy()
    log = "Unknown tool called"  # Default value

    tool_call = message.tool_calls[0]
    tool_name = tool_call.function.name

    # Arguments arrive as model-generated JSON text; never assume it is well formed.
    try:
        arguments = json.loads(tool_call.function.arguments or "{}")
    except json.JSONDecodeError:
        arguments = None

    if not isinstance(arguments, dict):
        log = f"Could not parse the arguments for {tool_name}."

    elif tool_name == "toggle_light":
        print("Executing toggle_light function...")

        room = arguments.get('room')
        new_state = arguments.get('new_state')
        print(f"Tool call arguments: room={room}, new_state={new_state}")

        state_key = _light_state_key(room)
        if state_key is None:
            # Reporting success for a room that is not tracked would be a false confirmation.
            log = f"No controllable light found for room {room!r}. Available rooms: living room, bedroom."
        elif str(new_state).lower() not in ("on", "off"):
            log = f"Invalid light state {new_state!r}; expected 'on' or 'off'."
        else:
            log, room, state = toggle_light(room, new_state)
            current_state[state_key] = state.upper()

    elif tool_name == "set_thermostat":
        print("Executing set_thermostat function...")
        temp = arguments.get('temp')
        # bool is a subclass of int, so it is excluded explicitly
        if isinstance(temp, bool) or not isinstance(temp, (int, float)):
            log = f"Invalid thermostat temperature {temp!r}; expected a number."
        else:
            log, temp = set_thermostat(temp)
            current_state['temperature'] = temp

    return {
        "role": "tool",
        "content": log,
        "tool_call_id": tool_call.id
    }, current_state

## Let's design a Gradio UI

In [21]:
# UI definition

# Single source of truth for the starting home state; the status widgets below
# derive their initial values from it so they cannot drift apart.
INITIAL_HOME_STATE = {
    "living_room_light": "OFF",
    "bedroom_light": "OFF",
    "temperature": 72,
}

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Home state carried between events
    home_state = gr.State(dict(INITIAL_HOME_STATE))

    gr.Markdown("# 🏠 AI Smart Home Voice Controller")

    with gr.Row():
        # Status column: read-only view of the lights and thermostat
        with gr.Column(scale=1):
            gr.Markdown("### Current Status")
            light_ui = gr.Textbox(
                value=(
                    f"Living Room: {INITIAL_HOME_STATE['living_room_light']}"
                    f" | Bedroom: {INITIAL_HOME_STATE['bedroom_light']}"
                ),
                label="Lights",
                interactive=False,
            )
            temp_ui = gr.Number(
                value=INITIAL_HOME_STATE["temperature"],
                label="Thermostat (°F)",
                interactive=False,
            )

        # Interaction Column
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Home Assistant Log", type="messages")
            with gr.Row():
                audio_input = gr.Audio(sources="microphone", type="filepath", label="Voice Command")
                text_input = gr.Textbox(placeholder="Or type a command here...", label="Text Command")
            with gr.Row():
                audio_output = gr.Audio(autoplay=True, label="Response")

    # Both triggers call chat with the same wiring; the order of these lists
    # must match chat's parameters and its returned 6-tuple.
    chat_inputs = [text_input, audio_input, chatbot, home_state]
    chat_outputs = [text_input, chatbot, home_state, light_ui, temp_ui, audio_output]

    # Defining the logic flow - text input submit
    text_input.submit(chat, inputs=chat_inputs, outputs=chat_outputs)

    # Audio input - trigger when recording stops
    audio_input.stop_recording(chat, inputs=chat_inputs, outputs=chat_outputs)

In [None]:
# Start the Gradio app; inbrowser=True asks Gradio to open it in a browser tab.
demo.launch(inbrowser=True)

Rerunning server... use `close()` to stop if you need to change `launch()` parameters.
----
* To create a public link, set `share=True` in `launch()`.




Transcribed audio: Turn the bedroom lights on.
Tool call detected. Executing tool...
Handling tool call...
Executing toggle_light function...
Tool call arguments: room=bedroom, new_state=on
Toggling light in bedroom to on
Transcribed audio: Set the thermostat temperature to be at 65 in the house.
Tool call detected. Executing tool...
Handling tool call...
Executing set_thermostat function...
The temperature for the whole house is set to 65
