<a href="https://colab.research.google.com/github/kmalhotra18/Airline-Chatbot/blob/main/Airline_Chatbot_with_Spanish_voice_input.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ✈️ FLIGHTAI: Multimodal Airline Chatbot (Claude + Whisper + DALL·E)

# =======================
# 📦 Imports
# =======================
import base64
import io
import json
import os
import re
import tempfile
import time

import anthropic
import ffmpeg
import matplotlib.pyplot as plt
import numpy as np
import requests
from dotenv import load_dotenv
from google.colab.output import eval_js
from IPython.display import HTML, display, Audio as IPyAudio
from openai import OpenAI
from PIL import Image
from scipy.io.wavfile import write as wav_write, read as wav_read

# =======================
# 🔐 Environment Setup
# =======================
# Load environment variables before any client is constructed.
# NOTE(review): presumably reads a local .env file if one exists — confirm
# where the keys are expected to live when running on Colab.
load_dotenv()
# OpenAI client built with no explicit arguments.
# NOTE(review): presumably picks up its API key from the environment — confirm.
openai = OpenAI()
# Anthropic client; the key is taken from the ANTHROPIC_API_KEY environment
# variable (os.getenv yields None when it is unset).
claude = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# =======================
# 🧠 System Prompt
# =======================
# System prompt passed to Claude on every chat request.
system_message = "".join(
    [
        "You are a helpful assistant for an airline called FlightAI. ",
        "Give short, courteous answers, no more than 1 sentence. ",
        "Always be accurate. If you don't know the answer, say so.",
    ]
)

# =======================
# 💳 Price / Booking Data
# =======================
# Ticket prices (as display strings) keyed by lowercase city name.
# Insertion order matters: detect_city returns the first key that matches.
ticket_prices = dict(
    london="$799",
    paris="$899",
    tokyo="$1400",
    berlin="$499",
)

def get_ticket_price(city):
    """Return the ticket price for ``city``, matched case-insensitively.

    The city name is lowercased before the lookup in ``ticket_prices``.
    Returns the stored price string, or "Unknown" when the city has no entry.
    """
    normalized = city.lower()
    if normalized in ticket_prices:
        return ticket_prices[normalized]
    return "Unknown"

def book_flight(destination_city, departure_date, return_date, passenger_name):
    """Build a booking-confirmation record for the requested trip.

    Nothing external is contacted here: the flight number and gate are fixed
    values, and the dates are echoed back unchanged.

    Returns:
        dict with keys "confirmation", "flight_number", "departure_date",
        "return_date" and "gate".
    """
    summary = f"Booking confirmed for {passenger_name} to {destination_city}"
    booking = dict(
        confirmation=summary,
        flight_number="AI202",
        departure_date=departure_date,
        return_date=return_date,
        gate="A12",
    )
    return booking

def detect_city(text, cities=None):
    """Return the first known city mentioned in ``text``, or None.

    Matching is case-insensitive on ``text`` and requires the city to appear
    as a whole word, so that e.g. "comparison" is not mistaken for "paris".

    Args:
        text: Free-form text to scan. None or an empty string yields None.
        cities: Optional iterable of city names to look for. Defaults to the
            keys of ``ticket_prices``, checked in their iteration order.

    Returns:
        The matching city name exactly as it appears in ``cities``, or None
        when no city is found.
    """
    if not text:
        return None
    if cities is None:
        cities = ticket_prices
    # Lowercase once instead of on every loop iteration.
    lowered = text.lower()
    for city in cities:
        # Lookarounds act as word boundaries that also behave correctly if a
        # city name ever starts or ends with a non-word character.
        pattern = rf"(?<!\w){re.escape(city)}(?!\w)"
        if re.search(pattern, lowered):
            return city
    return None

# =======================
# 🖼️ Image Generation (DALL·E)
# =======================
def artist(city):
    """Generate a pop-art vacation image for ``city`` with DALL·E 3.

    The image is requested as base64 JSON, decoded, and opened with PIL.
    This is best-effort: any exception during the request or decoding is
    printed and None is returned instead of propagating.

    Returns:
        The opened PIL image, or None on failure.
    """
    try:
        request = dict(
            model="dall-e-3",
            prompt=f"A vibrant pop-art style image of vacation in {city}",
            size="1024x1024",
            n=1,
            response_format="b64_json",
        )
        result = openai.images.generate(**request)
        encoded = result.data[0].b64_json
        raw_bytes = base64.b64decode(encoded)
        picture = Image.open(io.BytesIO(raw_bytes))
    except Exception as e:
        print(f"Image error: {e}")
        return None
    return picture

# =======================
# 🗣️ Text-to-Speech (OpenAI)
# =======================
def talker(message):
    """Convert ``message`` to speech and save it to ``response.mp3``.

    Uses the OpenAI speech endpoint with model "tts-1" and voice "alloy".
    The file is written to the current working directory and overwritten on
    every call.

    Returns:
        The path of the written audio file.
    """
    speech = openai.audio.speech.create(model="tts-1", voice="alloy", input=message)
    audio_path = "response.mp3"
    with open(audio_path, "wb") as out_file:
        out_file.write(speech.content)
    return audio_path

# =======================
# 🌐 Translation (Claude)
# =======================
def translate_to_spanish_claude(text):
    """Translate English ``text`` to Spanish using Claude.

    Sends ``text`` as a single user message with a translation system prompt
    and returns the text of the first content block, stripped of surrounding
    whitespace.
    """
    request = {
        "model": "claude-3-haiku-20240307",
        "system": "Translate the following English to Spanish.",
        "max_tokens": 400,
        "messages": [{"role": "user", "content": text}],
    }
    result = claude.messages.create(**request)
    first_block = result.content[0]
    return first_block.text.strip()

# =======================
# 🤖 Claude Chat Engine
# =======================
def convert_history_to_prompt(history):
    """Flatten a chat history into a single Human/Assistant transcript.

    Each turn is a dict with "role" and "content". Turns whose role is
    "user" are labelled "Human"; every other role is labelled "Assistant".
    The transcript ends with an open "Assistant:" line for the model to
    continue from.
    """
    segments = []
    for turn in history:
        role, content = turn["role"], turn["content"]
        speaker = "Human" if role == "user" else "Assistant"
        segments.append(f"\n{speaker}: {content}")
    segments.append("\nAssistant:")
    return "".join(segments)

def chat_claude(history):
    """Run one assistant turn and produce its image, audio and translation.

    The whole history is flattened into one user message and sent to Claude.
    If a known city is found in the latest history entry (or, failing that,
    in Claude's reply), an image is generated for it and the reply is
    replaced by a fixed price sentence for that city. The final reply is
    then translated to Spanish, converted to speech, and appended to
    ``history`` (which is mutated in place).

    Returns:
        tuple of (history, image or None, audio file path, Spanish text).
    """
    transcript = convert_history_to_prompt(history)
    response = claude.messages.create(
        model="claude-3-haiku-20240307",
        system=system_message,
        max_tokens=1000,
        temperature=0.7,
        messages=[{"role": "user", "content": transcript}],
    )
    reply = response.content[0].text.strip()

    # Prefer a city from the latest history entry; fall back to the reply.
    latest_text = history[-1]["content"] if history else ""
    city = detect_city(latest_text) or detect_city(reply)

    if city:
        image = artist(city)
        reply = f"The price to {city.title()} is {get_ticket_price(city)}."
    else:
        image = None

    spanish = translate_to_spanish_claude(reply)
    audio_path = talker(reply)
    history.append({"role": "assistant", "content": reply})
    return history, image, audio_path, spanish

# =======================
# 🎧 Microphone Input Recorder (Colab JS)
# =======================
# HTML/JS snippet rendered in the notebook output to capture microphone audio.
# It requests microphone access, records with MediaRecorder, stops itself
# after 5000 ms, and stores the recording as a base64 data URL in the JS
# global `data`, which get_audio() reads back via eval_js.
# NOTE(review): the script also invokes a kernel callback named
# 'notebook.get_audio'; no registration of that callback is visible in this
# file — confirm whether the call is needed.
AUDIO_HTML = """
<script>
  var data = '';
  navigator.mediaDevices.getUserMedia({ audio: true })
  .then(stream => {
    const mediaRecorder = new MediaRecorder(stream);
    mediaRecorder.start();
    const audioChunks = [];
    mediaRecorder.addEventListener("dataavailable", event => { audioChunks.push(event.data); });
    mediaRecorder.addEventListener("stop", () => {
      const audioBlob = new Blob(audioChunks);
      const reader = new FileReader();
      reader.readAsDataURL(audioBlob);
      reader.onloadend = () => {
        data = reader.result;
        google.colab.kernel.invokeFunction('notebook.get_audio', [], {});
      }
    });
    setTimeout(() => { mediaRecorder.stop(); }, 5000);
  });
</script>
"""

def get_audio():
    """Record from the browser microphone and return the decoded samples.

    Renders ``AUDIO_HTML`` in the notebook output, waits for the recording to
    finish, reads the resulting data URL from the JS variable ``data``,
    base64-decodes it, converts it to WAV through ffmpeg pipes, and parses
    the WAV bytes with scipy.

    Returns:
        tuple of (audio, sr): the sample array and sample rate as returned
        by ``scipy.io.wavfile.read``.

    Raises:
        ValueError: if no data URL was captured, or if ffmpeg produced no
            usable WAV output.
    """
    display(HTML(AUDIO_HTML))
    # The recorder stops itself after 5 s; wait a little longer so the
    # data URL has been assigned before it is read.
    time.sleep(6)
    data = eval_js("data")
    if not data or ',' not in data:
        raise ValueError("No audio or invalid format")

    # A data URL has the form "<header>,<base64 payload>".
    # The original called a bare b64decode, which is never imported.
    binary = base64.b64decode(data.split(',')[1])

    process = (
        ffmpeg.input('pipe:0')
        .output('pipe:1', format='wav')
        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)
    )
    output, err = process.communicate(input=binary)
    if not output or len(output) < 8:
        detail = err.decode(errors="replace").strip() if err else "no output"
        raise ValueError(f"ffmpeg produced no WAV data: {detail}")

    # Rewrite bytes 4-7 (the RIFF chunk size, little-endian) with the real
    # length. NOTE(review): presumably ffmpeg cannot seek back on a pipe to
    # fill this field in itself — confirm.
    riff_chunk_size = len(output) - 8
    size_field = (riff_chunk_size & 0xFFFFFFFF).to_bytes(4, "little")
    riff = output[:4] + size_field + output[8:]
    sr, audio = wav_read(io.BytesIO(riff))
    return audio, sr

# =======================
# 😊 Voice Entry + Chat Execution
# =======================
def run_voice_chat(history):
    """Record a voice message, transcribe it, and run one chat turn.

    Captures audio with ``get_audio``, plots the waveform, writes the samples
    to a temporary WAV file, transcribes that file with Whisper, appends the
    transcript to ``history`` as a user turn, and hands off to
    ``chat_claude``.

    Args:
        history: list of {"role", "content"} dicts; mutated in place.

    Returns:
        Whatever ``chat_claude`` returns: (history, image, audio_path, spanish).
    """
    print("\nSpeak now (5 seconds)...")
    audio, sr = get_audio()
    plt.figure(figsize=(20, 4))
    plt.plot(audio)
    plt.title("Voice Input Waveform")
    plt.show()

    # Reserve a unique path and close the handle before writing to it, so the
    # file is not open twice while scipy writes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        wav_path = tmp.name
    try:
        wav_write(wav_path, sr, audio)
        with open(wav_path, "rb") as f:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=f
            ).text
    finally:
        # delete=False means nothing else cleans this up; without this, one
        # temp file leaks per call.
        os.remove(wav_path)

    print("\n📝 Transcribed:", transcript)
    history.append({"role": "user", "content": transcript})
    return chat_claude(history)

# =======================
# 🔧 Run Chatbot
# =======================
# Conversation state shared across runs; each entry is a {"role", "content"} dict.
history = []

# Run voice chatbot (feel free to run multiple times)
# Records one voice message and returns the updated history together with the
# generated image (or None), the speech file path, and the Spanish translation.
history, img, audio_path, spanish = run_voice_chat(history)

# The last history entry is the assistant reply appended by chat_claude.
print("\n💬 English Response:", history[-1]["content"])
print("\n🇪🇸 Spanish Translation:", spanish)
# img is None when no city was detected or image generation failed.
if img:
    display(img)
# Play the spoken reply inline in the notebook.
display(IPyAudio(audio_path, autoplay=True))
