<a href="https://colab.research.google.com/github/kamalesh06/Clg_Project/blob/main/untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%writefile main.py
import streamlit as st
import whisper
from TTS.api import TTS
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import sqlite3
import os
import numpy as np
import torchaudio
import torch
import tempfile
import soundfile as sf  # Fix for saving audio files

# Use the Apple-silicon Metal backend (MPS) when PyTorch reports it as
# available; every other machine falls back to the CPU device.
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

# Speech-to-Text backend (Whisper)
@st.cache_resource
def load_whisper_model():
    """Return the "small" Whisper checkpoint.

    The function is decorated with ``st.cache_resource`` so the loaded model
    object is kept by Streamlit and handed back on later calls.
    """
    checkpoint = "small"
    whisper_model = whisper.load_model(checkpoint)
    return whisper_model

# Text-to-Speech backend (Coqui TTS)
@st.cache_resource
def load_coqui_tts():
    """Return a Coqui ``TTS`` instance for the YourTTS model.

    The download progress bar is disabled. The function is decorated with
    ``st.cache_resource`` so the instance is kept by Streamlit and handed
    back on later calls.
    """
    tts_model_name = "tts_models/multilingual/multi-dataset/your_tts"
    synthesizer = TTS(model_name=tts_model_name, progress_bar=False)
    return synthesizer

# Load Mistral-7B Model (Optimized for M1 Mac)
@st.cache_resource
def load_llm():
    """Build the text-generation pipeline around Mistral-7B.

    Placement and precision are chosen from the hardware that is present:

    * Apple MPS selected as the module-level ``device``: the model is kept
      on the CPU in float32 (the original intent of "Force CPU for MPS").
    * CUDA available: float16 with ``device_map="auto"``.
    * Neither: CPU in float32, since half precision only pays off on a GPU.

    Returns:
        A ``transformers`` text-generation pipeline wrapping the model and
        its tokenizer. Decorated with ``st.cache_resource`` so it is built
        once and reused.
    """
    model_name = "mistralai/Mistral-7B-v0.1"

    # `device` is a torch.device; test its `.type` rather than comparing the
    # object with the string "mps", which is not a reliable equality check.
    on_mps = device.type == "mps"
    use_cuda = (not on_mps) and torch.cuda.is_available()

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
        device_map="auto" if use_cuda else "cpu",
    )

    # The model has already been placed via `device_map`; passing `device`
    # to the pipeline as well would ask it to move the model a second time
    # (and `device=0` would target a CUDA GPU that may not exist).
    return pipeline("text-generation", model=model, tokenizer=tokenizer)

# SQLite storage for the conversation log
def init_db():
    """Open ``mahabalipuram.db`` and make sure the ``interactions`` table exists.

    Returns:
        A ``(connection, cursor)`` tuple. The caller owns the connection.
    """
    create_table_sql = '''CREATE TABLE IF NOT EXISTS interactions
                 (id INTEGER PRIMARY KEY, user_input TEXT, bot_response TEXT, language TEXT)'''

    connection = sqlite3.connect('mahabalipuram.db')
    cursor = connection.cursor()
    cursor.execute(create_table_sql)
    connection.commit()
    return connection, cursor

# Generate bot response
def generate_response(user_input):
    """Generate the bot's reply to ``user_input`` with the cached LLM pipeline.

    Args:
        user_input: The transcribed user utterance, used as the prompt.

    Returns:
        The generated continuation with surrounding whitespace stripped.
        The prompt itself is not included in the returned text.
    """
    generator = load_llm()
    response = generator(
        user_input,
        max_new_tokens=100,  # upper bound on the length of the reply
        # By default the pipeline returns prompt + continuation, which would
        # make the bot repeat the user's own words back before answering.
        return_full_text=False,
    )
    return response[0]['generated_text'].strip()

# Convert recorded audio to text using Whisper
def process_audio(audio_file, whisper_model):
    """Transcribe an audio recording with Whisper.

    The recording is read with ``soundfile``, downmixed to mono when it has
    several channels, resampled to 16 kHz if needed, written to a temporary
    WAV file, and passed to ``whisper_model.transcribe``.

    Args:
        audio_file: A path or file-like object accepted by ``soundfile.read``.
        whisper_model: Object exposing ``transcribe(path)`` that returns a
            mapping with a ``"text"`` entry.

    Returns:
        The transcribed text.
    """
    target_sr = 16000

    # Load audio file
    audio, sr = sf.read(audio_file)

    # Multi-channel audio comes back as (frames, channels), but the resampler
    # works along the last axis; average the channels so that axis is time.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)

    waveform = torch.tensor(audio).float()
    if sr != target_sr:
        resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
        waveform = resampler(waveform)
    resampled_audio = waveform.numpy()

    # Reserve a temp path and close the handle before writing to it, so the
    # file is not opened twice at the same time.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
        tmp_path = tmp_audio.name

    try:
        sf.write(tmp_path, resampled_audio, target_sr)
        # Transcribe with Whisper
        result = whisper_model.transcribe(tmp_path)
    finally:
        # Cleanup must also happen when writing or transcription fails.
        os.remove(tmp_path)
    return result["text"]

def main():
    """Render the Streamlit page and run one voice interaction.

    Flow: show the title, language selector and audio recorder; when a
    recording is present, transcribe it, generate a reply, store the
    exchange in SQLite, synthesize the reply to a WAV file and play it.
    Errors raised during processing are shown with ``st.error``.
    """
    st.title("Mahabalipuram AI Voice Bot")

    st.write("Welcome to the Mahabalipuram Heritage Assistant! Speak into your microphone or upload an audio file.")

    # Language selection
    language = st.selectbox("Select Language", ["Tamil", "English", "Hindi"])

    # Audio input
    st.write("### Record a voice message")
    audio_file = st.audio_input("Record a voice message")

    # Load models and database
    whisper_model = load_whisper_model()
    tts = load_coqui_tts()
    conn, cursor = init_db()

    try:
        if audio_file:
            st.write("Processing audio...")

            user_input = process_audio(audio_file, whisper_model)
            st.write(f"**You said:** {user_input}")

            bot_response = generate_response(user_input)
            st.write(f"**Bot Response (English):** {bot_response}")

            # Store interaction in the database
            cursor.execute("INSERT INTO interactions (user_input, bot_response, language) VALUES (?, ?, ?)",
                           (user_input, bot_response, language))
            conn.commit()

            # The reply WAV from the previous interaction was kept alive so
            # the player could read it; it is no longer needed, so remove it
            # now instead of leaving temp files behind indefinitely.
            previous_reply = st.session_state.pop("last_audio_file", None)
            if previous_reply:
                try:
                    os.remove(previous_reply)
                except OSError:
                    pass  # already gone; nothing left to clean up

            # A multi-speaker / multilingual Coqui model needs an explicit
            # speaker and language for synthesis.
            # NOTE(review): this picks the first speaker and language the
            # model advertises - confirm that is the desired voice.
            tts_kwargs = {}
            if getattr(tts, "is_multi_speaker", False):
                tts_kwargs["speaker"] = tts.speakers[0]
            if getattr(tts, "is_multi_lingual", False):
                tts_kwargs["language"] = tts.languages[0]

            # Convert response to speech. The temp handle is closed before
            # the TTS engine writes to the path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
                reply_path = tmp_audio.name
            try:
                tts.tts_to_file(text=bot_response, file_path=reply_path, **tts_kwargs)
            except Exception:
                os.remove(reply_path)  # do not leak the file on failure
                raise

            st.audio(reply_path)

            # Defer deletion of this file until the next interaction.
            st.session_state["last_audio_file"] = reply_path

    except Exception as e:
        st.error(f"An error occurred: {e}")
    finally:
        # A new connection is opened on every run of main(); close it so
        # connections do not accumulate.
        conn.close()

if __name__ == "__main__":
    main()

In [None]:
streamlit run main.py