<a href="https://colab.research.google.com/github/kamalesh06/Clg_Project/blob/main/untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install streamlit openai-whisper TTS transformers opennmt-py torch torchaudio soundfile together

Collecting streamlit
  Downloading streamlit-1.43.0-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting TTS
  Downloading TTS-0.22.0-cp311-cp311-manylinux1_x86_64.whl.metadata (21 kB)
Collecting opennmt-py
  Downloading OpenNMT_py-3.5.1-py3-none-any.whl.metadata (8.8 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata 

In [None]:
import streamlit as st
import whisper
from TTS.api import TTS
import sqlite3
import os
import torchaudio
import torch
import tempfile
import soundfile as sf
from together import Together
import re

# Select the torch device once at import time: the MPS backend when PyTorch
# reports it as available, otherwise the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Speech-to-Text: Whisper model, wrapped in Streamlit's resource cache
@st.cache_resource
def load_whisper_model():
    """Return the Whisper "small" checkpoint used for transcription.

    The function is decorated with ``st.cache_resource``, so Streamlit keeps
    the loaded model around instead of loading it again on every call.
    """
    stt_model = whisper.load_model("small")
    return stt_model

# Text-to-Speech: Coqui TTS engine, wrapped in Streamlit's resource cache
@st.cache_resource
def load_coqui_tts():
    """Return a Coqui ``TTS`` instance for the multilingual YourTTS model.

    The download progress bar is disabled. The function is decorated with
    ``st.cache_resource``, so Streamlit reuses the instance across calls.
    """
    tts_engine = TTS(
        model_name="tts_models/multilingual/multi-dataset/your_tts",
        progress_bar=False,
    )
    return tts_engine

# Initialize SQLite database for storing interactions
def init_db(db_path='mahabalipuram.db'):
    """Open the interactions database and make sure its table exists.

    Args:
        db_path: Path of the SQLite database file. Defaults to
            ``'mahabalipuram.db'`` in the current working directory;
            ``":memory:"`` gives a throwaway in-memory database.

    Returns:
        A ``(connection, cursor)`` tuple. The caller owns the connection and
        is responsible for closing it.

    Raises:
        sqlite3.Error: If the table cannot be created. The connection is
            closed before the error propagates so it does not leak.
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        # IF NOT EXISTS makes repeated calls against the same file harmless.
        c.execute('''CREATE TABLE IF NOT EXISTS interactions
                 (id INTEGER PRIMARY KEY, user_input TEXT, bot_response TEXT, language TEXT)''')
        conn.commit()
    except sqlite3.Error:
        conn.close()
        raise
    return conn, c

# Generate bot response using the DeepSeek model served through the Together API
def generate_response(user_input):
    """Ask the hosted DeepSeek model for a reply to *user_input*.

    The Together API key is read from the ``TOGETHER_API_KEY`` environment
    variable instead of being embedded in the source.

    Args:
        user_input: Text sent to the model as a single user message.

    Returns:
        The model's reply with any ``<think>...</think>`` block removed and
        surrounding whitespace stripped, or ``None`` when the key is missing,
        the request fails, or the response carries no usable text. This
        function reports problems with ``print`` and never raises.
    """
    # NOTE: secrets must not live in source control. Any key that was
    # previously committed in this file should be treated as leaked and rotated.
    together_api = os.environ.get("TOGETHER_API_KEY")
    if not together_api:
        print("Error: TOGETHER_API_KEY environment variable is not set.")
        return None

    try:
        client = Together(api_key=together_api)

        response = client.chat.completions.create(
            model="deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
            messages=[{"role": "user", "content": user_input}],
            max_tokens=None,
            temperature=0.7,
            top_p=0.7,
            top_k=50,
            repetition_penalty=1,
            stream=False,
        )

        # Check that the response contains the expected data
        if not getattr(response, "choices", None):
            print("Error: No response choices found.")
            return None

        raw_response = response.choices[0].message.content
        if raw_response is None:
            print("Error: Response contained no text.")
            return None

        # Remove the <think> block; DOTALL lets it span multiple lines.
        return re.sub(r"<think>.*?</think>", "", raw_response, flags=re.DOTALL).strip()

    except Exception as e:
        # Boundary around a third-party network call: report and signal
        # failure to the caller with None rather than propagating.
        print(f"An error occurred: {e}")
        return None

# Convert recorded/uploaded audio to text using Whisper
def process_audio(audio_file, whisper_model):
    """Transcribe *audio_file* with *whisper_model* and return the text.

    The audio is read with ``soundfile``, down-mixed to mono if it has more
    than one channel, resampled to 16 kHz with ``torchaudio``, written to a
    temporary WAV file, and passed to ``whisper_model.transcribe``.

    Args:
        audio_file: Anything ``soundfile.read`` accepts (a path or a
            file-like object).
        whisper_model: Object exposing ``transcribe(path)`` that returns a
            mapping with a ``"text"`` entry.

    Returns:
        The value of ``result["text"]`` from the transcription.

    Raises:
        Whatever ``soundfile``, ``torchaudio`` or the model raise; the
        temporary file is removed in every case.
    """
    audio, sr = sf.read(audio_file)

    target_sr = 16000
    waveform = torch.tensor(audio).float()
    if waveform.ndim > 1:
        # soundfile returns multi-channel audio as (frames, channels), but
        # Resample works along the last dimension. Average the channels so
        # the time axis is last and the signal is mono.
        waveform = waveform.mean(dim=1)

    # Ensure correct sample rate using torchaudio
    resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
    resampled_audio = resampler(waveform).numpy()

    # Reserve a temp path and close the handle first, so the file is not
    # written by name while it is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
        tmp_path = tmp_audio.name

    try:
        sf.write(tmp_path, resampled_audio, target_sr)
        result = whisper_model.transcribe(tmp_path)
    finally:
        os.remove(tmp_path)  # Cleanup, even when writing or transcription fails
    return result["text"]

def _discard_previous_reply_audio():
    """Delete the synthesized reply file recorded by an earlier run, if any."""
    if "last_audio_file" not in st.session_state:
        return
    stale_path = st.session_state["last_audio_file"]
    if not stale_path:
        return
    try:
        os.remove(stale_path)
    except OSError:
        # Best-effort cleanup: the file may already be gone.
        pass


def main():
    """Render the voice-bot page and handle one recorded message.

    Flow: show the page and the language selector, take a recording from
    ``st.audio_input``, transcribe it, ask the language model for a reply,
    store the exchange in SQLite, then synthesize and play the reply.

    Errors during processing are shown with ``st.error`` rather than raised.
    The database connection opened for this run is always closed.
    """
    st.title("Mahabalipuram AI Voice Bot")

    st.write("Welcome to the Mahabalipuram Heritage Assistant! Speak into your microphone or upload an audio file.")

    # Language selection
    # NOTE(review): the selected language is only stored with the interaction;
    # the reply text and speech below are always English.
    language = st.selectbox("Select Language", ["English", "Tamil", "Hindi"])

    # Audio input
    st.write("### Record a voice message")
    audio_file = st.audio_input("Record a voice message")

    # Load models and database
    whisper_model = load_whisper_model()
    tts = load_coqui_tts()
    conn, cursor = init_db()

    try:
        if not audio_file:
            return

        st.write("Processing audio...")
        try:
            user_input = process_audio(audio_file, whisper_model)
            st.write(f"**You said:** {user_input}")

            if not user_input or not user_input.strip():
                st.warning("No speech was detected. Please try recording again.")
                return

            bot_response = generate_response(user_input)
            if not bot_response:
                # generate_response signals failure with None; do not store
                # or synthesize a missing reply.
                st.error("The assistant could not generate a response. Please try again.")
                return
            st.write(f"**Bot Response (English):** {bot_response}")

            # Store interaction in the database
            cursor.execute(
                "INSERT INTO interactions (user_input, bot_response, language) VALUES (?, ?, ?)",
                (user_input, bot_response, language),
            )
            conn.commit()

            # Remove the previous run's reply file before creating a new one,
            # so temporary WAV files do not accumulate.
            _discard_previous_reply_audio()

            # Reserve a path and close the handle before TTS writes to it.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
                reply_path = tmp_audio.name
            # Record the path right away so it is cleaned up next run even if
            # synthesis fails.
            st.session_state["last_audio_file"] = reply_path

            # Convert response to speech
            tts.tts_to_file(text=bot_response, speaker='female-en-5', language='en', file_path=reply_path)

            st.audio(reply_path)

        except Exception as e:
            st.error(f"An error occurred: {e}")
    finally:
        # One connection is opened per script run; close it on every exit path.
        conn.close()

# Entry point: build the page by calling main() when this file is executed
# as a script rather than imported as a module.
if __name__ == "__main__":
    main()

Writing main.py


In [None]:
streamlit run main.py