In [None]:
# PART 1: SETUP AND DEPENDENCIES
# Run this in Google Colab first

# Install dependencies
!pip install streamlit>=1.28.0
!pip install openai-whisper
!pip install sentence-transformers
!pip install faiss-cpu
!pip install gtts
!pip install pydub
!pip install groq
!pip install python-docx
!pip install PyPDF2
!pip install soundfile
!pip install librosa
!pip install torch
!pip install pyngrok

# System packages for Colab
!apt-get update &> /dev/null
!apt-get install -y ffmpeg &> /dev/null

print("✅ Dependencies installed successfully!")

In [None]:
# PART 2: IMPORTS AND CONFIGURATION

import streamlit as st
import whisper
import torch
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import json
import os
from gtts import gTTS
from pydub import AudioSegment
import tempfile
import io
from pathlib import Path
import PyPDF2
from docx import Document
import logging
from typing import List, Dict, Tuple, Optional
import time
from datetime import datetime
import base64
import soundfile as sf
import librosa
from groq import Groq
import re

# For Colab compatibility
# COLAB_MODE is True only when both imports below succeed; an ImportError from
# either one selects local mode, in which `ipd` and `files` may be undefined.
try:
    import IPython.display as ipd
    from google.colab import files
    COLAB_MODE = True
    print("✅ Running in Google Colab mode")
except ImportError:
    COLAB_MODE = False
    print("✅ Running in local mode")

# Configuration Class
class Config:
    """Configuration class for the RAG system.

    Holds class-level constants only; the other components read them as
    ``Config.<NAME>`` without instantiating the class.
    """

    # API Configuration
    GROQ_API_KEY = ""  # Will be set via Streamlit input
    GROQ_MODEL = "llama3-8b-8192"  # Llama model via Groq

    # Model configurations
    WHISPER_MODEL = "base"  # Options: tiny, base, small, medium, large
    # NOTE(review): all-MiniLM-L6-v2 is believed to be trained mainly on English
    # text, not multilingual — confirm before relying on non-English retrieval.
    EMBEDDING_MODEL = "all-MiniLM-L6-v2"

    # Supported languages
    SUPPORTED_LANGUAGES = {
        'en': 'English',
        'es': 'Spanish',
        'fr': 'French',
        'de': 'German',
        'it': 'Italian',
        'pt': 'Portuguese',
        'ru': 'Russian',
        'zh': 'Chinese',
        'ja': 'Japanese',
        'ko': 'Korean',
        'hi': 'Hindi',
        'ar': 'Arabic'
    }

    # File paths
    DOCUMENTS_DIR = "documents"
    AUDIO_DIR = "audio"
    EMBEDDINGS_FILE = "embeddings.index"
    METADATA_FILE = "metadata.json"

    # Vector database settings
    EMBEDDING_DIM = 384  # dimension passed to faiss.IndexFlatIP when the index is built
    TOP_K_RETRIEVAL = 5  # default number of results for VectorDatabase.search

    # Audio settings
    SAMPLE_RATE = 16000
    AUDIO_FORMAT = "wav"

    # System prompts
    SYSTEM_PROMPT = """You are an AI assistant designed to help visually impaired users access and understand content.
    You provide clear, concise, and helpful responses based on retrieved documents.
    Always respond in the same language as the user's question.
    Be empathetic and accessibility-focused in your responses."""

print("✅ Configuration loaded successfully!")

In [None]:
# PART 3: DOCUMENT PROCESSING

class DocumentProcessor:
    """Load supported documents from a directory and split them into chunks.

    Supported extensions are ``.txt``, ``.pdf``, ``.docx`` and ``.md``.
    """

    def __init__(self):
        self.supported_formats = ['.txt', '.pdf', '.docx', '.md']
        print("✅ DocumentProcessor initialized")

    def load_documents(self, directory: str) -> List[Dict]:
        """Load and chunk every supported file found under ``directory``.

        The directory is searched recursively. If it does not exist it is
        created and an empty list is returned. Files that fail to process are
        reported and skipped.

        Args:
            directory: Root directory to scan.

        Returns:
            One dict per chunk with the keys ``id``, ``filename``,
            ``filepath``, ``content``, ``chunk_index`` and ``total_chunks``.
        """
        documents = []

        if not os.path.exists(directory):
            os.makedirs(directory)
            print(f"Created directory: {directory}")
            return documents

        # Sorted so the chunk order (and therefore any index built from it)
        # is reproducible across runs and filesystems.
        for file_path in sorted(Path(directory).rglob('*')):
            # A directory can carry a supported suffix too (e.g. "notes.txt/").
            if not file_path.is_file():
                continue
            if file_path.suffix.lower() not in self.supported_formats:
                continue

            try:
                content = self._extract_text(file_path)
                if not content.strip():
                    continue

                chunks = self._chunk_text(content)
                for i, chunk in enumerate(chunks):
                    documents.append({
                        'id': f"{file_path.stem}_{i}",
                        'filename': file_path.name,
                        'filepath': str(file_path),
                        'content': chunk,
                        'chunk_index': i,
                        'total_chunks': len(chunks)
                    })
                print(f"Processed: {file_path.name} -> {len(chunks)} chunks")
            except Exception as e:
                print(f"Error processing {file_path}: {e}")

        print(f"✅ Loaded {len(documents)} document chunks total")
        return documents

    def _extract_text(self, file_path: Path) -> str:
        """Return the text of ``file_path``, or ``""`` on failure.

        An unsupported extension also yields ``""``. Extraction errors are
        printed rather than raised.
        """
        suffix = file_path.suffix.lower()
        try:
            if suffix in ('.txt', '.md'):
                return file_path.read_text(encoding='utf-8', errors='ignore')

            if suffix == '.pdf':
                with open(file_path, 'rb') as file:
                    reader = PyPDF2.PdfReader(file)
                    # `or ""` guards against a page yielding no text object,
                    # which would otherwise abort the whole document.
                    return "".join(
                        (page.extract_text() or "") + "\n"
                        for page in reader.pages
                    )

            if suffix == '.docx':
                doc = Document(file_path)
                return '\n'.join(paragraph.text for paragraph in doc.paragraphs)

        except Exception as e:
            print(f"Error extracting text from {file_path}: {e}")
            return ""

        return ""

    def _chunk_text(self, text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
        """Split ``text`` into overlapping chunks of whitespace-separated words.

        Args:
            text: Text to split.
            chunk_size: Maximum number of words per chunk; must be positive.
            overlap: Number of words shared by consecutive chunks; must
                satisfy ``0 <= overlap < chunk_size``.

        Returns:
            The list of chunks, or ``[text]`` when no chunk was produced
            (for example when ``text`` contains no words).

        Raises:
            ValueError: If ``chunk_size`` or ``overlap`` is out of range.
        """
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")
        if not 0 <= overlap < chunk_size:
            # A non-positive stride would either crash range() or skip words.
            raise ValueError(
                f"overlap must satisfy 0 <= overlap < chunk_size, "
                f"got overlap={overlap}, chunk_size={chunk_size}"
            )

        words = text.split()
        chunks = []

        for i in range(0, len(words), chunk_size - overlap):
            chunk = ' '.join(words[i:i + chunk_size])
            if chunk.strip():
                chunks.append(chunk)

            # Stop once this window reached the end, so no trailing chunk
            # made only of already-covered overlap words is emitted.
            if i + chunk_size >= len(words):
                break

        return chunks if chunks else [text]

# Test the DocumentProcessor
def test_document_processor():
    """Smoke-test DocumentProcessor on a generated sample file.

    Writes ``test_documents/sample.txt``, loads the directory through a new
    processor, prints the chunk count and returns the processor.
    """
    print("Testing DocumentProcessor...")

    # Prepare a directory holding one repetitive text document
    test_dir = "test_documents"
    os.makedirs(test_dir, exist_ok=True)
    sample_text = "This is a sample document for testing the RAG system. " * 20
    with open(f"{test_dir}/sample.txt", "w") as sample_file:
        sample_file.write(sample_text)

    # Run the processor over that directory and report how much it chunked
    processor = DocumentProcessor()
    chunk_total = len(processor.load_documents(test_dir))

    print(f"✅ Test completed: {chunk_total} chunks processed")
    return processor

# Run test
if __name__ == "__main__":
    test_processor = test_document_processor()

In [None]:
# PART 4: VOICE PROCESSING

class VoiceProcessor:
    """Handles speech-to-text (Whisper) and text-to-speech (gTTS) conversion"""

    # Language codes passed to gTTS unchanged; any other code falls back to "en".
    _TTS_LANGUAGES = frozenset({
        'en', 'es', 'fr', 'de', 'it', 'pt',
        'ru', 'zh', 'ja', 'ko', 'hi', 'ar',
    })

    def __init__(self):
        print("Loading Whisper model...")
        self.whisper_model = whisper.load_model(Config.WHISPER_MODEL)
        self.audio_dir = Config.AUDIO_DIR
        os.makedirs(self.audio_dir, exist_ok=True)
        print("✅ VoiceProcessor initialized")

    def speech_to_text(self, audio_file_path: str) -> Tuple[str, str]:
        """Transcribe an audio file with Whisper.

        Args:
            audio_file_path: Path of the audio file to transcribe.

        Returns:
            ``(text, language)``; ``("", "en")`` if transcription fails.
        """
        try:
            print(f"Transcribing audio: {audio_file_path}")
            result = self.whisper_model.transcribe(audio_file_path)
            text = result["text"].strip()
            language = result.get("language", "en")

            print(f"✅ Transcription completed: '{text}' (language: {language})")
            return text, language

        except Exception as e:
            print(f"Error in speech-to-text: {e}")
            return "", "en"

    def speech_to_text_from_bytes(self, audio_data: bytes) -> Tuple[str, str]:
        """Transcribe raw audio bytes by way of a temporary ``.wav`` file.

        Args:
            audio_data: Encoded audio content.

        Returns:
            ``(text, language)``; ``("", "en")`` if anything fails.
        """
        tmp_file_path = None
        try:
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
                # Record the name first so the file is removed even if the
                # write below fails.
                tmp_file_path = tmp_file.name
                tmp_file.write(audio_data)

            return self.speech_to_text(tmp_file_path)

        except Exception as e:
            print(f"Error in speech-to-text from bytes: {e}")
            return "", "en"

        finally:
            # delete=False means the file outlives the `with`; always clean up.
            if tmp_file_path and os.path.exists(tmp_file_path):
                os.unlink(tmp_file_path)

    @staticmethod
    def _synthesize(text: str, lang: str) -> bytes:
        """Render ``text`` with gTTS in ``lang`` and return the audio bytes."""
        audio_buffer = io.BytesIO()
        gTTS(text=text, lang=lang, slow=False).write_to_fp(audio_buffer)
        return audio_buffer.getvalue()

    def text_to_speech(self, text: str, language: str = "en") -> bytes:
        """Convert text to speech using gTTS.

        Args:
            text: Text to speak.
            language: Language code; codes outside the supported set are
                spoken with the English voice.

        Returns:
            The audio bytes, or ``b""`` when ``text`` is blank or when both
            the requested language and the English retry fail.
        """
        # Blank text cannot be synthesized; skip the request entirely.
        if not text or not text.strip():
            return b""

        print(f"Generating speech for: '{text[:50]}...' (language: {language})")
        tts_lang = language if language in self._TTS_LANGUAGES else "en"

        try:
            audio = self._synthesize(text, tts_lang)
            print("✅ Speech generation completed")
            return audio

        except Exception as e:
            print(f"Error in text-to-speech: {e}")

        # Fallback to English
        try:
            return self._synthesize(text, "en")
        except Exception as fallback_error:
            print(f"Error in text-to-speech fallback: {fallback_error}")
            return b""

    def save_audio_file(self, audio_data: bytes, filename: str) -> str:
        """Write ``audio_data`` to ``filename`` inside the audio directory.

        Returns:
            The path of the written file.
        """
        filepath = os.path.join(self.audio_dir, filename)
        with open(filepath, 'wb') as f:
            f.write(audio_data)
        print(f"✅ Audio saved: {filepath}")
        return filepath

# Test the VoiceProcessor
def test_voice_processor():
    """Smoke-test VoiceProcessor by synthesizing one English sentence.

    When audio is produced it is saved as ``test_output.mp3`` and, in Colab,
    shown in an inline player. The processor is returned either way.
    """
    print("Testing VoiceProcessor...")

    processor = VoiceProcessor()

    # Synthesize a fixed sentence
    audio_data = processor.text_to_speech(
        "Hello, this is a test of the voice processing system.", "en"
    )

    # Nothing to save or play when synthesis produced no bytes
    if not audio_data:
        return processor

    processor.save_audio_file(audio_data, "test_output.mp3")
    print(f"✅ Test audio generated: {len(audio_data)} bytes")

    # Inline playback is only available in Colab
    if COLAB_MODE:
        ipd.display(ipd.Audio(audio_data))

    return processor

# Run test
if __name__ == "__main__":
    test_voice = test_voice_processor()


In [None]:
# PART 5: VECTOR DATABASE AND RETRIEVAL

class VectorDatabase:
    """Handles document embeddings and similarity search"""

    def __init__(self):
        print("Loading embedding model...")
        self.embedding_model = SentenceTransformer(Config.EMBEDDING_MODEL)
        self.index = None
        self.documents = []
        self.metadata = []
        print("✅ VectorDatabase initialized")

    @staticmethod
    def _normalize(vectors):
        """Scale each row to unit length, leaving all-zero rows unchanged."""
        vectors = np.asarray(vectors, dtype='float32')
        norms = np.linalg.norm(vectors, axis=1, keepdims=True)
        # Dividing by a zero norm would fill the row with NaN.
        norms[norms == 0] = 1.0
        return vectors / norms

    @staticmethod
    def _metadata_path(filepath: str) -> str:
        """Return the metadata JSON path that belongs to an index path.

        A trailing ``.index`` is replaced by ``_metadata.json``; for any other
        name the suffix is appended, so the result never equals ``filepath``.
        """
        suffix = '.index'
        base = filepath[:-len(suffix)] if filepath.endswith(suffix) else filepath
        return f"{base}_metadata.json"

    def build_index(self, documents: List[Dict]):
        """Build the FAISS index from document chunks.

        Args:
            documents: Chunk dicts; each must carry a ``content`` string.

        Returns:
            True when an index was built, False when ``documents`` is empty.
        """
        self.documents = documents

        if not documents:
            print("⚠️ No documents found to index")
            return False

        print(f"Building index for {len(documents)} document chunks...")

        # Extract text content
        texts = [doc['content'] for doc in documents]

        # Generate embeddings
        print("Generating embeddings...")
        embeddings = self.embedding_model.encode(texts, show_progress_bar=True)

        # Normalize so the inner product equals cosine similarity
        embeddings = self._normalize(embeddings)

        # Size the index from the embeddings themselves so it always matches
        # the configured embedding model.
        self.index = faiss.IndexFlatIP(embeddings.shape[1])
        self.index.add(embeddings)

        # Store metadata
        self.metadata = documents

        print(f"✅ Built index with {len(documents)} document chunks")
        return True

    def search(self, query: str, k: int = Config.TOP_K_RETRIEVAL) -> List[Dict]:
        """Return up to ``k`` chunks most similar to ``query``.

        Each result is a copy of the stored chunk dict with an added
        ``similarity_score``. An empty list is returned when no index is
        available or ``k`` is not positive.
        """
        if self.index is None or len(self.documents) == 0:
            print("⚠️ No index available for search")
            return []

        if k <= 0:
            return []

        print(f"Searching for: '{query}'")

        # Generate query embedding
        query_embedding = self._normalize(self.embedding_model.encode([query]))

        # Search
        scores, indices = self.index.search(query_embedding, k)

        # Positions outside the stored metadata (including negative ones)
        # are dropped.
        results = []
        for score, idx in zip(scores[0], indices[0]):
            if 0 <= idx < len(self.metadata):
                result = self.metadata[idx].copy()
                result['similarity_score'] = float(score)
                results.append(result)

        print(f"✅ Found {len(results)} relevant documents")
        for i, result in enumerate(results):
            print(f"  {i+1}. {result['filename']} (score: {result['similarity_score']:.3f})")

        return results

    def save_index(self, filepath: str):
        """Save the FAISS index to ``filepath`` and its metadata alongside.

        Does nothing when no index has been built.
        """
        if self.index is None:
            return

        faiss.write_index(self.index, filepath)

        # The metadata path is derived so that it can never collide with the
        # index file, whatever extension the caller chose.
        with open(self._metadata_path(filepath), 'w', encoding='utf-8') as f:
            json.dump(self.metadata, f, indent=2)

        print(f"✅ Index saved to {filepath}")

    def load_index(self, filepath: str):
        """Load a FAISS index and, if present, its metadata.

        Returns:
            True when the index file existed and was loaded, else False.
        """
        if not os.path.exists(filepath):
            return False

        self.index = faiss.read_index(filepath)

        metadata_path = self._metadata_path(filepath)
        if os.path.exists(metadata_path):
            with open(metadata_path, 'r', encoding='utf-8') as f:
                self.metadata = json.load(f)
                self.documents = self.metadata
        else:
            # Without metadata, search() cannot map hits back to chunks.
            print(f"⚠️ Metadata file not found: {metadata_path}")

        print(f"✅ Index loaded from {filepath}")
        return True

# Test the VectorDatabase
def test_vector_database():
    """Smoke-test VectorDatabase with three in-memory sample chunks.

    Builds an index from the samples, runs one query when the build succeeds,
    and returns the database instance.
    """
    print("Testing VectorDatabase...")

    # (id, filename, content, chunk_index) for each sample chunk
    sample_rows = [
        ('doc1_0', 'accessibility.txt',
         'Screen readers help visually impaired users navigate computers and websites.', 0),
        ('doc1_1', 'accessibility.txt',
         'Voice assistants can control smart home devices and provide information.', 1),
        ('doc2_0', 'technology.txt',
         'Braille displays provide tactile feedback for reading digital content.', 0),
    ]
    sample_docs = [
        {'id': doc_id, 'filename': name, 'content': body, 'chunk_index': position}
        for doc_id, name, body, position in sample_rows
    ]

    vector_db = VectorDatabase()

    # Query only if the index was actually built
    if vector_db.build_index(sample_docs):
        hits = vector_db.search("How do screen readers work?", k=2)
        print(f"✅ Search test completed: {len(hits)} results")

    return vector_db

# Run test
if __name__ == "__main__":
    test_vector_db = test_vector_database()

In [None]:
import os
from getpass import getpass
from google.colab import drive
from IPython.display import clear_output

# Ask for the Groq API key. getpass() is used because input() echoes what is
# typed into the cell output.
groq_api_key = getpass("Enter your Groq API key: ").strip()
if not groq_api_key:
    raise ValueError("API key is required")

# Content for the .env file
env_content = f"""GROQ_API_KEY={groq_api_key}
"""

# Path to the .env file in Colab's base directory
env_path = '/content/.env'

# Write the .env file
with open(env_path, 'w') as f:
    f.write(env_content)

# The file holds a secret: make it readable and writable by the owner only
os.chmod(env_path, 0o600)

# Clear the output that might show the key
clear_output()

# Verify the file was created without showing contents
if os.path.exists(env_path):
    print("✅ .env file created successfully (contents hidden for security)")
else:
    print("❌ Failed to create .env file")

In [None]:
# Install python-dotenv and the Groq client before running the next cell,
# which imports `dotenv` and `groq`.
!pip install python-dotenv groq

In [None]:

# PART 6: LLM PROCESSING WITH GROQ (Llama 3 version)
import os
from typing import List, Dict
from groq import Groq
from dotenv import load_dotenv
from IPython.display import clear_output

class Config:
    """Configuration shared by the LLM, voice, retrieval and document code.

    Defining this class rebinds the module-level name ``Config``, so it must
    carry every attribute the other components read (Whisper and embedding
    model names, directories, index settings), not only the LLM settings.
    """

    # API configuration
    GROQ_API_KEY = ""  # Resolved at runtime from the environment / .env file
    GROQ_MODEL = "llama3-8b-8192"  # Llama 3 8B model id on Groq

    # Model configurations
    WHISPER_MODEL = "base"
    EMBEDDING_MODEL = "all-MiniLM-L6-v2"

    # Languages the prompt builder can name; unknown codes fall back to English
    SUPPORTED_LANGUAGES = {
        "en": "English",
        "es": "Spanish",
        "fr": "French",
        "de": "German",
        "it": "Italian",
        "pt": "Portuguese",
        "ru": "Russian",
        "zh": "Chinese",
        "ja": "Japanese",
        "ko": "Korean",
        "hi": "Hindi",
        "ar": "Arabic",
    }

    # File paths
    DOCUMENTS_DIR = "documents"
    AUDIO_DIR = "audio"
    EMBEDDINGS_FILE = "embeddings.index"
    METADATA_FILE = "metadata.json"

    # Vector database settings
    EMBEDDING_DIM = 384
    TOP_K_RETRIEVAL = 5

    # Audio settings
    SAMPLE_RATE = 16000
    AUDIO_FORMAT = "wav"

    SYSTEM_PROMPT = """You are an AI assistant specialized in helping visually impaired users.
    Provide clear, concise responses using simple language.
    Always structure your answers for easy comprehension by screen readers."""

class LLMProcessor:
    """Handles LLM interactions through the Groq API using ``Config.GROQ_MODEL``"""

    # Maximum number of characters of each document placed into the prompt
    _CONTEXT_CHARS_PER_DOC = 400

    def __init__(self, api_key: str = None):
        """Create the Groq client.

        Args:
            api_key: Groq API key; when omitted, the ``GROQ_API_KEY``
                environment variable is used.

        Raises:
            ValueError: If no key is given and the variable is unset or empty.
        """
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        if not self.api_key:
            raise ValueError("Groq API key is required. Set GROQ_API_KEY in .env or pass directly")

        self.client = Groq(api_key=self.api_key)
        print(f"✅ LLMProcessor initialized with {Config.GROQ_MODEL}")

    def generate_response(self, query: str, context_docs: List[Dict] = None, language: str = "en") -> str:
        """Answer ``query`` with the configured model, optionally using context.

        Args:
            query: The user's question.
            context_docs: Retrieved chunk dicts (``filename``/``content``);
                may be None or empty.
            language: Language code used to pick the response language.

        Returns:
            The model's reply, or a fixed apology string if the call fails.
        """
        try:
            print(f"Processing query: '{query[:50]}...'")

            # _prepare_context copes with None/empty itself, so the prompt
            # states explicitly when there is no context.
            prompt = self._create_prompt(
                query,
                self._prepare_context(context_docs),
                language
            )

            response = self.client.chat.completions.create(
                model=Config.GROQ_MODEL,
                messages=[
                    {"role": "system", "content": Config.SYSTEM_PROMPT},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.5,  # Lower for more factual responses
                max_tokens=1024,
                top_p=0.9
            )

            # Message content can be absent; treat that as an empty reply.
            result = (response.choices[0].message.content or "").strip()
            print(f"✅ Generated {len(result.split())} words")
            return result

        except Exception as e:
            error_msg = f"LLM Error: {str(e)}"
            print(f"❌ {error_msg}")
            return "Sorry, I encountered an error. Please try again later."

    def _prepare_context(self, docs: List[Dict]) -> str:
        """Format documents as numbered prompt sections.

        Each document's content is cut to ``_CONTEXT_CHARS_PER_DOC``
        characters, with ``...`` appended only when text was cut. Returns
        ``"No context provided"`` for None or an empty list.
        """
        if not docs:
            return "No context provided"

        limit = self._CONTEXT_CHARS_PER_DOC
        sections = []
        for i, d in enumerate(docs):
            content = d.get('content') or ''
            excerpt = content[:limit] + ("..." if len(content) > limit else "")
            sections.append(
                f"Document {i+1} ({d.get('filename','unnamed')}):\n{excerpt}"
            )
        return "\n\n".join(sections)

    def _create_prompt(self, query: str, context: str, language: str) -> str:
        """Build the user prompt from the query, context and language code.

        Codes missing from ``Config.SUPPORTED_LANGUAGES`` select English.
        """
        lang = Config.SUPPORTED_LANGUAGES.get(language, "English")
        return f"""Respond in {lang} to this query using the context below.

Context:
{context}

Query: {query}

Guidelines:
1. Answer directly and factually
2. Use simple, clear language
3. Keep under 5 sentences unless complex
4. Specify when information is incomplete"""

def setup_environment(env_path: str = '/content/.env'):
    """Make ``GROQ_API_KEY`` available, prompting for it if necessary.

    Loads ``env_path`` first. If the variable is still unset or empty, the key
    is requested with a non-echoing prompt, written to ``env_path`` and loaded
    into the environment. A blank entry leaves the environment unchanged.

    Args:
        env_path: Location of the ``.env`` file to read and, if needed, write.
    """
    load_dotenv(env_path)
    if os.getenv("GROQ_API_KEY"):
        return

    from getpass import getpass

    # Strip so pasted whitespace is neither stored nor mistaken for a key.
    api_key = getpass("Enter Groq API key: ").strip()
    if not api_key:
        print("⚠️ Continuing without API key")
        return

    with open(env_path, 'w') as f:
        f.write(f"GROQ_API_KEY={api_key}\n")
    # The file holds a secret: owner read/write only.
    os.chmod(env_path, 0o600)

    load_dotenv(env_path, override=True)
    clear_output()
    print("✅ Environment configured")

# Example usage
# Runs two sample queries against the Groq API: one without context and one
# with a single in-memory document.
if __name__ == "__main__":
    setup_environment()

    # Raises ValueError if no API key ended up in the environment
    llm = LLMProcessor()

    # Test with and without context
    print("\n--- Basic Test ---")
    print(llm.generate_response("Explain quantum computing simply"))

    print("\n--- Contextual Test ---")
    context_docs = [{
        'filename': 'accessibility_guide.txt',
        'content': 'Screen readers convert text to speech and braille. Popular ones include JAWS, NVDA, and VoiceOver.'
    }]
    print(llm.generate_response(
        "What are the most common screen readers?",
        context_docs
    ))

In [None]:
# PART 7: MAIN RAG SYSTEM

class MultilingualRAGSystem:
    """Main RAG system coordinating all components"""

    def __init__(self, groq_api_key: str):
        """Create the document, voice, retrieval and LLM components.

        Args:
            groq_api_key: API key handed to ``LLMProcessor``.

        Raises:
            Exception: Any component initialization error is reported and
                re-raised unchanged.
        """
        print("Initializing Multilingual RAG System...")

        try:
            self.doc_processor = DocumentProcessor()
            self.voice_processor = VoiceProcessor()
            self.vector_db = VectorDatabase()
            self.llm_processor = LLMProcessor(groq_api_key)
            self.conversation_history = []

            print("✅ RAG System initialized successfully")

        except Exception as e:
            print(f"❌ Error initializing RAG system: {e}")
            raise

    def initialize_system(self, documents_dir: str):
        """Load documents from ``documents_dir`` and build the vector index.

        Returns:
            True when at least one chunk was indexed, otherwise False.
        """
        print(f"Initializing system with documents from: {documents_dir}")

        # Load documents
        documents = self.doc_processor.load_documents(documents_dir)

        # Build vector index
        if documents and self.vector_db.build_index(documents):
            print("✅ System initialization completed")
            return True

        print("⚠️ System initialization completed but no documents indexed")
        return False

    def _answer_transcribed_query(self, query_text: str, detected_language: str) -> Tuple[str, str, bytes]:
        """Answer an already-transcribed voice query and synthesize the reply.

        Shared by both voice entry points. An empty transcript yields a fixed
        error message and no audio.
        """
        if not query_text:
            return "", "Could not understand the audio input.", b""

        # Process query
        response_text = self.process_text_query(query_text, detected_language)

        # Text to speech, in the language detected from the recording
        response_audio = self.voice_processor.text_to_speech(response_text, detected_language)

        return query_text, response_text, response_audio

    def process_voice_query_from_file(self, audio_file_path: str) -> Tuple[str, str, bytes]:
        """Process a voice query stored in an audio file.

        Returns:
            ``(query_text, response_text, response_audio)``.
        """
        print(f"Processing voice query from file: {audio_file_path}")

        # Speech to text
        query_text, detected_language = self.voice_processor.speech_to_text(audio_file_path)

        return self._answer_transcribed_query(query_text, detected_language)

    def process_voice_query_from_bytes(self, audio_data: bytes) -> Tuple[str, str, bytes]:
        """Process a voice query supplied as audio bytes.

        Returns:
            ``(query_text, response_text, response_audio)``.
        """
        print("Processing voice query from audio bytes")

        # Speech to text
        query_text, detected_language = self.voice_processor.speech_to_text_from_bytes(audio_data)

        return self._answer_transcribed_query(query_text, detected_language)

    def process_text_query(self, query: str, language: str = "en") -> str:
        """Retrieve context for ``query``, generate a reply and record it.

        Args:
            query: The user's question.
            language: Language code passed on to the LLM processor.

        Returns:
            The generated response text.
        """
        print(f"Processing text query: '{query}' (language: {language})")

        # Retrieve relevant documents
        retrieved_docs = self.vector_db.search(query)

        # Generate response
        response = self.llm_processor.generate_response(query, retrieved_docs, language)

        # Store in conversation history
        self.conversation_history.append({
            'timestamp': datetime.now(),
            'query': query,
            'language': language,
            'response': response,
            'retrieved_docs': len(retrieved_docs)
        })

        print("✅ Query processed successfully")
        return response

    def get_conversation_history(self) -> List[Dict]:
        """Return the list of recorded exchanges (the live list, not a copy)."""
        return self.conversation_history

    def clear_conversation_history(self):
        """Discard all recorded exchanges."""
        self.conversation_history = []
        print("✅ Conversation history cleared")

# Create sample documents for testing
def create_sample_documents():
    """Write two demonstration text files into ``Config.DOCUMENTS_DIR``.

    The directory is created if needed. The files are an accessibility guide
    and a list of technology tips; existing files of the same name are
    overwritten.
    """
    docs_dir = Config.DOCUMENTS_DIR
    os.makedirs(docs_dir, exist_ok=True)

    # (filename, full text) for each sample document
    samples = (
        ("accessibility_guide.txt", """Accessibility Guide for Visually Impaired Users

Introduction
This guide provides essential information about accessibility tools and techniques for visually impaired individuals.

Screen Readers
Screen readers are software applications that convert text and interface elements into speech or Braille output. Popular screen readers include:
- NVDA (NonVisual Desktop Access) - Free and open source
- JAWS (Job Access With Speech) - Commercial screen reader
- VoiceOver - Built into Apple devices
- TalkBack - Built into Android devices

Navigation Techniques
Effective navigation using assistive technology involves:
1. Learning keyboard shortcuts for faster navigation
2. Using heading navigation to jump between sections
3. Utilizing landmarks to understand page structure
4. Taking advantage of skip links to bypass repetitive content

Web Accessibility
When browsing the web, look for sites that follow WCAG guidelines:
- Proper heading structure
- Alternative text for images
- Keyboard-accessible controls
- High contrast color schemes
- Descriptive link text"""),
        ("technology_tips.txt", """Technology Tips for Enhanced Accessibility

Voice Assistants
Voice assistants can significantly improve daily productivity:
- Set reminders and alarms
- Control smart home devices
- Get weather and news updates
- Make phone calls and send messages
- Search for information hands-free

Audio Books and Podcasts
Digital audio content provides access to vast libraries:
- Audible and similar services offer extensive catalogs
- Many public libraries provide free audiobook access
- Podcast apps organize content by topics and interests
- Speed control allows for personalized listening preferences

Braille Displays
Modern refreshable Braille displays offer:
- Tactile feedback for digital content
- Portable designs for mobility
- Bluetooth connectivity with devices
- Multiple Braille grade support"""),
    )

    for sample_name, sample_text in samples:
        with open(f"{docs_dir}/{sample_name}", "w", encoding="utf-8") as handle:
            handle.write(sample_text)

    print(f"✅ Created sample documents in {docs_dir}")

In [None]:
# SIMPLE WORKING VOICE CHATBOT FOR GOOGLE COLAB
!pip install -q openai-whisper sentence-transformers faiss-cpu gtts pydub PyPDF2 python-docx groq python-dotenv

import os, io, numpy as np, time
from pathlib import Path
from google.colab import files, output
from IPython.display import Audio, display, clear_output, HTML
import whisper
from sentence_transformers import SentenceTransformer
import faiss
from gtts import gTTS
from pydub import AudioSegment
import PyPDF2
from docx import Document
from groq import Groq
from dotenv import load_dotenv
import base64

# Configuration
class Config:
    """Settings for the single-cell voice chatbot defined below.

    NOTE(review): this rebinds the module-level name ``Config``; classes from
    earlier cells that read attributes missing here (for example
    ``SUPPORTED_LANGUAGES`` or ``AUDIO_DIR``) would fail if used afterwards —
    confirm those cells are not meant to run after this one.
    """
    GROQ_MODEL = "llama3-8b-8192"
    WHISPER_MODEL = "base"  # Good balance between speed and accuracy
    EMBEDDING_MODEL = "all-MiniLM-L6-v2"
    DOCUMENTS_DIR = "/content/documents"
    EMBEDDING_DIM = 384  # dimension passed to faiss.IndexFlatIP in build_index
    SYSTEM_PROMPT = "You are a helpful AI assistant. Answer concisely in 1-2 sentences."

# Initialize
def init():
    """Prepare the environment and load the models used by the chatbot.

    If ``/content/.env`` does not exist, the Groq API key is requested with a
    non-echoing prompt and written there. The file is then loaded, the
    documents directory is created, and the models are instantiated.

    Returns:
        A dict with the keys ``whisper``, ``embedding`` and ``groq``.

    Raises:
        ValueError: If no key is entered, or ``GROQ_API_KEY`` is missing from
            the environment after loading the file.
    """
    # Setup environment
    if not os.path.exists('/content/.env'):
        # getpass keeps the key out of the cell output; input() would echo it.
        from getpass import getpass

        print("Please enter your Groq API key (get it from https://console.groq.com/keys)")
        api_key = getpass("API Key: ").strip()
        if not api_key:
            raise ValueError("API key is required")
        with open('/content/.env', 'w') as f:
            f.write(f"GROQ_API_KEY={api_key}\n")
        clear_output()

    load_dotenv('/content/.env')
    if not os.getenv('GROQ_API_KEY'):
        raise ValueError("GROQ_API_KEY not found in environment variables")

    # Create documents directory
    os.makedirs(Config.DOCUMENTS_DIR, exist_ok=True)

    # Load models
    print("Loading models...")
    models = {
        'whisper': whisper.load_model(Config.WHISPER_MODEL),
        'embedding': SentenceTransformer(Config.EMBEDDING_MODEL),
        'groq': Groq(api_key=os.getenv('GROQ_API_KEY'))
    }
    print("✅ Models loaded")
    return models

# Document Processing
def process_uploaded_files():
    """Ask for an upload, save the files and return their text chunks.

    Each uploaded file is written into ``Config.DOCUMENTS_DIR``, its text is
    extracted and split, and one dict per chunk (``filename``, ``chunk_id``,
    ``content``) is collected. Files that yield no text add nothing.

    Returns:
        The list of chunk dicts; empty when nothing was uploaded.
    """
    print("Please upload your documents (PDF/DOCX/TXT):")
    uploads = files.upload()
    if not uploads:
        return []

    collected = []
    for name, payload in uploads.items():
        # Persist the upload so the extractors can open it by path
        target = os.path.join(Config.DOCUMENTS_DIR, name)
        with open(target, 'wb') as sink:
            sink.write(payload)

        body = extract_text(target)
        if not body:
            continue

        pieces = split_text(body)
        collected.extend(
            {'filename': name, 'chunk_id': position, 'content': piece}
            for position, piece in enumerate(pieces)
        )
        print(f"📄 {name}: {len(pieces)} chunks extracted")

    return collected

def extract_text(filepath):
    """Return the text of a PDF, DOCX, TXT or Markdown file.

    The format is chosen from the (case-insensitive) file extension. Read
    errors are printed and result in an empty string, as does an unsupported
    extension.

    Args:
        filepath: Path of the file to read.

    Returns:
        The extracted text, possibly empty.
    """
    text = ""
    lowered = filepath.lower()
    try:
        if lowered.endswith('.pdf'):
            with open(filepath, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                # `or ""` guards against a page yielding no text object,
                # which would otherwise make join() fail for the whole file.
                text = "\n".join(page.extract_text() or "" for page in reader.pages)
        elif lowered.endswith('.docx'):
            doc = Document(filepath)
            text = "\n".join(para.text for para in doc.paragraphs)
        elif lowered.endswith(('.txt', '.md')):
            # Undecodable bytes are dropped rather than discarding the file.
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
    except Exception as e:
        print(f"⚠️ Error reading {filepath}: {str(e)}")
    return text

def split_text(text, chunk_size=500):
    """Split text into chunks of whole words bounded by a character budget.

    Words are added to the current chunk until the next word would push the
    running size (word lengths plus one separator each) past ``chunk_size``.
    A single word longer than the budget still forms its own chunk.

    Args:
        text: Text to split on whitespace.
        chunk_size: Character budget per chunk.

    Returns:
        The list of chunks; empty when ``text`` has no words.
    """
    def _pack(tokens):
        bucket, used = [], 0
        for token in tokens:
            # Flush only a non-empty bucket, so an oversized word is kept
            if bucket and used + len(token) > chunk_size:
                yield " ".join(bucket)
                bucket, used = [], 0
            bucket.append(token)
            used += len(token) + 1
        if bucket:
            yield " ".join(bucket)

    return list(_pack(text.split()))

def build_index(documents, embedding_model):
    """Build a FAISS inner-product index over the documents' contents.

    Embeddings are scaled to unit length so inner product equals cosine
    similarity. The index dimension is taken from the embeddings themselves,
    so it always matches ``embedding_model``.

    Args:
        documents: Chunk dicts, each with a ``content`` string.
        embedding_model: Object whose ``encode(texts, show_progress_bar=...)``
            returns a 2-D array of embeddings.

    Returns:
        The populated index, or None when ``documents`` is empty.
    """
    if not documents:
        return None

    print("Building search index...")
    texts = [doc['content'] for doc in documents]
    embeddings = np.asarray(
        embedding_model.encode(texts, show_progress_bar=True), dtype='float32'
    )

    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    # Dividing by a zero norm would fill the row with NaN.
    norms[norms == 0] = 1.0
    embeddings = np.ascontiguousarray(embeddings / norms, dtype='float32')

    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)
    print("✅ Index built")
    return index

# Audio Processing
def create_recorder_ui():
    """Render the in-notebook voice recorder widget.

    Displays an HTML panel with a record/stop button, a status line and a
    timer. The embedded JavaScript captures microphone audio with
    MediaRecorder; when recording stops it base64-encodes the recording and
    passes it to the kernel callback registered as 'process_audio'. The
    status line returns to "Ready to record" once that callback finishes,
    or shows the error if the call fails.
    """
    display(HTML("""
    <div style="text-align: center; margin: 20px; padding: 20px; border: 2px solid #4CAF50; border-radius: 10px;">
        <h3>🎤 Voice Chat Interface</h3>
        <button id="recordBtn" style="padding: 12px 24px; font-size: 16px; background: #4CAF50; color: white; border: none; border-radius: 4px; cursor: pointer;">
            🎤 Start Recording
        </button>
        <div id="status" style="margin: 15px 0; font-weight: bold;">Ready to record</div>
        <div id="timer" style="font-size: 18px; color: #D32F2F;"></div>
    </div>

    <script>
    let mediaRecorder, audioChunks = [], isRecording = false;
    const recordBtn = document.getElementById('recordBtn');
    const statusDiv = document.getElementById('status');
    const timerDiv = document.getElementById('timer');
    let startTime, timerInterval;

    recordBtn.onclick = async function() {
        if (!isRecording) {
            await startRecording();
        } else {
            stopRecording();
        }
    };

    async function startRecording() {
        try {
            statusDiv.textContent = "Starting recording...";
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            mediaRecorder = new MediaRecorder(stream);
            audioChunks = [];

            mediaRecorder.ondataavailable = e => {
                if (e.data.size > 0) audioChunks.push(e.data);
            };

            mediaRecorder.onstop = async () => {
                statusDiv.textContent = "Processing...";
                const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
                const reader = new FileReader();
                reader.onloadend = async () => {
                    const base64Audio = reader.result.split(',')[1];
                    try {
                        // Wait for the kernel so the status reflects when the answer is ready.
                        await google.colab.kernel.invokeFunction('process_audio', [base64Audio], {});
                        // Do not overwrite "Recording..." if a new recording already started.
                        if (!isRecording) statusDiv.textContent = "Ready to record";
                    } catch (err) {
                        statusDiv.textContent = "Error: " + err.message;
                        console.error(err);
                    }
                };
                reader.readAsDataURL(audioBlob);
                stream.getTracks().forEach(track => track.stop());
            };

            mediaRecorder.start(100);
            isRecording = true;
            recordBtn.textContent = "⏹️ Stop Recording";
            recordBtn.style.background = "#f44336";
            statusDiv.textContent = "Recording...";

            // Start timer
            startTime = Date.now();
            timerInterval = setInterval(updateTimer, 1000);
            updateTimer();

        } catch (err) {
            statusDiv.textContent = "Error: " + err.message;
            console.error(err);
        }
    }

    function stopRecording() {
        if (mediaRecorder && isRecording) {
            mediaRecorder.stop();
            isRecording = false;
            clearInterval(timerInterval);
            // Clear the stale "Recording: Ns" text once recording has ended.
            timerDiv.textContent = "";
            recordBtn.textContent = "🎤 Start Recording";
            recordBtn.style.background = "#4CAF50";
        }
    }

    function updateTimer() {
        const seconds = Math.floor((Date.now() - startTime) / 1000);
        timerDiv.textContent = `Recording: ${seconds}s`;
    }
    </script>
    """))

def process_audio(base64_audio):
    """Handle one recorded utterance: transcribe it, answer it, speak the answer.

    Registered as the 'process_audio' kernel callback and invoked by the
    recorder UI's JavaScript with the recording encoded as base64.

    Args:
        base64_audio: Base64 text of the recorded audio file.

    Returns:
        The response text on success, "Sorry, I didn't hear that clearly."
        when the transcription is shorter than three characters, or
        "Sorry, something went wrong." when any step raises. Errors are
        printed, never propagated, so a failure cannot break the callback.
    """
    import html  # local import: only needed to escape the reply for display

    audio_path = "/content/recording.webm"
    wav_path = "/content/recording.wav"

    try:
        # Save audio file
        audio_bytes = base64.b64decode(base64_audio)
        with open(audio_path, "wb") as f:
            f.write(audio_bytes)

        # Convert to 16 kHz mono WAV
        audio = AudioSegment.from_file(audio_path)
        audio = audio.set_frame_rate(16000).set_channels(1)
        audio.export(wav_path, format="wav")

        # Transcribe
        result = models['whisper'].transcribe(wav_path)
        query = result["text"].strip()
        print(f"🎤 You asked: {query}")

        if len(query) < 3:
            return "Sorry, I didn't hear that clearly."

        # Get response
        response = generate_response(query)
        print(f"🤖 Response: {response}")

        # Convert to speech
        tts = gTTS(text=response, lang='en')
        audio_buffer = io.BytesIO()
        tts.write_to_fp(audio_buffer)

        # Play response
        display(Audio(audio_buffer.getvalue(), autoplay=True))
        # The reply is model-generated text, so escape it before embedding
        # it in markup; otherwise '<', '&' etc. would be parsed as HTML.
        display(HTML(f"<div style='margin:20px; padding:15px; background:#f5f5f5; border-radius:8px;'><b>Response:</b> {html.escape(response)}</div>"))

        return response

    except Exception as e:
        print(f"Error: {str(e)}")
        return "Sorry, something went wrong."

    finally:
        # Cleanup runs on every exit path (success, early return, failure)
        # so recordings never pile up. A file may not exist if an earlier
        # step failed, and cleanup must not mask the real result.
        for leftover in (audio_path, wav_path):
            try:
                os.remove(leftover)
            except OSError:
                pass

def generate_response(query):
    """Ask the Groq chat model to answer ``query``.

    Only the configured system prompt and the raw query are sent; the
    document index built at start-up is not consulted here.

    Args:
        query: The user's question as text.

    Returns:
        The content of the first completion choice, or a string starting
        with "Error generating response: " when the request fails for any
        reason (the exception is not re-raised).
    """
    try:
        send = models['groq'].chat.completions.create
        request = {
            "model": Config.GROQ_MODEL,
            "messages": [
                {"role": "system", "content": Config.SYSTEM_PROMPT},
                {"role": "user", "content": query},
            ],
            "temperature": 0.7,
            "max_tokens": 150,
        }
        reply = send(**request)
        first_choice = reply.choices[0]
        return first_choice.message.content
    except Exception as err:
        return f"Error generating response: {str(err)}"

# Main Execution
# Runs at cell/module level: load the models, index the uploaded documents,
# register the audio callback and finally render the recorder widget.
print("🔊 Voice Chatbot Initializing...")
# init() is defined elsewhere in the file; its result is looked up by the
# keys 'embedding' (below), 'whisper' (process_audio) and 'groq'
# (generate_response).
models = init()
documents = process_uploaded_files()
# build_index returns None when there are no documents.
# NOTE(review): `index` is not referenced by process_audio or
# generate_response in this section — confirm whether retrieval is wired up
# elsewhere.
index = build_index(documents, models['embedding'])

# Register audio processor
# The name must match the one the recorder's JavaScript passes to
# google.colab.kernel.invokeFunction ('process_audio').
# `output` is presumably google.colab.output — verify against the imports.
output.register_callback('process_audio', process_audio)

print("\n🚀 Ready to chat! Click the record button below:")
create_recorder_ui()
print("\n💡 Tips: Speak clearly after clicking record, and keep queries under 10 seconds.")