In [1]:
import PyPDF2
import requests
from bs4 import BeautifulSoup
import youtube_transcript_api
from youtube_transcript_api import YouTubeTranscriptApi
import chromadb
from chromadb.config import Settings
from transformers import AutoTokenizer, AutoModel
import torch
import re
import unicodedata
import os
from nltk.corpus import stopwords
import nltk

In [2]:
# Step 1: Initialize Hugging Face model and tokenizer for embeddings
def initialize_embedding_model():
    embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
    tokenizer = AutoTokenizer.from_pretrained(embedding_model)
    model = AutoModel.from_pretrained(embedding_model)
    return tokenizer, model

In [3]:
# Step 2: Generate embeddings for text
def get_embeddings(text, tokenizer, model):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
    return embeddings

In [5]:
# Step 3: Preprocessing - Clean and normalize text
def preprocess_text(text, preserve_code=True):
    # Normalize Unicode characters
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
    
    # Remove special characters, keep alphanumeric and basic punctuation
    text = re.sub(r'[^\w\s.,;:!?-]', ' ', text)
    
    # Replace multiple spaces, newlines, or tabs with a single space
    text = re.sub(r'\s+', ' ', text).strip()
    
    # Convert to lowercase (except for code snippets if preserve_code is True)
    if not preserve_code:
        text = text.lower()
    else:
        # Preserve code snippets (assuming they are within triple backticks or indentation)
        code_blocks = []
        def code_replacement(match):
            code_blocks.append(match.group(0))
            return f"__CODE_BLOCK_{len(code_blocks)-1}__"
        
        text = re.sub(r'```[\s\S]*?```', code_replacement, text)
        text = re.sub(r'^\s{4,}.*$', code_replacement, text, flags=re.MULTILINE)
        text = text.lower()
        # Restore code blocks
        for i, code in enumerate(code_blocks):
            text = text.replace(f"__CODE_BLOCK_{i}__", code)
    
    # Optionally remove stopwords (disabled by default for programming context)
    # stop_words = set(stopwords.words('english'))
    # words = text.split()
    # text = ' '.join(word for word in words if word.lower() not in stop_words)
    
    return text

In [6]:
# Step 4: Extract text from PDF with structuring and page range support
def extract_text_from_pdf(pdf_path, start_page=None, end_page=None):
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            text_chunks = []
            current_section = ""
            current_text = ""
            
            # Set default page range if not specified
            if start_page is None:
                start_page = 0
            else:
                start_page = max(0, start_page - 1)  # convert to 1-based to 0-based index
            
            if end_page is None:
                end_page = len(reader.pages)
            else:
                end_page = min(end_page, len(reader.pages))
            
            # Validate page range
            if start_page >= end_page:
                print(f"Invalid page range: start_page {start_page+1} >= end_page {end_page}")
                return []
            
            # Process only the specified page range
            for page_num in range(start_page, end_page):
                page = reader.pages[page_num]
                text = page.extract_text() or ""
                lines = text.split('\n')
                
                for line in lines:
                    # Heuristic: Assume lines with all caps or short length are headings
                    if line.isupper() or (len(line.strip()) < 50 and line.strip().endswith(':')):
                        if current_text:
                            text_chunks.append((current_section, preprocess_text(current_text)))
                            current_text = ""
                        current_section = line.strip()
                    else:
                        current_text += " " + line
                
                if current_text:
                    text_chunks.append((current_section, preprocess_text(current_text)))
                    current_text = ""
            
            return text_chunks  # List of (section, text) tuples
            
    except Exception as e:
        print(f"Error extracting text from PDF {pdf_path}: {e}")
        return []

In [None]:
# # Step 4: Extract text from PDF with structuring
# def extract_text_from_pdf(pdf_path):
#     try:
#         with open(pdf_path, 'rb') as file:
#             reader = PyPDF2.PdfReader(file)
#             text_chunks = []
#             current_section = ""
#             current_text = ""
            
#             for page in reader.pages:
#                 text = page.extract_text() or ""
#                 lines = text.split('\n')
#                 for line in lines:
#                     # Heuristic: Assume lines with all caps or short length are headings
#                     if line.isupper() or (len(line.strip()) < 50 and line.strip().endswith(':')):
#                         if current_text:
#                             text_chunks.append((current_section, preprocess_text(current_text)))
#                             current_text = ""
#                         current_section = line.strip()
#                     else:
#                         current_text += " " + line
#                 if current_text:
#                     text_chunks.append((current_section, preprocess_text(current_text)))
#                     current_text = ""
            
#             return text_chunks  # List of (section, text) tuples
#     except Exception as e:
#         print(f"Error extracting text from PDF {pdf_path}: {e}")
#         return []

In [7]:
# Step 5: Extract text from website with structuring
def extract_text_from_website(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Remove script, style, and navigation elements
        for element in soup(["script", "style", "nav", "footer", "header"]):
            element.decompose()
        
        text_chunks = []
        current_section = ""
        current_text = ""
        
        # Extract headings (h1, h2, h3) and their content
        for element in soup.find_all(['h1', 'h2', 'h3', 'p', 'li']):
            if element.name in ['h1', 'h2', 'h3']:
                if current_text:
                    text_chunks.append((current_section, preprocess_text(current_text)))
                    current_text = ""
                current_section = element.get_text(strip=True)
            else:
                current_text += " " + element.get_text(strip=True)
        
        if current_text:
            text_chunks.append((current_section, preprocess_text(current_text)))
        
        return text_chunks  # List of (section, text) tuples
    except Exception as e:
        print(f"Error extracting text from website {url}: {e}")
        return []

In [8]:
# Step 6: Extract text from YouTube video transcript with structuring
def extract_youtube_transcript(video_url):
    try:
        video_id = None
        if "youtube.com" in video_url or "youtu.be" in video_url:
            match = re.search(r"(?:v=|youtu\.be/)([0-9A-Za-z_-]{11})", video_url)
            if match:
                video_id = match.group(1)
        
        if not video_id:
            print(f"Invalid YouTube URL: {video_url}")
            return []
            
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        text_chunks = []
        current_text = ""
        current_timestamp = 0
        
        # Group transcript by time intervals (e.g., every 60 seconds)
        for entry in transcript:
            if entry['start'] > current_timestamp + 60:
                if current_text:
                    text_chunks.append((f"Segment_{int(current_timestamp)}", preprocess_text(current_text)))
                    current_text = ""
                current_timestamp = entry['start']
            current_text += " " + entry['text']
        
        if current_text:
            text_chunks.append((f"Segment_{int(current_timestamp)}", preprocess_text(current_text)))
        
        return text_chunks  # List of (segment, text) tuples
    except Exception as e:
        print(f"Error extracting transcript from YouTube video {video_url}: {e}")
        return []

In [9]:
# Step 7: Cache preprocessed text to disk
def cache_text(source, text_chunks, cache_dir="../cache"):
    os.makedirs(cache_dir, exist_ok=True)
    source_name = re.sub(r'[^\w\-_\.]', '_', source)
    cache_path = os.path.join(cache_dir, f"{source_name}.txt")
    
    try:
        with open(cache_path, 'w', encoding='utf-8') as f:
            for section, text in text_chunks:
                f.write(f"--- {section} ---\n{text}\n\n")
        print(f"Cached text for {source} to {cache_path}")
    except Exception as e:
        print(f"Error caching text for {source}: {e}")

In [10]:
# Step 8: Check cache for preprocessed text
def load_from_cache(source, cache_dir="../cache"):
    source_name = re.sub(r'[^\w\-_\.]', '_', source)
    cache_path = os.path.join(cache_dir, f"{source_name}.txt")
    
    if os.path.exists(cache_path):
        try:
            with open(cache_path, 'r', encoding='utf-8') as f:
                content = f.read()
                chunks = []
                current_section = ""
                current_text = ""
                for line in content.splitlines():
                    if line.startswith("--- ") and line.endswith(" ---"):
                        if current_text:
                            chunks.append((current_section, current_text.strip()))
                            current_text = ""
                        current_section = line[4:-4].strip()
                    else:
                        current_text += " " + line
                if current_text:
                    chunks.append((current_section, current_text.strip()))
                return chunks
        except Exception as e:
            print(f"Error loading cache for {source}: {e}")
    return None

In [11]:
# Step 9: Initialize embedding model and generate sample embeddings for review
def generate_sample_embeddings(text_chunks, sources, num_samples=5):
    """
    Initialize embedding model and generate sample embeddings for review.
    Does not store embeddings, only displays them for evaluation.
    
    Args:
        text_chunks: List of text chunks from different sources
        sources: List of source names corresponding to text_chunks
        num_samples: Number of sample embeddings to generate and display
    """
    
    # Initialize the embedding model
    print("Initializing embedding model...")
    tokenizer, model = initialize_embedding_model()
    print("Embedding model initialized successfully!\n")
    
    sample_count = 0
    
    for source, chunks in zip(sources, text_chunks):
        if not chunks or sample_count >= num_samples:
            break
            
        print(f"Processing source: {source}")
        print("-" * 50)
        
        for i, (section, text) in enumerate(chunks):
            if sample_count >= num_samples:
                break
                
            # Split text into smaller chunks (~500 words) for embedding
            words = text.split()
            chunk_size = 500
            sub_chunks = [" ".join(words[j:j + chunk_size]) for j in range(0, len(words), chunk_size)]
            
            for j, sub_chunk in enumerate(sub_chunks):
                if sample_count >= num_samples:
                    break
                    
                # Generate embedding for the sub-chunk
                print(f"\nSample {sample_count + 1}:")
                print(f"Source: {source}")
                print(f"Section: {section}")
                print(f"Chunk ID: {i}_{j}")
                print(f"Text preview (first 100 chars): {sub_chunk[:100]}...")
                
                # Get the embedding
                embedding = get_embeddings(sub_chunk, tokenizer, model)
                
                # Display embedding information
                print(f"Embedding shape: {embedding.shape}")
                print(f"Embedding type: {type(embedding)}")
                print(f"First 10 dimensions: {embedding[:10].tolist()}")
                # print(f"Embedding norm: {float(embedding.norm()):.4f}")
                print(f"Min value: {float(embedding.min()):.4f}")
                print(f"Max value: {float(embedding.max()):.4f}")
                print(f"Mean value: {float(embedding.mean()):.4f}")
                print("=" * 60)
                
                sample_count += 1
    
    print(f"\nGenerated and displayed {sample_count} sample embeddings for review.")

In [12]:
# Step 10: Initialize ChromaDB with persistence and store embeddings 
def store_in_chromadb(text_chunks, sources, collection_name="python_tutor", persist_directory="../data/chroma_db"):
    # Initialize ChromaDB client with persistent storage
    client = chromadb.PersistentClient(
        path=persist_directory
    )
    try:
        # Try to get existing collection or create new one
        collection = client.get_or_create_collection(name=collection_name)
    except Exception as e:
        print(f"Error accessing/creating collection: {e}")
        return

    tokenizer, model = initialize_embedding_model()
    
    for source, chunks in zip(sources, text_chunks):
        if not chunks:
            continue
            
        for i, (section, text) in enumerate(chunks):
            # Split text into smaller chunks (~500 words) for embedding
            words = text.split()
            chunk_size = 500
            sub_chunks = [" ".join(words[j:j + chunk_size]) for j in range(0, len(words), chunk_size)]
            
            for j, sub_chunk in enumerate(sub_chunks):
                embedding = get_embeddings(sub_chunk, tokenizer, model)
                collection.add(
                    documents=[sub_chunk],
                    embeddings=[embedding.tolist()],
                    metadatas=[{"source": source, "section": section, "chunk_id": f"{i}_{j}"}],
                    ids=[f"{source}_{i}_{j}"]
                )
                print(f"Stored chunk {i}_{j} from {source} (section: {section})")
    
    print(f"Database persisted to {persist_directory}")

In [13]:
# Main function to process all sources
def process_knowledge_base(pdf_paths, website_urls, youtube_urls, cache_dir="..\cache", 
                          pdf_start_page=None, pdf_end_page=None):
    texts = []
    sources = []
    
    # # Process PDFs
    # for pdf_path in pdf_paths:
    #     cached = load_from_cache(pdf_path, cache_dir)
    #     if cached:
    #         texts.append(cached)
    #         sources.append(pdf_path)
    #     else:
    #         chunks = extract_text_from_pdf(pdf_path, start_page=pdf_start_page, end_page=pdf_end_page)
    #         if chunks:
    #             cache_text(pdf_path, chunks, cache_dir)
    #             texts.append(chunks)
    #             sources.append(pdf_path)
    
    # Process websites
    for url in website_urls:
        cached = load_from_cache(url, cache_dir)
        if cached:
            texts.append(cached)
            sources.append(url)
        else:
            chunks = extract_text_from_website(url)
            if chunks:
                cache_text(url, chunks, cache_dir)
                texts.append(chunks)
                sources.append(url)
    
    # # Process YouTube videos
    # for video_url in youtube_urls:
    #     cached = load_from_cache(video_url, cache_dir)
    #     if cached:
    #         texts.append(cached)
    #         sources.append(video_url)
    #     else:
    #         chunks = extract_youtube_transcript(video_url)
    #         if chunks:
    #             cache_text(video_url, chunks, cache_dir)
    #             texts.append(chunks)
    #             sources.append(video_url)
    
    # generate_sample_embeddings for review
    generate_sample_embeddings(texts, sources)

    store_in_chromadb(texts, sources)

In [None]:
# # Main function to process all sources
# def process_knowledge_base(pdf_paths, website_urls, youtube_urls, cache_dir="..\cache"):
#     texts = []
#     sources = []
    
#     # Process PDFs
#     for pdf_path in pdf_paths:
#         cached = load_from_cache(pdf_path, cache_dir)
#         if cached:
#             texts.append(cached)
#             sources.append(pdf_path)
#         else:
#             chunks = extract_text_from_pdf(pdf_path)
#             if chunks:
#                 cache_text(pdf_path, chunks, cache_dir)
#                 texts.append(chunks)
#                 sources.append(pdf_path)
    
#     # # Process websites
#     # for url in website_urls:
#     #     cached = load_from_cache(url, cache_dir)
#     #     if cached:
#     #         texts.append(cached)
#     #         sources.append(url)
#     #     else:
#     #         chunks = extract_text_from_website(url)
#     #         if chunks:
#     #             cache_text(url, chunks, cache_dir)
#     #             texts.append(chunks)
#     #             sources.append(url)
    
#     # # Process YouTube videos
#     # for video_url in youtube_urls:
#     #     cached = load_from_cache(video_url, cache_dir)
#     #     if cached:
#     #         texts.append(cached)
#     #         sources.append(video_url)
#     #     else:
#     #         chunks = extract_youtube_transcript(video_url)
#     #         if chunks:
#     #             cache_text(video_url, chunks, cache_dir)
#     #             texts.append(chunks)
#     #             sources.append(video_url)
    
#     # generate_sample_embeddings for review
#     generate_sample_embeddings(texts, sources)

#     store_in_chromadb(texts, sources)

In [14]:
# Example usage
if __name__ == "__main__":
    pdf_paths = [
        "../data/raw_data/Starting Out with Python, Global Edition, 4th Edition.pdf"
    ]
    website_urls = [
        "https://www.geeksforgeeks.org/how-to-learn-python-from-scratch/"
    ]
    youtube_urls = [
        "https://www.youtube.com/watch?v=8124kv-632k"
    ]
    
    process_knowledge_base(pdf_paths, website_urls, youtube_urls, 
                          pdf_start_page=42, pdf_end_page=703)  # Example page range for PDFs

Cached text for https://www.geeksforgeeks.org/how-to-learn-python-from-scratch/ to ..\cache\https___www.geeksforgeeks.org_how-to-learn-python-from-scratch_.txt
Initializing embedding model...
Embedding model initialized successfully!

Processing source: https://www.geeksforgeeks.org/how-to-learn-python-from-scratch/
--------------------------------------------------

Sample 1:
Source: https://www.geeksforgeeks.org/how-to-learn-python-from-scratch/
Section: 
Chunk ID: 0_0
Text preview (first 100 chars): python tutorial interview questions python quiz python glossary python projects practice python data...
Embedding shape: (384,)
Embedding type: <class 'numpy.ndarray'>
First 10 dimensions: [-0.4097895622253418, 0.07391665875911713, 0.08384502679109573, 0.19434501230716705, -0.2590826749801636, -0.6343346834182739, 0.15607039630413055, -0.0026295939460396767, -0.40203383564949036, -0.06282304972410202]
Min value: -0.6343
Max value: 0.6420
Mean value: -0.0036

Sample 2:
Source: https://www

In [None]:
import os
os.environ["OPENAI_API_KEY"] = "api key here"

In [15]:
# from llama_index.llms import LLMMetadata, LLM
# from llama_index import Settings

Settings.llm = None 

In [1]:
import os
import warnings
warnings.filterwarnings('ignore')

# Import all required libraries
import chromadb
import gradio as gr
from transformers import AutoTokenizer, AutoModel, pipeline
import torch
from huggingface_hub import login
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.storage.storage_context import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import PromptTemplate
import logging
import sys

In [2]:
# Complete RAG-based LLM Tutor Implementation
# Steps 11-15: From Vector Database to Interactive Gradio Interface



# Configure logging to reduce noise
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

print("Starting RAG-based LLM Tutor Implementation...")
print("=" * 60)


Starting RAG-based LLM Tutor Implementation...


In [3]:

# =============================================================================
# STEP 11: Set up the RAG Framework
# =============================================================================

class RAGTutor:
    def __init__(self, persist_directory="../data/chroma_db", collection_name="python_tutor"):
        """
        Initialize the RAG Tutor with ChromaDB connection and embedding model
        """
        self.persist_directory = persist_directory
        self.collection_name = collection_name
        self.client = None
        self.collection = None
        self.embedding_model = None
        self.tokenizer = None
        self.llm_pipeline = None
        self.query_engine = None
        
        print("Step 11: Setting up RAG Framework...")
        self._setup_embedding_model()
        self._connect_to_chromadb()
        self._setup_llm_pipeline()
        print("‚úì RAG Framework setup complete!")
        
    def _setup_embedding_model(self):
        """
        Initialize the embedding model for text vectorization
        """
        print("  - Initializing embedding model...")
        
        # Use the same embedding model as in original code
        embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
        
        # Initialize for direct use
        self.tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
        self.embedding_model = AutoModel.from_pretrained(embedding_model_name)
        
        # Initialize for LlamaIndex
        self.embed_model = HuggingFaceEmbedding(model_name=embedding_model_name)
        
        print("  ‚úì Embedding model initialized")
        
    def _connect_to_chromadb(self):
        """
        Connect to existing ChromaDB collection
        """
        print("  - Connecting to ChromaDB...")
        
        try:
            # Connect to persistent ChromaDB
            self.client = chromadb.PersistentClient(path=self.persist_directory)
            
            # Get existing collection
            self.collection = self.client.get_collection(name=self.collection_name)
            
            # Get collection info
            collection_count = self.collection.count()
            print(f"  ‚úì Connected to ChromaDB collection with {collection_count} documents")
            
        except Exception as e:
            print(f"  ‚úó Error connecting to ChromaDB: {e}")
            raise
            
    def _setup_llm_pipeline(self):
        """
        Setup the language model pipeline for text generation
        """
        print("  - Setting up LLM pipeline...")
        
        # Use a smaller, efficient model for demonstration
        # You can replace this with larger models like llama-2 if you have the resources
        model_name = "meta-llama/Llama-3.1-8B-Instruct"
        
        try:
            # Initialize the text generation pipeline
            self.llm_pipeline = pipeline(
                "text-generation",
                model=model_name,
                tokenizer=model_name,
                max_length=512,
                temperature=0.7,
                do_sample=True,
                pad_token_id=50256  # EOS token for GPT-based models
            )
            print("  ‚úì LLM pipeline initialized")
            
        except Exception as e:
            print(f"  ‚úó Error setting up LLM: {e}")
            # Fallback to a simpler approach
            print("  - Using fallback text generation approach...")
            self.llm_pipeline = None

    def get_embeddings(self, text):
        """
        Generate embeddings for given text (same as your original function)
        """
        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        with torch.no_grad():
            outputs = self.embedding_model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
        return embeddings

In [4]:

# =============================================================================
# STEP 12: Connect Retrieval and Generation using LlamaIndex
# =============================================================================

def setup_llama_index_rag(rag_tutor):
    """
    Setup LlamaIndex RAG system with ChromaDB integration
    """
    print("\nStep 12: Setting up LlamaIndex RAG Integration...")
    
    try:
        # Create ChromaVectorStore from existing collection
        vector_store = ChromaVectorStore(chroma_collection=rag_tutor.collection)
        
        # Create storage context
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        
        # Configure LlamaIndex settings
        Settings.embed_model = rag_tutor.embed_model
        
        # Create index from existing vector store
        index = VectorStoreIndex.from_vector_store(
            vector_store=vector_store,
            storage_context=storage_context
        )
        
        print("  ‚úì LlamaIndex integration setup complete")
        return index
        
    except Exception as e:
        print(f"  ‚úó Error setting up LlamaIndex: {e}")
        return None

def create_query_engine(index):
    """
    Create a query engine for RAG retrieval and generation
    """
    print("  - Creating query engine...")
    
    # Define custom prompt template for Python tutoring
    qa_prompt_template = PromptTemplate(
        "Context information is below.\n"
        "---------------------\n"
        "{context_str}\n"
        "---------------------\n"
        "You are a helpful Python programming tutor. Use the context information to answer the question.\n"
        "If the context doesn't contain relevant information, say so and provide general guidance.\n"
        "Always explain concepts clearly and provide examples when helpful.\n"
        "Question: {query_str}\n"
        "Answer: "
    )
    
    # Create query engine
    query_engine = index.as_query_engine(
        similarity_top_k=3,  # Retrieve top 3 most similar chunks
        text_qa_template=qa_prompt_template
    )
    
    print("  ‚úì Query engine created")
    return query_engine

In [5]:

# =============================================================================
# STEP 13: Add Basic Context Handling
# =============================================================================

class ConversationContext:
    """
    Simple conversation context manager
    """
    def __init__(self, max_history=5):
        self.max_history = max_history
        self.conversation_history = []
        
    def add_exchange(self, question, answer):
        """
        Add a question-answer exchange to history
        """
        self.conversation_history.append({
            'question': question,
            'answer': answer
        })
        
        # Keep only recent history
        if len(self.conversation_history) > self.max_history:
            self.conversation_history.pop(0)
            
    def get_context_string(self):
        """
        Get conversation history as context string
        """
        if not self.conversation_history:
            return ""
            
        context_parts = []
        for exchange in self.conversation_history[-3:]:  # Use last 3 exchanges
            context_parts.append(f"Previous Q: {exchange['question']}")
            context_parts.append(f"Previous A: {exchange['answer'][:200]}...")  # Truncate long answers
            
        return "\n".join(context_parts)
        
    def clear_history(self):
        """
        Clear conversation history
        """
        self.conversation_history = []

def enhanced_query_with_context(query_engine, question, context_manager):
    """
    Enhanced query function with conversation context
    """
    print(f"\nStep 13: Processing query with context...")
    
    # Get conversation context
    context = context_manager.get_context_string()
    
    # Enhance question with context if available
    if context:
        enhanced_question = f"Context from previous conversation:\n{context}\n\nCurrent question: {question}"
    else:
        enhanced_question = question
        
    try:
        # Query the RAG system
        print("  - Retrieving relevant information...")
        response = query_engine.query(enhanced_question)
        
        # Extract answer
        answer = str(response)
        
        # Add to conversation history
        context_manager.add_exchange(question, answer)
        
        print("  ‚úì Query processed successfully")
        return answer
        
    except Exception as e:
        print(f"  ‚úó Error processing query: {e}")
        fallback_answer = f"I apologize, but I encountered an error processing your question about: {question}. Please try rephrasing your question or ask something else about Python programming."
        context_manager.add_exchange(question, fallback_answer)
        return fallback_answer


In [6]:

# =============================================================================
# STEP 14: Integrate RAG Backend with Gradio UI
# =============================================================================

def create_gradio_interface(rag_tutor, query_engine, context_manager):
    """
    Create Gradio interface for the RAG tutor
    """
    print("\nStep 14: Creating Gradio Interface...")
    
    def chat_function(message, history):
        """
        Main chat function for Gradio interface
        """
        if not message.strip():
            return "Please ask a question about Python programming!"
            
        # Process the query
        response = enhanced_query_with_context(query_engine, message, context_manager)
        
        return response
    
    def clear_conversation():
        """
        Clear conversation history
        """
        context_manager.clear_history()
        return "Conversation history cleared!"
        
    def get_sample_questions():
        """
        Provide sample questions for users
        """
        samples = [
            "What are Python data types?",
            "How do I create a for loop in Python?",
            "What is the difference between lists and tuples?",
            "How do I handle exceptions in Python?",
            "What are Python functions and how do I create them?"
        ]
        return "\n".join([f"‚Ä¢ {q}" for q in samples])
    
    # Create the Gradio interface
    with gr.Blocks(title="Python RAG Tutor", theme=gr.themes.Soft()) as interface:
        gr.Markdown("# Python RAG Tutor")
        gr.Markdown("Ask me anything about Python programming! I'll search through my knowledge base to help you learn.")
        
        with gr.Row():
            with gr.Column(scale=3):
                # Chat interface
                chatbot = gr.Chatbot(height=400, label="Python Tutor Chat")
                msg = gr.Textbox(
                    label="Ask your Python question",
                    placeholder="Type your Python programming question here...",
                    lines=2
                )
                
                with gr.Row():
                    submit_btn = gr.Button("Ask Question", variant="primary")
                    clear_btn = gr.Button("Clear Chat", variant="secondary")
                    
            with gr.Column(scale=1):
                # Side panel with information
                gr.Markdown("### Knowledge Base")
                gr.Markdown("I can help you with topics from:")
                gr.Markdown("‚Ä¢ Python fundamentals\n‚Ä¢ Data structures\n‚Ä¢ Control flow\n‚Ä¢ Functions\n‚Ä¢ And more!")
                
                gr.Markdown("### Sample Questions")
                sample_display = gr.Textbox(
                    value=get_sample_questions(),
                    label="Try asking:",
                    lines=8,
                    interactive=False
                )
                
                # Status information
                gr.Markdown("### System Status")
                status_text = f"Connected to knowledge base\n {rag_tutor.collection.count()} documents loaded"
                gr.Textbox(value=status_text, label="Status", lines=3, interactive=False)
        
        # Event handlers
        def respond(message, chat_history):
            if not message.strip():
                return "", chat_history
                
            # Get bot response
            bot_response = chat_function(message, chat_history)
            
            # Add to chat history
            chat_history.append((message, bot_response))
            
            return "", chat_history
        
        def clear_chat():
            clear_conversation()
            return []
            
        # Connect events
        submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
        msg.submit(respond, [msg, chatbot], [msg, chatbot])
        clear_btn.click(clear_chat, outputs=chatbot)
    
    print("  ‚úì Gradio interface created")
    return interface

In [7]:


# =============================================================================
# STEP 15: Test the Prototype and Document Issues
# =============================================================================

def test_rag_system(rag_tutor, query_engine, context_manager):
    """
    Test the RAG system with sample queries
    """
    print("\nStep 15: Testing RAG System...")
    
    test_questions = [
        "What are Python variables?",
        "How do I create a list in Python?",
        "What is a for loop?",
        "Can you explain Python functions?",
        "What are the main Python data types?"
    ]
    
    test_results = []
    
    for i, question in enumerate(test_questions, 1):
        print(f"\n--- Test {i}: {question} ---")
        
        try:
            # Record start time
            import time
            start_time = time.time()
            
            # Get response
            response = enhanced_query_with_context(query_engine, question, context_manager)
            
            # Record end time
            end_time = time.time()
            response_time = end_time - start_time
            
            # Store results
            test_results.append({
                'question': question,
                'response': response[:200] + "..." if len(response) > 200 else response,
                'response_time': response_time,
                'success': True
            })
            
            print(f"Response time: {response_time:.2f} seconds")
            print(f"Response preview: {response[:100]}...")
            
        except Exception as e:
            print(f"Error: {e}")
            test_results.append({
                'question': question,
                'response': f"Error: {e}",
                'response_time': 0,
                'success': False
            })
    
    # Generate test report
    print("\n" + "="*60)
    print("TEST REPORT")
    print("="*60)
    
    successful_tests = sum(1 for result in test_results if result['success'])
    avg_response_time = sum(result['response_time'] for result in test_results if result['success'])
    if successful_tests > 0:
        avg_response_time /= successful_tests
    
    print(f"Total tests: {len(test_questions)}")
    print(f"Successful tests: {successful_tests}")
    print(f"Failed tests: {len(test_questions) - successful_tests}")
    print(f"Average response time: {avg_response_time:.2f} seconds")
    
    # Document issues
    print("\n--- DOCUMENTED ISSUES ---")
    issues = []
    
    if successful_tests < len(test_questions):
        issues.append(f"‚Ä¢ {len(test_questions) - successful_tests} queries failed")
    
    if avg_response_time > 10:
        issues.append(f"‚Ä¢ Slow response time: {avg_response_time:.2f}s average")
    
    if len(issues) == 0:
        print("‚úÖ No major issues detected!")
    else:
        for issue in issues:
            print(issue)
    
    return test_results

In [None]:
os.environ["OPENAI_API_KEY"] = "api key here"

In [None]:


# =============================================================================
# MAIN EXECUTION
# =============================================================================

def main():
    """
    Main function to run the complete RAG tutor system
    """
    print("üöÄ Starting Complete RAG-based LLM Tutor...")
    
    try:
        # Step 11: Initialize RAG Tutor
        rag_tutor = RAGTutor()
        
        # Step 12: Setup LlamaIndex RAG
        index = setup_llama_index_rag(rag_tutor)
        if index is None:
            print("‚ùå Failed to setup LlamaIndex RAG")
            return
            
        query_engine = create_query_engine(index)
        
        # Step 13: Initialize context manager
        print("\nStep 13: Initializing conversation context...")
        context_manager = ConversationContext(max_history=5)
        print("  ‚úì Context manager initialized")
        
        # Step 15: Test the system
        test_results = test_rag_system(rag_tutor, query_engine, context_manager)
        
        # Step 14: Create and launch Gradio interface
        interface = create_gradio_interface(rag_tutor, query_engine, context_manager)
        
        print("\n" + "="*60)
        print("üéâ RAG-based LLM Tutor is ready!")
        print("="*60)
        print("The system includes:")
        print("‚úÖ Vector database with your knowledge base")
        print("‚úÖ RAG retrieval and generation")
        print("‚úÖ Conversation context handling")
        print("‚úÖ Interactive Gradio interface")
        print("‚úÖ Comprehensive testing")
        
        # Launch the interface
        print("\nüåê Launching Gradio interface...")
        interface.launch(
            share=False,  # Set to True if you want a public link
            debug=True,
            server_port=7860
        )
        
    except Exception as e:
        print(f"‚ùå Error in main execution: {e}")
        print("Please check your ChromaDB setup and ensure the knowledge base is properly created.")

# Run the main function
if __name__ == "__main__":
    main()


üöÄ Starting Complete RAG-based LLM Tutor...
Step 11: Setting up RAG Framework...
  - Initializing embedding model...
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
INFO:sentence_transformers.SentenceTransformer:2 prompts are loaded, with the keys: ['query', 'text']
2 prompts are loaded, with the keys: ['query', 'text']
  ‚úì Embedding model initialized
  - Connecting to ChromaDB...
  ‚úì Connected to ChromaDB collection with 45 documents
  - Setting up LLM pipeline...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cpu


  ‚úì LLM pipeline initialized
‚úì RAG Framework setup complete!

Step 12: Setting up LlamaIndex RAG Integration...
  ‚úì LlamaIndex integration setup complete
  - Creating query engine...
  ‚úì Query engine created

Step 13: Initializing conversation context...
  ‚úì Context manager initialized

Step 15: Testing RAG System...

--- Test 1: What are Python variables? ---

Step 13: Processing query with context...
  - Retrieving relevant information...
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.397561 seconds
Retrying request to /chat/completions in 0.397561 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
IN

In [26]:



# =============================================================================
# ADDITIONAL UTILITY FUNCTIONS
# =============================================================================

def quick_test_query(question="What are Python variables?"):
    """
    Quick function to test a single query without launching full interface
    """
    print(f"Quick test query: {question}")
    
    try:
        # Initialize components
        rag_tutor = RAGTutor()
        index = setup_llama_index_rag(rag_tutor)
        query_engine = create_query_engine(index)
        context_manager = ConversationContext()
        
        # Test query
        response = enhanced_query_with_context(query_engine, question, context_manager)
        
        print(f"Response: {response}")
        return response
        
    except Exception as e:
        print(f"Error in quick test: {e}")
        return None

def check_system_status():
    """
    Check if all components are working correctly
    """
    print("Checking system status...")
    
    status = {
        'chromadb_connection': False,
        'embedding_model': False,
        'collection_count': 0,
        'issues': []
    }
    
    try:
        # Check ChromaDB connection
        client = chromadb.PersistentClient(path="../data/chroma_db")
        collection = client.get_collection(name="python_tutor")
        status['chromadb_connection'] = True
        status['collection_count'] = collection.count()
        
        # Check embedding model
        from transformers import AutoTokenizer, AutoModel
        tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
        model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
        status['embedding_model'] = True
        
    except Exception as e:
        status['issues'].append(f"System check error: {e}")
    
    # Print status
    print(f"ChromaDB Connection: {'‚úÖ' if status['chromadb_connection'] else '‚ùå'}")
    print(f"Embedding Model: {'‚úÖ' if status['embedding_model'] else '‚ùå'}")
    print(f"Documents in collection: {status['collection_count']}")
    
    if status['issues']:
        print("Issues found:")
        for issue in status['issues']:
            print(f"  ‚Ä¢ {issue}")
    else:
        print("‚úÖ All systems operational!")
        
    return status

# Uncomment the following lines to run specific components:
# check_system_status()
# quick_test_query("What are Python data types?")
main()  # Run the full system

üöÄ Starting Complete RAG-based LLM Tutor...
Step 11: Setting up RAG Framework...
  - Initializing embedding model...
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
INFO:sentence_transformers.SentenceTransformer:2 prompts are loaded, with the keys: ['query', 'text']
2 prompts are loaded, with the keys: ['query', 'text']
  ‚úì Embedding model initialized
  - Connecting to ChromaDB...
  ‚úì Connected to ChromaDB collection with 45 documents
  - Setting up LLM pipeline...


Device set to use cpu


  ‚úì LLM pipeline initialized
‚úì RAG Framework setup complete!

Step 12: Setting up LlamaIndex RAG Integration...
  ‚úì LlamaIndex integration setup complete
  - Creating query engine...
  ‚úì Query engine created

Step 13: Initializing conversation context...
  ‚úì Context manager initialized

Step 15: Testing RAG System...

--- Test 1: What are Python variables? ---

Step 13: Processing query with context...
  - Retrieving relevant information...
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.490761 seconds
Retrying request to /chat/completions in 0.490761 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
IN


Step 13: Processing query with context...
  - Retrieving relevant information...
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.418900 seconds
Retrying request to /chat/completions in 0.418900 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.821086 seconds
Retrying request to /chat/completions in 0.821086 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying