In [None]:
!pip install pinecone

Collecting pinecone
  Downloading pinecone-6.0.2-py3-none-any.whl.metadata (9.0 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Downloading pinecone-6.0.2-py3-none-any.whl (421 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m421.9/421.9 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pinecone_plugin_interface-0.0.7-py3-none-any.whl (6.2 kB)
Installing collected packages: pinecone-plugin-interface, pinecone
Successfully installed pinecone-6.0.2 pinecone-plugin-interface-0.0.7


In [None]:
!pip install youtube_transcript_api

Collecting youtube_transcript_api
  Downloading youtube_transcript_api-1.0.3-py3-none-any.whl.metadata (23 kB)
Downloading youtube_transcript_api-1.0.3-py3-none-any.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: youtube_transcript_api
Successfully installed youtube_transcript_api-1.0.3


In [None]:
!pip install groq


Collecting groq
  Downloading groq-0.20.0-py3-none-any.whl.metadata (15 kB)
Downloading groq-0.20.0-py3-none-any.whl (124 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.9/124.9 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.20.0


In [None]:
import os
import re
import numpy as np
import pinecone
from sentence_transformers import SentenceTransformer
from groq import Groq
import torch
from typing import List, Dict, Any, Optional
from youtube_transcript_api import YouTubeTranscriptApi
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import spacy

class EnhancedRAGSystem:
    """Question answering that combines Pinecone retrieval with a Groq chat model.

    A question is embedded with a SentenceTransformer model, the best-matching
    vector is fetched from a Pinecone index, and the text stored in that
    match's metadata is handed to Groq as context for the answer.
    """

    # Groq chat model used for completions; override per instance via __init__.
    groq_model = "llama3-70b-8192"

    # A transcript segment ending in one of these is left as-is; any other
    # segment gets a trailing period when the transcript is stitched together.
    _SEGMENT_TERMINATORS = ('.', '!', '?', ':', ';')

    # Number of transcript characters shown by format_response().
    _TRANSCRIPT_EXCERPT_CHARS = 500

    def __init__(self, pinecone_api_key: str, groq_api_key: str, index_name: str,
                 model_name: str = "sentence-transformers/all-mpnet-base-v2",
                 groq_model: Optional[str] = None):
        """
        Initialize Enhanced RAG system with Pinecone, Groq, and models.

        Args:
            pinecone_api_key: API key passed to the Pinecone client
            groq_api_key: API key passed to the Groq client
            index_name: Name of the Pinecone index to query
            model_name: SentenceTransformer model used to embed questions
            groq_model: Groq chat model name; None keeps the class default
        """
        # Initialize Pinecone client
        self.pc = pinecone.Pinecone(api_key=pinecone_api_key)
        self.index = self.pc.Index(index_name)

        # Initialize Groq client
        self.groq_client = Groq(api_key=groq_api_key)
        if groq_model is not None:
            self.groq_model = groq_model

        # Check for GPU availability
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {self.device}")

        # Initialize embedding model
        self.embedding_model = SentenceTransformer(model_name).to(self.device)

        # Set parameters
        self.top_k = 1  # Retrieve only the top source

        # Initialize NLP tools; fall back to a tiny built-in stopword list
        # (and no lemmatizer) when the NLTK resources cannot be loaded.
        try:
            nltk.download('stopwords', quiet=True)
            nltk.download('wordnet', quiet=True)
            self.stop_words = set(stopwords.words('english'))
            self.lemmatizer = WordNetLemmatizer()
        except Exception as e:
            print(f"Warning: NLTK resource download issue. Error: {e}")
            self.stop_words = {'a', 'an', 'the', 'and', 'or', 'but', 'is', 'are', 'was', 'were', 'to', 'of', 'in', 'for'}
            self.lemmatizer = None

        # Load spaCy model with fallback. `Exception` (not a bare except) so
        # that Ctrl-C / SystemExit still propagate. Note that self.nlp is not
        # used by any other method of this class.
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except Exception as e:
            print(f"Warning: could not load spaCy model 'en_core_web_sm' ({e}). Using a simple pipeline.")
            self.nlp = spacy.blank("en")

    def generate_answer(self, query: str, relevant_chunks: List[Dict[str, Any]]) -> str:
        """
        Generate an answer with Groq's chat model from the query and the top retrieved chunk.

        Args:
            query: The user's question
            relevant_chunks: Retrieved chunks, best match first; only the
                first chunk's 'text' is used as context

        Returns:
            The model's answer, or a fixed apology message when there are no
            chunks or the Groq call fails
        """
        if not relevant_chunks:
            return "I couldn't find sufficient information to answer your question. Please try rephrasing or asking a different question."

        # Use the top (most relevant) chunk
        top_chunk = relevant_chunks[0]
        context = top_chunk['text']

        # Prepare a focused prompt for the Groq language model
        prompt = f"""
        You are an intelligent assistant specialized in educational content. Your task is to create a comprehensive, well-structured answer to the user's question using the provided context.

        USER QUESTION:
        {query}

        RELEVANT CONTEXT:
        {context}
        """

        # Use Groq's chat completion API
        try:
            response = self.groq_client.chat.completions.create(
                model=self.groq_model,
                messages=[
                    {"role": "system", "content": "You are an intelligent assistant specialized in educational content."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=512,
                temperature=0.7,
                top_p=0.95
            )

            # An empty/None completion is treated like any other failure
            answer = (response.choices[0].message.content or "").strip()
            if not answer:
                raise ValueError("Groq returned an empty completion")
            return answer

        except Exception as e:
            print(f"Error generating answer with Groq: {e}")
            return "I encountered an error while generating the answer. Please try again."

    def extract_transcript(self, video_id: str) -> Optional[str]:
        """
        Extract transcript from a YouTube video.

        Args:
            video_id: YouTube video ID

        Returns:
            Transcript text (segments joined by spaces, each ending in
            punctuation) or None if extraction fails
        """
        if not video_id or not isinstance(video_id, str):
            print("Invalid video ID provided")
            return None

        try:
            # youtube_transcript_api 1.x exposes an instance-level fetch();
            # the static get_transcript() is the legacy entry point and is
            # used only when fetch() is not available.
            if hasattr(YouTubeTranscriptApi, 'fetch'):
                transcript_list = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
            else:
                transcript_list = YouTubeTranscriptApi.get_transcript(video_id)

            if not transcript_list:
                print(f"No transcript found for video {video_id}")
                return None

            # Ensure proper formatting with punctuation
            formatted_segments = []
            for segment in transcript_list:
                text = (segment.get('text') or '').strip()
                if not text:
                    continue
                # Add period if segment doesn't end with punctuation
                if text[-1] not in self._SEGMENT_TERMINATORS:
                    text += '.'
                formatted_segments.append(text)

            return ' '.join(formatted_segments)
        except Exception as e:
            print(f"Error fetching transcript for video {video_id}: {e}")
            return None

    def simple_tokenize(self, text):
        """Simple tokenizer that avoids NLTK's punkt.

        Replaces every character that is neither a word character nor
        whitespace with a space, lowercases, and splits on whitespace.
        """
        text = re.sub(r'[^\w\s]', ' ', text)
        return [token for token in text.lower().split() if token]

    def preprocess_text(self, text):
        """NLP preprocessing: stopword removal, lemmatization.

        Returns:
            A (processed_text, {}) tuple; the second element is always an
            empty dict. Empty or non-string input yields ("", {}).
        """
        if not text or not isinstance(text, str):
            return "", {}

        text = re.sub(r'[^\w\s]', ' ', text).lower()
        tokens = self.simple_tokenize(text)
        filtered_tokens = [word for word in tokens if word not in self.stop_words]

        # Use lemmatizer if available
        if self.lemmatizer:
            lemmatized_tokens = [self.lemmatizer.lemmatize(word) for word in filtered_tokens]
        else:
            lemmatized_tokens = filtered_tokens

        processed_text = ' '.join(lemmatized_tokens)
        return processed_text, {}

    def embed_query(self, query: str) -> List[float]:
        """Generate embedding for the query.

        Empty or non-string queries produce an all-zero vector of the
        embedding model's dimension instead of being encoded.
        """
        if not query or not isinstance(query, str):
            print("Warning: Empty or invalid query received for embedding")
            return [0.0] * self.embedding_model.get_sentence_embedding_dimension()

        # Apply preprocessing
        processed_query, _ = self.preprocess_text(query)

        # Ensure we have text to embed (a query made only of stopwords
        # preprocesses to an empty string)
        if not processed_query:
            processed_query = query  # Fall back to original query

        # Generate embedding
        return self.embedding_model.encode(processed_query).tolist()

    def retrieve_relevant_chunks(self, query_embedding: List[float], top_k: int = 1, video_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Retrieve the most relevant chunks from Pinecone based on the query embedding.
        Optionally filter by video ID.

        Returns:
            One dict per match with the keys 'id', 'score', 'text',
            'video_id' and 'chunk_id'; an empty list if the query fails
        """
        # Prepare query parameters
        query_params = {
            "vector": query_embedding,
            "top_k": top_k,
            "include_metadata": True
        }

        # Add video ID filter if provided
        if video_id:
            query_params["filter"] = {"video_id": video_id}

        # Query Pinecone index using similarity search
        try:
            query_response = self.index.query(**query_params)
        except Exception as e:
            print(f"Error querying Pinecone: {e}")
            return []

        # Extract matches with their metadata
        matches = query_response.get('matches', []) or []

        # Format results
        results = []
        for match in matches:
            # A match stored without metadata must not crash the lookup;
            # it simply falls back to the placeholder values below.
            metadata = getattr(match, 'metadata', None) or {}

            results.append({
                'id': match.id,
                'score': match.score,
                'text': metadata.get('text_sample', 'No text available'),
                'video_id': metadata.get('video_id', 'unknown'),
                'chunk_id': metadata.get('chunk_id', -1)
            })

        return results

    def query(self, question: str, video_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Process a user query and return a well-structured answer with supporting evidence.

        Args:
            question: The user's question
            video_id: Optional YouTube video ID to filter sources

        Returns:
            Dict with the keys 'question', 'answer', 'source' (top chunk or
            None), 'video_id' and 'video_transcript'
        """
        # Step 1: Generate embedding for the question
        query_embedding = self.embed_query(question)

        # Step 2: Retrieve the most relevant chunk from Pinecone, optionally filtered by video ID
        relevant_chunks = self.retrieve_relevant_chunks(
            query_embedding,
            self.top_k,
            video_id
        )

        # If no chunks found and a video ID was provided, try without the filter
        if not relevant_chunks and video_id:
            relevant_chunks = self.retrieve_relevant_chunks(query_embedding, self.top_k)

        # Step 3: Generate a comprehensive answer based on the retrieved chunk
        answer = self.generate_answer(question, relevant_chunks)

        # Step 4: Try to fetch video transcript if a video ID was provided and no relevant chunks found.
        # The transcript is only returned alongside the answer; it is not used to generate it.
        video_transcript = None
        if video_id and not relevant_chunks:
            video_transcript = self.extract_transcript(video_id)

        # Step 5: Return the answer and top source
        return {
            "question": question,
            "answer": answer,
            "source": relevant_chunks[0] if relevant_chunks else None,
            "video_id": video_id,
            "video_transcript": video_transcript
        }

    def format_response(self, result: Dict[str, Any]) -> str:
        """
        Format the query result into a well-structured response string.

        Args:
            result: Dict as returned by query()

        Returns:
            Text with the question, the answer, the top source (if any) and
            a transcript excerpt (if a transcript is present)
        """
        response = f"QUESTION: {result['question']}\n\nANSWER:\n{result['answer']}\n\n"

        source = result.get('source')
        if source:
            response += "TOP SOURCE:\n"
            response += f"Score: {source['score']:.2f}\n"
            response += f"Text: {source['text']}\n"

        # Add video transcript if available
        transcript = result.get('video_transcript')
        if transcript:
            response += "\nVIDEO TRANSCRIPT EXCERPT:\n"
            limit = self._TRANSCRIPT_EXCERPT_CHARS
            excerpt = transcript[:limit]
            # Only mark the excerpt as truncated when text was actually cut
            if len(transcript) > limit:
                excerpt += "..."
            response += excerpt + "\n"

        return response


# Example usage
def _read_secret(env_var: str, prompt: str) -> str:
    """Return a credential from the environment, prompting without echo if it is unset.

    Raises:
        RuntimeError: if neither the environment nor the prompt yields a value
    """
    import getpass

    value = os.environ.get(env_var, "").strip()
    if not value:
        value = getpass.getpass(prompt).strip()
    if not value:
        raise RuntimeError(f"{env_var} is required but was not provided")
    return value


def main():
    """Run one example question through the RAG system and print the formatted result."""
    # Credentials are read from the environment (or typed in without echo)
    # instead of being embedded in the notebook. Any key that has ever been
    # pasted into a notebook should be treated as compromised and rotated.
    pinecone_api_key = _read_secret("PINECONE_API_KEY", "Pinecone API key: ")
    groq_api_key = _read_secret("GROQ_API_KEY", "Groq API key: ")
    index_name = os.environ.get("PINECONE_INDEX_NAME", "embeddings")

    print("Initializing Enhanced RAG system...")
    rag = EnhancedRAGSystem(
        pinecone_api_key=pinecone_api_key,
        groq_api_key=groq_api_key,
        index_name=index_name,
        model_name="sentence-transformers/all-mpnet-base-v2"
    )

    # Example query with video ID
    question = "What is JVM?"
    video_id = "NUy_wOxOM8E"  # Example YouTube video ID

    # Add error handling around the main query operation
    try:
        print(f"Processing query: '{question}' with video ID: {video_id}")
        result = rag.query(question, video_id)

        # Format and print the result
        formatted_response = rag.format_response(result)
        print("\n" + "="*50)
        print(formatted_response)
        print("="*50)
    except Exception as e:
        print(f"An error occurred during query processing: {e}")

# Run the demo when this code is executed directly (as a script or a notebook cell).
if __name__ == "__main__":
    main()

Initializing Enhanced RAG system...
Using device: cpu


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Processing query: 'What is JVM?' with video ID: NUy_wOxOM8E

QUESTION: What is JVM?

ANSWER:
I'm happy to help you understand what JVM stands for and its significance in the context of Java programming.

**What is JVM?**

JVM stands for Java Virtual Machine. It's a crucial component of the Java ecosystem, and understanding its role is essential for any Java developer.

**Definition:**

The Java Virtual Machine (JVM) is a virtual machine that runs Java bytecode on a computer. It's a program that executes Java bytecode, which is compiled from Java source code (.java files). The JVM is responsible for providing a platform-independent environment for Java programs to run, meaning that Java code can be written once and run anywhere, without the need for recompilation.

**How JVM Works:**

Here's a high-level overview of the JVM's functionality:

1. **Compilation:** Java source code (.java files) is compiled into an intermediate format called bytecode (.class files) using the `javac` compile

In [None]:
import os
import re
import json
import numpy as np
import pinecone
from sentence_transformers import SentenceTransformer
from groq import Groq
import torch
from typing import List, Dict, Any, Optional
from youtube_transcript_api import YouTubeTranscriptApi
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import spacy

class EnhancedRAGSystem:
    """Question answering that combines Pinecone retrieval with a Groq chat model.

    A question is embedded with a SentenceTransformer model, the best-matching
    vector is fetched from a Pinecone index, and the text stored in that
    match's metadata is handed to Groq as context. The answer is produced by
    Tree of Thought reasoning, then Graph of Thought reasoning, then a plain
    single-prompt completion, using the first strategy that yields a result.
    """

    # Groq chat model used for completions; override per instance via __init__.
    groq_model = "llama3-70b-8192"

    # A transcript segment ending in one of these is left as-is; any other
    # segment gets a trailing period when the transcript is stitched together.
    _SEGMENT_TERMINATORS = ('.', '!', '?', ':', ';')

    # Number of transcript characters shown by format_response().
    _TRANSCRIPT_EXCERPT_CHARS = 500

    def __init__(self, pinecone_api_key: str, groq_api_key: str, index_name: str,
                 model_name: str = "sentence-transformers/all-mpnet-base-v2",
                 groq_model: Optional[str] = None):
        """
        Initialize Enhanced RAG system with Pinecone, Groq, and models.

        Args:
            pinecone_api_key: API key passed to the Pinecone client
            groq_api_key: API key passed to the Groq client
            index_name: Name of the Pinecone index to query
            model_name: SentenceTransformer model used to embed questions
            groq_model: Groq chat model name; None keeps the class default
        """
        # Initialize Pinecone client
        self.pc = pinecone.Pinecone(api_key=pinecone_api_key)
        self.index = self.pc.Index(index_name)

        # Initialize Groq client
        self.groq_client = Groq(api_key=groq_api_key)
        if groq_model is not None:
            self.groq_model = groq_model

        # Check for GPU availability
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {self.device}")

        # Initialize embedding model
        self.embedding_model = SentenceTransformer(model_name).to(self.device)

        # Set parameters
        self.top_k = 1  # Retrieve only the top source

        # ToT and GoT parameters
        self.tot_breadth = 3  # Number of thought branches to explore
        self.tot_depth = 3    # Maximum depth of thought exploration (not read by the methods of this class)
        self.got_max_connections = 5  # Number of thought nodes requested in the GoT prompt

        # Initialize NLP tools; fall back to a tiny built-in stopword list
        # (and no lemmatizer) when the NLTK resources cannot be loaded.
        try:
            nltk.download('stopwords', quiet=True)
            nltk.download('wordnet', quiet=True)
            self.stop_words = set(stopwords.words('english'))
            self.lemmatizer = WordNetLemmatizer()
        except Exception as e:
            print(f"Warning: NLTK resource download issue. Error: {e}")
            self.stop_words = {'a', 'an', 'the', 'and', 'or', 'but', 'is', 'are', 'was', 'were', 'to', 'of', 'in', 'for'}
            self.lemmatizer = None

        # Load spaCy model with fallback. `Exception` (not a bare except) so
        # that Ctrl-C / SystemExit still propagate. Note that self.nlp is not
        # used by any other method of this class.
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except Exception as e:
            print(f"Warning: could not load spaCy model 'en_core_web_sm' ({e}). Using a simple pipeline.")
            self.nlp = spacy.blank("en")

    def _chat(self, system_prompt: str, user_prompt: str, temperature: float, **options: Any) -> str:
        """Send one system+user exchange to Groq and return the stripped reply text ("" if none)."""
        response = self.groq_client.chat.completions.create(
            model=self.groq_model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            max_tokens=512,
            temperature=temperature,
            **options
        )
        return (response.choices[0].message.content or "").strip()

    @staticmethod
    def _coerce_thoughts(parsed: Any) -> List[Dict[str, Any]]:
        """Turn a decoded JSON value into a list of usable thought dicts.

        A usable thought has a non-empty string 'potential_answer'. The
        'confidence_score' is coerced to float (0.0 when missing or invalid).
        """
        if isinstance(parsed, dict):
            if 'potential_answer' in parsed:
                parsed = [parsed]
            else:
                # e.g. {"thoughts": [...]}: use the first list-valued entry
                parsed = next((value for value in parsed.values() if isinstance(value, list)), [])
        if not isinstance(parsed, list):
            return []

        thoughts = []
        for item in parsed:
            if not isinstance(item, dict):
                continue
            answer = item.get('potential_answer')
            if not isinstance(answer, str) or not answer.strip():
                continue
            try:
                confidence = float(item.get('confidence_score', 0.0))
            except (TypeError, ValueError):
                confidence = 0.0
            thoughts.append({
                'reasoning_path': item.get('reasoning_path', ''),
                'potential_answer': answer.strip(),
                'confidence_score': confidence,
            })
        return thoughts

    @classmethod
    def _extract_thoughts(cls, raw: str) -> List[Dict[str, Any]]:
        """Find thought dicts in an LLM reply that may wrap its JSON in prose or code fences.

        Every '[' or '{' in the reply is tried as the start of a JSON value;
        the first value that contains at least one usable thought wins.
        """
        if not raw or not isinstance(raw, str):
            return []

        decoder = json.JSONDecoder()
        for start, char in enumerate(raw):
            if char not in '[{':
                continue
            try:
                value, _ = decoder.raw_decode(raw, start)
            except json.JSONDecodeError:
                continue
            thoughts = cls._coerce_thoughts(value)
            if thoughts:
                return thoughts
        return []

    def tree_of_thought_reasoning(self, query: str, context: str) -> Optional[str]:
        """
        Implement Tree of Thought reasoning strategy.

        Asks the model for several candidate thoughts, asks it to refine each
        one, and returns the answer of the candidate with the highest
        confidence score. A branch whose refinement cannot be parsed keeps
        its initial thought.

        Returns:
            The selected answer, or None when no usable thought could be
            obtained (callers fall back to another strategy)
        """
        try:
            # Initial thought generation
            initial_thoughts_prompt = f"""
            Generate {self.tot_breadth} distinct initial thoughts about the query:
            Query: {query}
            Context: {context}

            Format the response as a JSON list of thought branches, each with:
            - reasoning_path: A step-by-step reasoning approach
            - potential_answer: A draft answer
            - confidence_score: A score from 0-1 indicating thought quality

            Respond with the JSON list only, without any surrounding text.
            """

            initial_reply = self._chat(
                "You are a strategic reasoning assistant.",
                initial_thoughts_prompt,
                temperature=0.7
            )
            if not initial_reply:
                print("Empty response from Groq API")
                return None

            # Parse initial thoughts (tolerates prose or code fences around the JSON)
            initial_thoughts = self._extract_thoughts(initial_reply)
            if not initial_thoughts:
                print("Tree of Thought: no usable thoughts in model response")
                return None

            # Evaluate and expand thoughts; never expand more branches than configured
            branch_limit = max(1, int(self.tot_breadth))
            candidates = []
            for thought in initial_thoughts[:branch_limit]:
                expansion_prompt = f"""
                Expand on this reasoning path for the query:
                Original Query: {query}
                Context: {context}
                Reasoning Path: {thought['reasoning_path']}
                Potential Answer: {thought['potential_answer']}

                Provide:
                - Refined reasoning steps
                - More comprehensive answer
                - Confidence justification

                Format the response as a single JSON object with the keys:
                - reasoning_path: The refined reasoning steps
                - potential_answer: The more comprehensive answer
                - confidence_score: A score from 0-1 justified by the reasoning
                Respond with the JSON object only, without any surrounding text.
                """

                refined = []
                try:
                    expansion_reply = self._chat(
                        "You are a detailed reasoning assistant.",
                        expansion_prompt,
                        temperature=0.6
                    )
                    refined = self._extract_thoughts(expansion_reply)
                except Exception as e:
                    # One failed branch must not discard the others
                    print(f"Tree of Thought expansion error: {e}")

                candidates.append(refined[0] if refined else thought)

            # Select the best thought based on confidence
            best_thought = max(candidates, key=lambda candidate: candidate['confidence_score'])
            return best_thought['potential_answer']

        except Exception as e:
            print(f"Tree of Thought error: {e}")
            return None

    def graph_of_thought_reasoning(self, query: str, context: str) -> Optional[str]:
        """
        Implement Graph of Thought reasoning strategy.

        Asks the model for a set of interconnected thought nodes, then asks
        it to synthesize an answer from them. The nodes are passed on as the
        text the model produced, so they need not be JSON.

        Returns:
            The synthesized answer, or None when either step fails or
            returns nothing
        """
        try:
            # Generate initial thought nodes
            nodes_prompt = f"""
            Create {self.got_max_connections} interconnected thought nodes about:
            Query: {query}
            Context: {context}

            For each node, provide:
            - Core insight
            - Potential connections to other nodes
            - Relevance score
            """

            thought_nodes = self._chat(
                "You are a graph reasoning assistant.",
                nodes_prompt,
                temperature=0.7
            )
            if not thought_nodes:
                print("Graph of Thought: empty response from Groq API")
                return None

            # Create interconnected reasoning
            interconnection_prompt = f"""
            Create a comprehensive answer by strategically connecting these thought nodes:
            Query: {query}
            Thought Nodes: {thought_nodes}

            Synthesize a coherent narrative that:
            - Connects insights logically
            - Maintains query context
            - Provides deep, multi-perspective understanding
            """

            synthesis = self._chat(
                "You are an expert at synthesizing complex ideas.",
                interconnection_prompt,
                temperature=0.6
            )
            return synthesis or None

        except Exception as e:
            print(f"Graph of Thought error: {e}")
            return None

    def generate_answer(self, query: str, relevant_chunks: List[Dict[str, Any]]) -> str:
        """
        Enhanced answer generation using Tree of Thought and Graph of Thought strategies.

        Args:
            query: The user's question
            relevant_chunks: Retrieved chunks, best match first; only the
                first chunk's 'text' is used as context

        Returns:
            The first non-empty result of Tree of Thought, Graph of Thought,
            or a plain completion; a fixed message when there are no chunks
            or every strategy fails
        """
        if not relevant_chunks:
            return "I couldn't find sufficient information to answer your question."

        # Use the top (most relevant) chunk
        top_chunk = relevant_chunks[0]
        context = top_chunk['text']

        # Try Tree of Thought reasoning first
        tot_answer = self.tree_of_thought_reasoning(query, context)
        if tot_answer:
            return tot_answer

        # If ToT fails, try Graph of Thought reasoning
        got_answer = self.graph_of_thought_reasoning(query, context)
        if got_answer:
            return got_answer

        # Fallback to original generation method
        try:
            answer = self._chat(
                "You are an intelligent assistant specialized in educational content.",
                f"Query: {query}\nContext: {context}",
                temperature=0.7,
                top_p=0.95
            )
            # An empty completion is treated like any other failure
            if not answer:
                raise ValueError("Groq returned an empty completion")
            return answer

        except Exception as e:
            print(f"Error generating answer with Groq: {e}")
            return "I encountered an error while generating the answer. Please try again."

    def extract_transcript(self, video_id: str) -> Optional[str]:
        """
        Extract transcript from a YouTube video.

        Args:
            video_id: YouTube video ID

        Returns:
            Transcript text (segments joined by spaces, each ending in
            punctuation) or None if extraction fails
        """
        if not video_id or not isinstance(video_id, str):
            print("Invalid video ID provided")
            return None

        try:
            # youtube_transcript_api 1.x exposes an instance-level fetch();
            # the static get_transcript() is the legacy entry point and is
            # used only when fetch() is not available.
            if hasattr(YouTubeTranscriptApi, 'fetch'):
                transcript_list = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
            else:
                transcript_list = YouTubeTranscriptApi.get_transcript(video_id)

            if not transcript_list:
                print(f"No transcript found for video {video_id}")
                return None

            # Ensure proper formatting with punctuation
            formatted_segments = []
            for segment in transcript_list:
                text = (segment.get('text') or '').strip()
                if not text:
                    continue
                # Add period if segment doesn't end with punctuation
                if text[-1] not in self._SEGMENT_TERMINATORS:
                    text += '.'
                formatted_segments.append(text)

            return ' '.join(formatted_segments)
        except Exception as e:
            print(f"Error fetching transcript for video {video_id}: {e}")
            return None

    def simple_tokenize(self, text):
        """Simple tokenizer that avoids NLTK's punkt.

        Replaces every character that is neither a word character nor
        whitespace with a space, lowercases, and splits on whitespace.
        """
        text = re.sub(r'[^\w\s]', ' ', text)
        return [token for token in text.lower().split() if token]

    def preprocess_text(self, text):
        """NLP preprocessing: stopword removal, lemmatization.

        Returns:
            A (processed_text, {}) tuple; the second element is always an
            empty dict. Empty or non-string input yields ("", {}).
        """
        if not text or not isinstance(text, str):
            return "", {}

        text = re.sub(r'[^\w\s]', ' ', text).lower()
        tokens = self.simple_tokenize(text)
        filtered_tokens = [word for word in tokens if word not in self.stop_words]

        # Use lemmatizer if available
        if self.lemmatizer:
            lemmatized_tokens = [self.lemmatizer.lemmatize(word) for word in filtered_tokens]
        else:
            lemmatized_tokens = filtered_tokens

        processed_text = ' '.join(lemmatized_tokens)
        return processed_text, {}

    def embed_query(self, query: str) -> List[float]:
        """Generate embedding for the query.

        Empty or non-string queries produce an all-zero vector of the
        embedding model's dimension instead of being encoded.
        """
        if not query or not isinstance(query, str):
            print("Warning: Empty or invalid query received for embedding")
            return [0.0] * self.embedding_model.get_sentence_embedding_dimension()

        # Apply preprocessing
        processed_query, _ = self.preprocess_text(query)

        # Ensure we have text to embed (a query made only of stopwords
        # preprocesses to an empty string)
        if not processed_query:
            processed_query = query  # Fall back to original query

        # Generate embedding
        return self.embedding_model.encode(processed_query).tolist()

    def retrieve_relevant_chunks(self, query_embedding: List[float], top_k: int = 1, video_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Retrieve the most relevant chunks from Pinecone based on the query embedding.
        Optionally filter by video ID.

        Returns:
            One dict per match with the keys 'id', 'score', 'text',
            'video_id' and 'chunk_id'; an empty list if the query fails
        """
        # Prepare query parameters
        query_params = {
            "vector": query_embedding,
            "top_k": top_k,
            "include_metadata": True
        }

        # Add video ID filter if provided
        if video_id:
            query_params["filter"] = {"video_id": video_id}

        # Query Pinecone index using similarity search
        try:
            query_response = self.index.query(**query_params)
        except Exception as e:
            print(f"Error querying Pinecone: {e}")
            return []

        # Extract matches with their metadata
        matches = query_response.get('matches', []) or []

        # Format results
        results = []
        for match in matches:
            # A match stored without metadata must not crash the lookup;
            # it simply falls back to the placeholder values below.
            metadata = getattr(match, 'metadata', None) or {}

            results.append({
                'id': match.id,
                'score': match.score,
                'text': metadata.get('text_sample', 'No text available'),
                'video_id': metadata.get('video_id', 'unknown'),
                'chunk_id': metadata.get('chunk_id', -1)
            })

        return results

    def query(self, question: str, video_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Process a user query and return a well-structured answer with supporting evidence.

        Args:
            question: The user's question
            video_id: Optional YouTube video ID to filter sources

        Returns:
            Dict with the keys 'question', 'answer', 'source' (top chunk or
            None), 'video_id' and 'video_transcript'
        """
        # Step 1: Generate embedding for the question
        query_embedding = self.embed_query(question)

        # Step 2: Retrieve the most relevant chunk from Pinecone, optionally filtered by video ID
        relevant_chunks = self.retrieve_relevant_chunks(
            query_embedding,
            self.top_k,
            video_id
        )

        # If no chunks found and a video ID was provided, try without the filter
        if not relevant_chunks and video_id:
            relevant_chunks = self.retrieve_relevant_chunks(query_embedding, self.top_k)

        # Step 3: Generate a comprehensive answer based on the retrieved chunk
        answer = self.generate_answer(question, relevant_chunks)

        # Step 4: Try to fetch video transcript if a video ID was provided and no relevant chunks found.
        # The transcript is only returned alongside the answer; it is not used to generate it.
        video_transcript = None
        if video_id and not relevant_chunks:
            video_transcript = self.extract_transcript(video_id)

        # Step 5: Return the answer and top source
        return {
            "question": question,
            "answer": answer,
            "source": relevant_chunks[0] if relevant_chunks else None,
            "video_id": video_id,
            "video_transcript": video_transcript
        }

    def format_response(self, result: Dict[str, Any]) -> str:
        """
        Format the query result into a well-structured response string.

        Args:
            result: Dict as returned by query()

        Returns:
            Text with the question, the answer, the top source (if any) and
            a transcript excerpt (if a transcript is present)
        """
        response = f"QUESTION: {result['question']}\n\nANSWER:\n{result['answer']}\n\n"

        source = result.get('source')
        if source:
            response += "TOP SOURCE:\n"
            response += f"Score: {source['score']:.2f}\n"
            response += f"Text: {source['text']}\n"

        # Add video transcript if available
        transcript = result.get('video_transcript')
        if transcript:
            response += "\nVIDEO TRANSCRIPT EXCERPT:\n"
            limit = self._TRANSCRIPT_EXCERPT_CHARS
            excerpt = transcript[:limit]
            # Only mark the excerpt as truncated when text was actually cut
            if len(transcript) > limit:
                excerpt += "..."
            response += excerpt + "\n"

        return response

def _read_secret(env_var: str, prompt: str) -> str:
    """Return a credential from the environment, prompting without echo if it is unset.

    Raises:
        RuntimeError: if neither the environment nor the prompt yields a value
    """
    import getpass

    value = os.environ.get(env_var, "").strip()
    if not value:
        value = getpass.getpass(prompt).strip()
    if not value:
        raise RuntimeError(f"{env_var} is required but was not provided")
    return value


def main():
    """Interactive loop: read questions (and optional video IDs) and print the formatted answers."""
    # Credentials are read from the environment (or typed in without echo)
    # instead of being embedded in the notebook. Any key that has ever been
    # pasted into a notebook should be treated as compromised and rotated.
    pinecone_api_key = _read_secret("PINECONE_API_KEY", "Pinecone API key: ")
    groq_api_key = _read_secret("GROQ_API_KEY", "Groq API key: ")
    index_name = os.environ.get("PINECONE_INDEX_NAME", "embeddings")

    print("Initializing Enhanced RAG system...")
    rag = EnhancedRAGSystem(
        pinecone_api_key=pinecone_api_key,
        groq_api_key=groq_api_key,
        index_name=index_name,
        model_name="sentence-transformers/all-mpnet-base-v2"
    )

    while True:
        try:
            question = input("Enter your question (or 'quit' to exit): ").strip()

            if question.lower() == 'quit':
                break

            # Nothing to embed or answer; ask again
            if not question:
                continue

            # Optional: Ask for a specific YouTube video ID
            video_id = input("Enter YouTube Video ID (optional, press Enter to skip): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session like 'quit'
            break

        video_id = video_id if video_id else None

        # Add error handling around the main query operation
        try:
            print(f"\nProcessing query: '{question}' with video ID: {video_id}")
            result = rag.query(question, video_id)

            # Format and print the result
            formatted_response = rag.format_response(result)
            print("\n" + "="*50)
            print(formatted_response)
            print("="*50)
        except Exception as e:
            print(f"An error occurred during query processing: {e}")

# Start the interactive session when this code is executed directly (as a script or a notebook cell).
if __name__ == "__main__":
    main()

Initializing Enhanced RAG system...
Using device: cpu

Processing query: 'what is machine learning?' with video ID: None
Tree of Thought error: Expecting value: line 1 column 1 (char 0)
Graph of Thought error: Expecting value: line 1 column 1 (char 0)

QUESTION: what is machine learning?

ANSWER:
It seems like you've already got a good start on explaining machine learning! Let me help fill in the gaps and provide a more comprehensive definition.

Machine learning is indeed a subset of Artificial Intelligence (AI) that enables machines to learn from data without being explicitly programmed. The core idea, as you mentioned, is to provide machines with access to data and let them identify patterns, relationships, and insights on their own.

In traditional programming, a computer is given a set of rules and instructions to perform a specific task. However, with machine learning, the machine is given data and allowed to develop its own rules and models based on that data. This enables the m