In [None]:
!pip install langchain langchain_community -q
!pip install google-cloud-bigquery requests -q
!pip install google-cloud-aiplatform vertexai scikit-learn numpy -q
!pip install trafilatura -q
!pip install customtkinter -q
!pip install --upgrade rich -q
!pip install nbstripout -q




In [None]:
# =============================================================================
# IMPORT SECTION WITH CONTEXTUAL COMMENTS
# =============================================================================

# Data fetching and web content extraction utilities
import requests  # For HTTP requests to fetch authentication tokens and web content
from google.oauth2 import credentials  # Google Cloud authentication framework
from google.cloud import bigquery  # BigQuery client for data warehouse operations
from google.oauth2.credentials import Credentials  # OAuth2 credential management

# Google Vertex AI for generative AI capabilities
import vertexai  # Main Vertex AI SDK initialization
from vertexai.generative_models import GenerativeModel  # For Gemini model integration

# Web content extraction and text processing
import trafilatura  # Advanced web scraping and content extraction from URLs

# System utilities for performance monitoring and concurrency
import time  # For timing operations and performance metrics
import threading  # For parallel execution of BigQuery queries
from functools import partial  # For function argument binding

# Jupyter notebook UI components for interactive experience
import ipywidgets as widgets  # Interactive UI elements for question selection
from IPython.display import display, clear_output  # Display management in notebooks

# Rich text formatting for enhanced terminal output
from rich.console import Console  # Advanced console output with formatting
from rich.table import Table  # Tabular data display
from rich.panel import Panel  # Bordered content panels
from rich.text import Text  # Formatted text rendering
from rich.markdown import Markdown  # Markdown rendering support


class NVIDIAExpertSystem:
    """Retrieval-augmented Q&A assistant backed by BigQuery and Vertex AI.

    The pipeline embeds the user question with a BigQuery ML model, runs
    ``VECTOR_SEARCH`` over the NVIDIA and Stack Overflow embedding tables,
    and hands the retrieved passages to a Gemini model to write the answer.
    """

    # Seconds to wait for the token endpoint before giving up; without a
    # timeout a stalled endpoint would block the constructor indefinitely.
    TOKEN_REQUEST_TIMEOUT = 30

    # Width (in characters) of the "Document Snippet" table column.
    SNIPPET_WIDTH = 70

    def __init__(self):
        """
        Initialize the NVIDIA Expert System with hardcoded configuration for judges.

        Fetches a temporary access token over HTTP, then uses it to build the
        BigQuery client, initialize Vertex AI, and create the Gemini model
        handle and the Rich console.

        Raises:
        requests.exceptions.RequestException: the token request failed
        KeyError / ValueError: the token response was not the expected JSON
        """
        # --- Hardcoded Configuration for Judges ---
        self.CLOUD_RUN_TOKEN_URL = "https://bq-token-vendor-987726911762.us-central1.run.app/token"
        self.GCP_PROJECT_ID = "precise-mystery-466919-u5"
        self.DATASET_ID = "nvidia_docs_qa"
        self.EMBEDDING_MODEL = "text_embedding_model"
        self.GEMINI_MODEL = "gemini-2.0-flash"

        # Fully qualified, backtick-quoted table references used in SQL
        dataset_prefix = f"{self.GCP_PROJECT_ID}.{self.DATASET_ID}"
        self.NVIDIA_EMBEDDINGS_TABLE = f"`{dataset_prefix}.unified_nvidia_embeddings`"
        self.NVIDIA_KNOWLEDGE_TABLE = f"`{dataset_prefix}.unified_nvidia_knowledge`"
        self.SO_EMBEDDINGS_TABLE = f"`{dataset_prefix}.stackoverflow_embeddings`"
        self.SO_KNOWLEDGE_TABLE = f"`{dataset_prefix}.stackoverflow_knowledge_clone`"

        # Model reference for embedding generation
        self.EMBEDDING_MODEL_REF = f"`{dataset_prefix}.{self.EMBEDDING_MODEL}`"

        # --- Authentication Token Acquisition ---
        print(f"🔑 Fetching temporary access token from: {self.CLOUD_RUN_TOKEN_URL}")
        try:
            resp = requests.get(self.CLOUD_RUN_TOKEN_URL, timeout=self.TOKEN_REQUEST_TIMEOUT)
            resp.raise_for_status()
            token = resp.json()["access_token"]
        except (requests.exceptions.RequestException, KeyError, ValueError) as e:
            # A malformed body (bad JSON / missing key) is reported the same
            # way as a transport failure, then re-raised for the caller.
            print(f"❌ ERROR: Failed to get token. Details: {e}")
            raise
        print("✅ Successfully fetched temporary token!")

        # --- BigQuery Client Initialization ---
        print("📊 Initializing BigQuery client with temporary credentials...")
        creds = Credentials(token)
        self.client = bigquery.Client(credentials=creds, project=self.GCP_PROJECT_ID)
        print("✅ BigQuery client is ready.")

        # --- Vertex AI Initialization ---
        print("🚀 Initializing Vertex AI...")
        vertexai.init(
            project=self.GCP_PROJECT_ID,
            location="us-central1",
            credentials=creds,
        )
        self.gen_model = GenerativeModel(self.GEMINI_MODEL)
        self.console = Console()
        print("✅ Vertex AI initialized!")

    @staticmethod
    def _confidence_percent(distance):
        """Map a cosine distance to a confidence percentage clamped to [0, 100].

        Cosine distance can exceed 1, which would otherwise produce a
        negative "confidence".
        """
        return max(0.0, min(100.0, (1 - distance) * 100))

    @staticmethod
    def _format_snippet(content, width=70):
        """Flatten newlines in *content* and truncate it to at most *width* characters.

        ``None`` (e.g. a NULL column value) is treated as an empty string.
        Truncated snippets end in "..." and still fit within *width*.
        """
        snippet = (content or "").replace("\n", " ").strip()
        if len(snippet) > width:
            return snippet[: width - 3] + "..."
        return snippet

    def get_embeddings_from_bigquery(self, texts):
        """
        Convert input texts to vector embeddings using BigQuery ML's embedding model.

        Each text is sent as a bound query parameter rather than spliced into
        the SQL string, so quotes, backslashes and newlines in the text can
        neither break the query nor alter the text that gets embedded.

        Parameters:
        texts (list): List of text strings to convert to embeddings

        Returns:
        list: One embedding vector per result row returned by BigQuery
        """
        query = f"""
            SELECT ml_generate_embedding_result
            FROM ML.GENERATE_EMBEDDING(
                MODEL {self.EMBEDDING_MODEL_REF},
                (SELECT @content AS content)
            )
        """
        embeddings = []
        for text in texts:
            job_config = bigquery.QueryJobConfig(
                query_parameters=[
                    bigquery.ScalarQueryParameter("content", "STRING", text),
                ]
            )
            rows = self.client.query(query, job_config=job_config).result()
            embeddings.extend(row.ml_generate_embedding_result for row in rows)
        return embeddings

    def search_similar_documents(self, question, top_k=10):
        """
        Perform semantic search across NVIDIA documentation and Stack Overflow knowledge base.

        Embeds the question, submits one VECTOR_SEARCH query per knowledge
        source, then merges both result sets and keeps the closest matches.

        Parameters:
        question (str): User query to search for
        top_k (int): Number of top results to return (must be >= 1)

        Returns:
        list: Up to top_k result rows sorted by ascending cosine distance;
              empty if no embedding could be produced for the question

        Raises:
        ValueError: if top_k is smaller than 1
        """
        # top_k is interpolated into SQL below, so force it to a plain int.
        top_k = int(top_k)
        if top_k < 1:
            raise ValueError("top_k must be a positive integer")

        # Generate embedding for the user question
        embeddings = self.get_embeddings_from_bigquery([question])
        if not embeddings or not embeddings[0]:
            # No usable vector came back; there is nothing to search with.
            return []
        # float() guarantees only numeric literals end up in the SQL text.
        embedding_str = ",".join(str(float(value)) for value in embeddings[0])

        # Query for NVIDIA documentation using vector search
        nvidia_query = f"""
            WITH search_results AS (
                SELECT
                    base.doc_id,
                    distance
                FROM VECTOR_SEARCH(
                    TABLE {self.NVIDIA_EMBEDDINGS_TABLE},
                    'embedding',
                    (SELECT [{embedding_str}] AS query_vector),
                    top_k => {top_k},
                    distance_type => 'COSINE'
                )
            )
            SELECT
                'NVIDIA Docs' AS source_type,
                k.content,
                s.distance AS similarity_score,
                k.source_url
            FROM search_results s
            JOIN {self.NVIDIA_KNOWLEDGE_TABLE} k
              ON s.doc_id = k.doc_id
            ORDER BY s.distance ASC
        """

        # Query for Stack Overflow questions using vector search
        so_query = f"""
            WITH search_results AS (
                SELECT
                    base.doc_id,
                    distance
                FROM VECTOR_SEARCH(
                    TABLE {self.SO_EMBEDDINGS_TABLE},
                    'embedding',
                    (SELECT [{embedding_str}] AS query_vector),
                    top_k => {top_k},
                    distance_type => 'COSINE'
                )
            )
            SELECT
                'Stack Overflow' AS source_type,
                CONCAT('**Question:** ', k.title, '\\n\\n**Answer:** ', k.answer) AS content,
                s.distance AS similarity_score,
                k.source_url
            FROM search_results s
            JOIN {self.SO_KNOWLEDGE_TABLE} k
              ON s.doc_id = k.doc_id
            ORDER BY s.distance ASC
        """

        # Submit both jobs before waiting on either result
        nvidia_job = self.client.query(nvidia_query)
        so_job = self.client.query(so_query)

        # Combine and sort results by distance (smaller = more similar)
        all_results = list(nvidia_job.result()) + list(so_job.result())
        all_results.sort(key=lambda row: row.similarity_score)

        return all_results[:top_k]

    def _display_search_metrics(self, docs):
        """
        Display search results in a formatted table with confidence scores.

        Parameters:
        docs (list): Result rows from search_similar_documents; each row is
                     read for source_type, content, source_url and
                     similarity_score
        """
        table = Table(title="📊 Search & Retrieval Metrics", show_header=True, header_style="bold magenta")
        table.add_column("#", style="dim", width=3)
        table.add_column("Source", style="bold blue", width=15)
        table.add_column("Document Snippet", style="cyan", no_wrap=True, width=self.SNIPPET_WIDTH)
        table.add_column("Source URL", style="green", no_wrap=True, width=50)
        table.add_column("Confidence", justify="right", style="bold yellow")

        for i, doc in enumerate(docs, 1):
            # similarity_score holds a cosine distance; convert it to a percentage
            confidence = self._confidence_percent(doc.similarity_score)

            # Color code confidence levels
            if confidence > 75:
                color = "green"
            elif confidence > 50:
                color = "yellow"
            else:
                color = "red"

            table.add_row(
                str(i),
                doc.source_type,
                self._format_snippet(doc.content, self.SNIPPET_WIDTH),
                doc.source_url or "",
                Text(f"{confidence:.1f}%", style=color),
            )
        self.console.print(table)

    def _display_performance_metrics(self, timings):
        """
        Display performance timing metrics for each stage of the RAG pipeline.

        Parameters:
        timings (dict): Maps a stage name to its duration in seconds
        """
        table = Table(title="⏱️ Performance Metrics", show_header=True, header_style="bold blue")
        table.add_column("Stage", style="cyan")
        table.add_column("Duration (s)", style="magenta", justify="right")
        table.add_column("Percentage", style="green", justify="right")

        total_time = sum(timings.values())
        for stage, duration in timings.items():
            # Guard against division by zero when nothing was measured
            percentage = (duration / total_time * 100) if total_time > 0 else 0
            table.add_row(stage, f"{duration:.3f}", f"{percentage:.1f}%")

        table.add_section()
        table.add_row("Total", f"{total_time:.3f}", "100.0%")
        self.console.print(table)

    def generate_answer(self, question):
        """
        End-to-end RAG pipeline: Search → Context Building → Answer Generation.

        Prints the retrieval table, the list of sources, the model's answer
        and a timing breakdown to the console. Returns nothing.

        Parameters:
        question (str): User question to process through the RAG pipeline
        """
        timings = {}

        # --- 1. Vector Search Phase ---
        start_time = time.time()
        similar_docs = self.search_similar_documents(question, top_k=3)
        timings['Vector Search'] = time.time() - start_time

        if not similar_docs:
            self.console.print("[bold red]I couldn't find relevant information in our NVIDIA documentation.[/bold red]")
            return

        # Display search metrics to user
        self._display_search_metrics(similar_docs)

        # --- 2. Context Building Phase ---
        start_time = time.time()
        context_parts = [f"source: {doc.source_url}\ncontent: {doc.content}" for doc in similar_docs]
        sources = [f"source: {doc.source_url}\n" for doc in similar_docs]
        context_text = "\n\n---\n\n".join(context_parts)
        sources_text = "\n\n---\n\n".join(sources)
        timings['Content Fetching'] = time.time() - start_time

        # Only the source URLs are shown to the user; the full passages go to the model
        self.console.print(Panel(sources_text, title="[bold blue]📝 Context for LLM[/bold blue]", expand=False))

        # Construct RAG prompt with context and instructions
        prompt = f"""
        **Role**: You are an enthusiastic NVIDIA GPU expert assistant. You love helping developers with CUDA, GPU programming, and AI technologies.

        **Context from NVIDIA Documentation**:
        {context_text}

        **User Question**: {question}

        **Instructions**:
        - Answer conversationally and helpfully, like a knowledgeable colleague.
        - Use bullet points or numbered steps when explaining complex topics.
        - Show enthusiasm for NVIDIA technologies.
        - Keep it professional but friendly.
        - Use emojis sparingly to make it engaging.
        - Always base your answer strictly on the context provided.

        **Your Response**:
        """

        # --- 3. Answer Generation Phase ---
        start_time = time.time()
        response = self.gen_model.generate_content(
            prompt,
            generation_config={
                "temperature": 0.3,
                "max_output_tokens": 1024,
                "top_p": 0.9,
            }
        )
        timings['Answer Generation'] = time.time() - start_time

        # Reading .text raises ValueError when the response carries no usable
        # text (for example a blocked or empty candidate); report that
        # instead of crashing the UI callback.
        try:
            answer_text = response.text
        except ValueError as e:
            self.console.print("[bold red]The model did not return a usable answer.[/bold red]")
            # Printed as plain Text so brackets in the message are not parsed as markup
            self.console.print(Text(str(e)))
            self._display_performance_metrics(timings)
            return

        # Display final answer with performance metrics
        self.console.print(Panel(Markdown(answer_text), title="[bold green]💡 NVIDIA AI Assistant Says...[/bold green]"))
        self._display_performance_metrics(timings)


def run_assistant():
    """
    Initialize the NVIDIA Expert System and display its interactive UI.

    Builds one button per predefined question plus a free-text input, wires
    their click handlers to the answer pipeline, and displays the widgets.
    Answers are rendered into a shared output area that is cleared before
    each new question.
    """
    # Imported locally: only this function needs it, for rendering user text safely.
    from rich.markup import escape

    # Initialize the expert system
    expert = NVIDIAExpertSystem()
    expert.console.print(Panel("[[bold green]🚀 NVIDIA AI Assistant Initialized[/bold green]]", title="✅ System Ready", expand=False))

    # Predefined questions for quick testing
    questions = [
        "What is CUDA memory coalescing and why is it important?",
        "How can I optimize CUDA kernels for better performance?",
        "What are the differences between shared memory and global memory in CUDA?",
        "Explain the concept of warp divergence in CUDA.",
        "How do CUDA streams help with concurrency?",
    ]

    # --- UI Components for Interactive Experience ---
    question_buttons = [widgets.Button(description=q, layout=widgets.Layout(width='95%')) for q in questions]
    custom_question_text = widgets.Text(placeholder='Or type your own question here...', layout=widgets.Layout(width='70%'))
    custom_question_button = widgets.Button(description="Ask Assistant", button_style='success')
    output_area = widgets.Output()

    def ask_question(question_text):
        """Clear the output area, echo the question, and run the answer pipeline."""
        with output_area:
            clear_output()
            # Escape the question so characters such as "[" in user input are
            # shown literally instead of being parsed as Rich markup tags.
            expert.console.print(Panel(f"[bold yellow]❓ Asking[/bold yellow]: {escape(question_text)}", title="User Question"))
            expert.generate_answer(question_text)

    def on_button_clicked(b):
        """Callback for predefined question buttons; the button label is the question."""
        ask_question(b.description)

    def on_custom_button_clicked(b):
        """Callback for the custom question button; ignores empty or whitespace-only input."""
        question_text = custom_question_text.value.strip()
        if question_text:
            ask_question(question_text)

    # Register event handlers
    for btn in question_buttons:
        btn.on_click(on_button_clicked)
    custom_question_button.on_click(on_custom_button_clicked)

    # --- UI Layout Assembly ---
    expert.console.print(Panel("[bold cyan]Select a question or enter your own below:[/bold cyan]"))
    buttons_box = widgets.VBox(question_buttons)
    custom_input_box = widgets.HBox([custom_question_text, custom_question_button])
    display(buttons_box, custom_input_box, output_area)


# Launch the interactive assistant only when this code runs as the main
# module, not when it is imported.
if __name__ == "__main__":
    run_assistant()