# PC Advisor RAG Chatbot (Google Colab)

This notebook prepares the retrieval-augmented PC advisor chatbot in a Google Colab runtime. It installs the required
packages, builds the ChromaDB vector store, loads the embedding and reranker models, and finally launches a Gradio chat
interface that talks to any OpenAI-compatible LLM endpoint. A final cell also shows how to call the pipeline directly
from Python code.

> **Important:** You must supply your own API key and base URL for an OpenAI-compatible endpoint (Groq, Together, OpenAI, etc.).
> Do **not** share the notebook after adding secrets unless you remove them first.


In [None]:
import os
import subprocess
import sys
from pathlib import Path

# Detect Google Colab: a successful import of `google.colab` is used as the
# marker that the notebook is running there.
try:
    import google.colab  # type: ignore
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

# On Colab the repository is expected at a fixed clone location; elsewhere the
# current directory must already be the repository root.
PROJECT_ROOT = Path("/content/pc-advisor-chatbot" if IN_COLAB else ".").resolve()

if IN_COLAB:
    if not PROJECT_ROOT.exists():
        # TODO: Replace the placeholder below with the actual GitHub URL of your fork if needed.
        REPO_URL = "https://github.com/YOUR_ACCOUNT/pc-advisor-chatbot.git"
        # Fail early with a readable message instead of letting `git clone`
        # fail on the placeholder URL.
        if "YOUR_ACCOUNT" in REPO_URL:
            raise ValueError(
                "Please set REPO_URL to the GitHub HTTPS URL of your pc-advisor-chatbot repository before running this cell."
            )
        subprocess.run(["git", "clone", REPO_URL, str(PROJECT_ROOT)], check=True)
elif not (PROJECT_ROOT / "app.py").exists():
    # Outside Colab, `app.py` is the marker that we are in the repository root.
    raise RuntimeError(
        "Run this notebook from the root of the pc-advisor-chatbot repository or enable the Colab cloning block above."
    )

# Later cells use relative paths (requirements.txt, embedding.py, data/...),
# so make the repository root the working directory in both environments.
os.chdir(PROJECT_ROOT)

working_dir = Path.cwd()
print(f"Working directory: {working_dir}")
print("Repository files:")
# One entry per line, sorted, relative to the working directory.
print("\n".join(sorted(str(p.relative_to(working_dir)) for p in working_dir.iterdir())))


In [None]:
# Install dependencies (this may take a few minutes the first time).
# Installs everything listed in requirements.txt plus gradio pinned to 4.44.0;
# `-q` keeps pip's output quiet. The leading `!` runs the line as a shell
# command, so this cell only works under IPython/Colab, not plain Python.
# NOTE(review): the UI cell passes `additional_outputs` to gr.ChatInterface —
# confirm that the pinned gradio release actually supports that argument.
!pip install -q -r requirements.txt gradio==4.44.0


In [None]:
# Build the ChromaDB vector store if it has not been created yet.
from pathlib import Path
import subprocess
import sys

# The presence of the directory is the only signal used to decide whether the
# embedding step has to run.
chroma_path = Path("data/chromadb")
if not chroma_path.exists():
    print("Creating embeddings and populating ChromaDB (this can take several minutes)...")
    # Run the embedding script with the same interpreter as the notebook kernel;
    # check=True turns a non-zero exit code into an exception.
    build_command = [sys.executable, "embedding.py"]
    subprocess.run(build_command, check=True)
    print("Embedding build complete.")
else:
    print("ChromaDB directory already exists – skipping embedding generation.")


In [None]:
# Load retrieval components (embedding model + reranker).
# The names bound here (`embedding_model`, `reranker_data`) are read as
# module-level globals by `rag_chat` and by the programmatic example cell, so
# this cell must run before either of them.
import retrieval

# presumably selects CPU vs. GPU — verify against retrieval.setup_device
device = retrieval.setup_device()
embedding_model = retrieval.load_embedding_model()
# Only the reranker loader is given the device.
reranker_data = retrieval.load_reranker_data(device)
print("Models loaded successfully.")


In [None]:
import openai
import gradio as gr
import re
from typing import Dict, Generator, List, Optional


def parse_response(response: str) -> "tuple[Optional[str], str]":
    """Split the LLM response into hidden thoughts and the visible answer.

    Args:
        response: Raw text returned by the model.

    Returns:
        A ``(thoughts, answer)`` pair, both stripped of surrounding
        whitespace. ``thoughts`` is ``None`` when the response contains no
        reasoning markers at all.
    """
    match = re.search(r"<think>(.*?)</think>", response, re.DOTALL)
    if match:
        thought_content = match.group(1).strip()
        # Remove only the first reasoning block, keeping the text around it.
        clean_response = (response[: match.start()] + response[match.end():]).strip()
        return thought_content, clean_response

    # Some reasoning models emit only the closing tag because the opening
    # "<think>" is part of the prompt template. Treat everything before the
    # first "</think>" as thoughts so it does not leak into the answer.
    thoughts, closing_tag, answer = response.partition("</think>")
    if closing_tag:
        return thoughts.strip(), answer.strip()

    return None, response.strip()


def get_openai_client(api_key: str, base_url: str) -> openai.OpenAI:
    """Build an OpenAI SDK client for the given endpoint.

    Args:
        api_key: Credential passed to the client.
        base_url: Root URL of the OpenAI-compatible API.

    Returns:
        A new ``openai.OpenAI`` instance.

    Raises:
        ValueError: If either argument is empty or otherwise falsy.
    """
    credentials_complete = bool(api_key) and bool(base_url)
    if credentials_complete:
        return openai.OpenAI(api_key=api_key, base_url=base_url)
    raise ValueError("Both API key and base URL are required.")


def rewrite_query_with_llm(client: openai.OpenAI, query: str, model: str) -> str:
    """Ask the LLM to turn a user question into a keyword-rich search query.

    This is a best-effort step: any failure falls back to the original query
    so retrieval can still run.

    Args:
        client: OpenAI-compatible client; a falsy value disables rewriting.
        query: The user's natural-language question.
        model: Model identifier passed to the chat completions endpoint.

    Returns:
        The rewritten query with double quotes removed, or ``query`` unchanged
        when no client is given, the request fails, or the model returns
        nothing usable.
    """
    if not client:
        return query
    system_prompt = (
        "You are an expert AI assistant that rewrites user queries for a vector database search. "
        "The database contains two types of documents: "
        "1. Individual PC components (CPU, GPU/VGA, RAM, SSD, Mainboard, PSU, Case, etc.). "
        "2. Pre-built or assembled PCs (ready-to-use systems). "
        "Your task is to analyze the user's natural-language query and rewrite it into a concise, "
        "keyword-rich search query that best captures the user's intent.\n\n"
        "Guidelines:\n"
        "- If the user wants to build or assemble a PC, focus on component-level keywords and budget splits.\n"
        "- Dedicate roughly 80% of the budget to CPU/GPU/RAM/SSD and the rest to supporting parts.\n"
        "- Provide approximate per-component prices when the total budget is known.\n"
        "- If the user wants a pre-built PC, use keywords like 'pre-built PC' along with the target use case.\n"
        "- Expand vague terms into precise hardware-related keywords.\n"
        "- Include both Vietnamese and English keywords if helpful.\n"
        "- All currency references are in VND.\n"
        "Do not answer the question – only output the rewritten search query."
    )
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Rewrite the following query: '{query}'"},
            ],
            # Deterministic output: the same question should yield the same search.
            temperature=0.0,
        )
        # `content` may be None (e.g. refusals); normalise to an empty string.
        content = response.choices[0].message.content or ""
    except Exception as exc:  # noqa: BLE001 - deliberate best-effort fallback
        print(f"Rewriting failed ({exc}); using original query instead.")
        return query

    # Reasoning models may prepend their chain of thought, terminated by
    # "</think>". Keep only what follows the last closing tag so the thoughts
    # do not pollute the vector search.
    rewritten_query = content.rpartition("</think>")[2].replace('"', "").strip()
    # An empty rewrite would make retrieval meaningless; use the original.
    return rewritten_query or query


def generate_response_stream(
    client: openai.OpenAI,
    messages: List[Dict[str, str]],
    retrieved_info: str,
    model: str,
) -> Generator[str, None, None]:
    """Stream the assistant's answer, grounded in the retrieved context.

    Args:
        client: OpenAI-compatible client used for the streaming request.
        messages: Conversation so far as role/content dicts, ending with the
            latest user message.
        retrieved_info: Retrieved context text embedded in the system prompt.
        model: Model identifier passed to the chat completions endpoint.

    Yields:
        Non-empty text fragments as they arrive. Failures are reported as a
        single ``"Error ..."`` string rather than raised, so callers can show
        them like any other chunk.
    """
    if not client:
        yield "Error: OpenAI client not initialized."
        return

    system_prompt = (
        "You are a helpful Vietnamese assistant for a PC parts store. "
        "Use the retrieved information below to answer the user's latest question. "
        "Be concise, natural, and friendly.\n\n"
        f"Retrieved Information:\n{retrieved_info}"
    )
    final_messages = [{"role": "system", "content": system_prompt}, *messages]

    try:
        stream = client.chat.completions.create(
            model=model,
            messages=final_messages,
            stream=True,
        )
        for chunk in stream:
            # Some OpenAI-compatible servers send chunks with an empty
            # `choices` list (e.g. usage or keep-alive chunks); skip them
            # instead of failing on the index.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                yield content
    except Exception as exc:  # noqa: BLE001 - surface the failure in the chat
        yield f"Error generating response: {exc}"


def format_history_for_llm(history: List[List[str]]) -> List[Dict[str, str]]:
    """Convert chat history into OpenAI-style role/content messages.

    Two history layouts are accepted:

    * pair style: ``[user_message, assistant_message]`` per turn, where either
      side may be ``None`` or empty and is then skipped;
    * message style: dicts that already carry ``"role"`` and ``"content"``;
      only those two keys are forwarded, and entries without a role or with
      non-text content are skipped.

    Args:
        history: Previous turns in either layout; ``None`` or an empty list
            yields an empty result.

    Returns:
        A flat list of ``{"role": ..., "content": ...}`` dicts in
        conversation order.
    """
    formatted: List[Dict[str, str]] = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role and isinstance(content, str) and content:
                formatted.append({"role": role, "content": content})
            continue

        user_message, assistant_message = turn
        # A turn can lack a user message (e.g. an assistant greeting); sending
        # `content: None` would be rejected by the API, so skip it.
        if user_message:
            formatted.append({"role": "user", "content": user_message})
        if assistant_message:
            formatted.append({"role": "assistant", "content": assistant_message})
    return formatted


def rag_chat(message: str, history: List[List[str]], api_key: str, base_url: str, model: str):
    """Gradio chat callback: rewrite the query, retrieve context, then answer.

    Args:
        message: The user's latest message.
        history: Earlier turns, converted with ``format_history_for_llm``.
        api_key: Credential for the LLM endpoint.
        base_url: Root URL of the LLM endpoint.
        model: Model identifier used for both rewriting and answering.

    Returns:
        A ``(answer, debug_info)`` pair. ``debug_info`` holds the rewritten
        query and retrieved context, plus the model's thoughts when present.

    Raises:
        gr.Error: If the API key or base URL is missing.
    """
    if not (api_key and base_url):
        raise gr.Error("Please provide both the API key and the base URL for your LLM endpoint.")

    llm_client = get_openai_client(api_key, base_url)

    # Retrieval runs on the rewritten query, but the model is asked the
    # user's original wording.
    search_query = rewrite_query_with_llm(llm_client, message, model)
    context = retrieval.perform_retrieval_and_reranking(search_query, embedding_model, reranker_data)

    conversation = format_history_for_llm(history) + [{"role": "user", "content": message}]

    # The stream is drained completely before returning, so the UI receives
    # the answer in one piece.
    raw_answer = "".join(generate_response_stream(llm_client, conversation, context, model))

    thoughts, visible_answer = parse_response(raw_answer)
    debug_info: Dict[str, Optional[str]] = {
        "rewritten_query": search_query,
        "retrieved_context": context,
    }
    if thoughts:
        debug_info["llm_thoughts"] = thoughts

    # Fall back to the raw text if stripping the thoughts left nothing.
    return visible_answer or raw_answer, debug_info


In [None]:
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("## PC Advisor RAG Chatbot")
    intro_text = (
        "Provide your OpenAI-compatible API credentials below, then start chatting. "
        "Use the JSON panel to inspect the rewritten query and the retrieved context."
    )
    gr.Markdown(intro_text)

    # Extra inputs are listed in the order of rag_chat's trailing parameters
    # (api_key, base_url, model).
    api_key_box = gr.Textbox(label="API Key", type="password", placeholder="sk-...")
    base_url_box = gr.Textbox(label="Base URL", value="http://127.0.0.1:1234/v1")
    model_box = gr.Textbox(label="Model", value="qwen/qwen3-4b-thinking-2507")
    # Receives the second value returned by rag_chat (the debug dictionary).
    debug_panel = gr.JSON(label="RAG Debug Info")

    chat_interface = gr.ChatInterface(
        fn=rag_chat,
        additional_inputs=[api_key_box, base_url_box, model_box],
        additional_outputs=debug_panel,
        title="PC Advisor Chatbot",
        description="Ask about PC parts or pre-built systems in Vietnamese or English.",
    )

# share=True asks Gradio for a public link in addition to the local server.
demo.launch(share=True)


In [None]:
# Optional: call the pipeline programmatically from Python.
# Fill in your credentials before running. This cell relies on the functions
# and the `retrieval`, `embedding_model`, and `reranker_data` names defined by
# the earlier cells.
API_KEY = ""
BASE_URL = "http://127.0.0.1:1234/v1"  # Update if you are using a remote endpoint
MODEL_NAME = "qwen/qwen3-4b-thinking-2507"
SAMPLE_QUESTION = "Tôi có 25 triệu, tư vấn cấu hình PC chơi game và làm đồ họa nhẹ?"

if API_KEY and BASE_URL:
    client = get_openai_client(API_KEY, BASE_URL)

    # Step 1: rewrite the question into a search query.
    rewritten = rewrite_query_with_llm(client, SAMPLE_QUESTION, MODEL_NAME)
    print("Rewritten query:\n", rewritten)

    # Step 2: retrieve and rerank context for the rewritten query.
    context = retrieval.perform_retrieval_and_reranking(
        rewritten,
        embedding_model,
        reranker_data,
    )
    print("\nRetrieved context:\n", context)

    # Step 3: answer the original question, draining the stream into one string.
    response_text = "".join(
        generate_response_stream(
            client,
            [{"role": "user", "content": SAMPLE_QUESTION}],
            context,
            MODEL_NAME,
        )
    )

    # Step 4: separate the model's thoughts from the visible answer.
    thoughts, final_answer = parse_response(response_text)
    if thoughts:
        print("\nLLM thoughts:\n", thoughts)
    print("\nAssistant answer:\n", final_answer or response_text)
else:
    print("Set API_KEY and BASE_URL above to test the pipeline programmatically.")
