In [1]:
pip install openai chromadb tqdm

Collecting chromadb
  Downloading chromadb-1.0.12-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting fastapi==0.115.9 (from chromadb)
  Downloading fastapi-0.115.9-py3-none-any.whl.metadata (27 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-4.4.0-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.34.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.34.0-py3-none-any.whl.metadata (2.4 kB)
Collecting opentelemetry-instrumentation-fastapi>=0.41b0 (from chromadb)
  Downloading opentelemetry_instrumentation_fastapi-0.55b0-py3-none-any.whl.metadata (2.2 kB)
Collecting opentelemetry-sdk>=1.2.0 (fr

In [2]:
import json
import os
from tqdm import tqdm
from dotenv import load_dotenv
from openai import OpenAI
import chromadb
from chromadb.config import Settings

In [47]:
from google.colab import files
import os
from dotenv import load_dotenv

# 1. Show "Choose File" button and upload
# `uploaded` is indexed by filename in the next cell, which writes the uploaded
# bytes to disk and passes that filename to load_dotenv.
uploaded = files.upload()  # <-- This creates the button



Saving OPENAI_API_KEY.env to OPENAI_API_KEY (2).env


In [48]:
# 2. Save the uploaded file and load it
# Fail with a readable message when nothing was uploaded; otherwise
# next(iter(uploaded)) would raise a bare StopIteration.
if not uploaded:
    raise ValueError("No file was uploaded; re-run the upload cell and choose the .env file.")

# The first (only) key of `uploaded` is the filename; its value is the file's bytes.
env_filename = next(iter(uploaded))
with open(env_filename, 'wb') as f:
    f.write(uploaded[env_filename])



In [49]:
# 3. Load environment variables
# override=True: load_dotenv keeps already-set variables by default, so without it
# re-uploading a corrected .env in a live kernel would silently keep the old values.
load_dotenv(env_filename, override=True)


True

In [22]:
from google.colab import files
import json

# NOTE: this rebinds `uploaded`, replacing the value left by the .env upload cell.
uploaded = files.upload()  # Shows the "Choose File" button
# Fail with a readable message when nothing was uploaded; otherwise
# next(iter(uploaded)) would raise a bare StopIteration.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose the KB JSON file.")
json_filename = next(iter(uploaded))  # Get uploaded file name

Saving self_critique_loop_dataset.json to self_critique_loop_dataset (1).json


In [26]:
# Read the knowledge base. The encoding is given explicitly so the result does not
# depend on the platform's default text encoding.
with open(json_filename, 'r', encoding='utf-8') as f:
    kb_data = json.load(f)

# The preview below (and the indexing cell) need at least one record.
if not kb_data:
    raise ValueError(f"{json_filename} contains no KB records.")

# Preview first entry
print("Sample record from KB:")
print(json.dumps(kb_data[0], indent=2))


✅ Sample record from KB:
{
  "doc_id": "KB001",
  "question": "What are best practices for debugging?",
  "answer_snippet": "When addressing debugging, it's important to follow well-defined patterns...",
  "source": "debugging_guide.md",
  "confidence_indicator": "moderate",
  "last_updated": "2024-01-10"
}


In [17]:
!pip install openai chromadb




In [51]:
from openai import AzureOpenAI
import chromadb


def _azure_client_from_env(key_var, version_var, endpoint_var):
    """Build an AzureOpenAI client whose settings are read from the named environment variables."""
    return AzureOpenAI(
        api_key=os.getenv(key_var),
        api_version=os.getenv(version_var),
        azure_endpoint=os.getenv(endpoint_var),
    )


# Embedding client (text-embedding-ada-002)
embedding_client = _azure_client_from_env(
    "OPENAI_API_KEY", "OPENAI_API_VERSION", "OPENAI_ENDPOINT"
)

# Chat completion client (e.g., gpt-4o-mini)
chat_client = _azure_client_from_env(
    "OPEN_API_CHAT_KEY", "OPENAI_CHAT_API_VERSION", "OPEN_CHAT_ENDPOINT"
)


In [19]:
import chromadb

# On-disk Chroma store rooted at ./chroma_kb (auto-creates folder).
chroma_client = chromadb.PersistentClient(
    path="./chroma_kb",
)
# Open the "kb_index" collection, creating it on first use.
collection = chroma_client.get_or_create_collection(name="kb_index")


In [52]:
# Embed and index the KB in batches instead of one embedding request and one
# upsert per record.
# 16 inputs per request is a deliberately small batch — assumed to be within the
# embedding deployment's per-request limit; TODO confirm and raise if appropriate.
EMBED_BATCH_SIZE = 16

# Keep only the last record for each doc_id. Record-by-record upserts let a later
# duplicate overwrite an earlier one; de-duplicating up front preserves that result
# and avoids sending the same id twice within one batched upsert.
unique_entries = list({entry["doc_id"]: entry for entry in kb_data}.values())

for start in tqdm(range(0, len(unique_entries), EMBED_BATCH_SIZE), desc="Indexing KB"):
    batch = unique_entries[start:start + EMBED_BATCH_SIZE]
    snippets = [entry["answer_snippet"] for entry in batch]

    response = embedding_client.embeddings.create(
        input=snippets,
        model=os.getenv("OPENAI_DEPLOYMENT")
    )
    # Order by the returned `index` so every vector lines up with its snippet.
    vectors = [
        item.embedding
        for item in sorted(response.data, key=lambda item: item.index)
    ]

    # upsert (not add) so re-running this cell overwrites existing ids.
    collection.upsert(
        documents=snippets,
        embeddings=vectors,
        ids=[entry["doc_id"] for entry in batch],
        metadatas=[
            {
                "source": entry["source"],
                "last_updated": entry["last_updated"]
            }
            for entry in batch
        ]
    )

print("All entries indexed into Chroma.")


All entries indexed into Chroma.


Retrieval Using Same Embedding Client

In [53]:
def retrieve_kb_snippets(query: str, top_k: int = 5):
    """Return up to `top_k` KB snippets nearest to `query`.

    The query is embedded with `embedding_client` (deployment name read from the
    OPENAI_DEPLOYMENT environment variable) and looked up in `collection`.

    Args:
        query: Text to search for.
        top_k: Maximum number of results requested from the collection.

    Returns:
        A list of dicts with keys "doc_id", "answer_snippet" and "source", in the
        order returned by the collection. The list is shorter than `top_k` when
        the collection returns fewer results.
    """
    response = embedding_client.embeddings.create(
        input=[query],
        model=os.getenv("OPENAI_DEPLOYMENT")
    )
    query_embedding = response.data[0].embedding

    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k
    )

    # Exactly one query embedding was sent, so position 0 holds its result lists.
    ids = results["ids"][0]
    documents = results["documents"][0]
    metadatas = results["metadatas"][0]

    # Iterate over what actually came back rather than range(top_k): indexing by
    # top_k raises IndexError whenever fewer than top_k results are returned.
    return [
        {
            "doc_id": doc_id,
            "answer_snippet": document,
            "source": metadata["source"]
        }
        for doc_id, document, metadata in zip(ids, documents, metadatas)
    ]

Answer Generator (Using Chat Client)

In [54]:
def generate_initial_answer(user_question: str, kb_hits: list):
    """Ask the chat model for a first-draft answer grounded in the retrieved snippets.

    Args:
        user_question: The question to answer.
        kb_hits: Dicts carrying "doc_id" and "answer_snippet"; each becomes one
            "[doc_id] snippet" line of the prompt's context section.

    Returns:
        The model's reply with surrounding whitespace removed.
    """
    context_lines = []
    for hit in kb_hits:
        context_lines.append(f"[{hit['doc_id']}] {hit['answer_snippet']}")
    context = "\n".join(context_lines)

    prompt = f"""
User Question:
{user_question}

Context:
{context}

Answer concisely, citing doc_ids like [KB001].
"""
    messages = [{"role": "user", "content": prompt}]
    completion = chat_client.chat.completions.create(
        model=os.getenv("OPENAI_CHAT_DEPLOYMENT"),
        temperature=0,
        messages=messages,
    )
    reply = completion.choices[0].message.content
    return reply.strip()


Self-Critique

In [55]:
def critique_answer(user_question: str, initial_answer: str, kb_hits: list):
    """Ask the chat model to judge an answer against the retrieved context.

    The prompt instructs the model to reply with either COMPLETE or
    "REFINE: <missing topics>".

    Args:
        user_question: The question that was asked.
        initial_answer: The draft answer being evaluated.
        kb_hits: Dicts carrying "doc_id" and "answer_snippet", rendered as the
            prompt's context section.

    Returns:
        The model's reply with surrounding whitespace removed.
    """
    context = "\n".join(
        f"[{hit['doc_id']}] {hit['answer_snippet']}" for hit in kb_hits
    )

    prompt = f"""
Evaluate this answer based on context.

Question: {user_question}

Answer: {initial_answer}

Context:
{context}

Reply with ONLY:
- COMPLETE
- REFINE: <missing topics>
"""
    request = {
        "model": os.getenv("OPENAI_CHAT_DEPLOYMENT"),
        "temperature": 0,
        "messages": [{"role": "user", "content": prompt}],
    }
    verdict = chat_client.chat.completions.create(**request)
    return verdict.choices[0].message.content.strip()


Refiner

In [56]:
def refine_answer(user_question: str, initial_answer: str, critique: str):
    """Improve `initial_answer` using one extra KB snippet targeted at the critique.

    Args:
        user_question: The original question.
        initial_answer: The draft answer to improve.
        critique: Critique text, normally "REFINE: <missing topics>". The prefix is
            recognised case-insensitively and may be preceded by a list bullet.

    Returns:
        The refined answer from the chat model with surrounding whitespace removed,
        or `initial_answer` unchanged when retrieval returns no snippet.
    """
    # Remove only a leading "REFINE:" marker, ignoring case and an optional bullet,
    # rather than every literal "REFINE:" anywhere in the text.
    prefix = "REFINE:"
    text = critique.strip()
    unbulleted = text.lstrip("-* \t")
    if unbulleted[:len(prefix)].upper() == prefix:
        text = unbulleted[len(prefix):]
    missing = text.strip()

    # Avoid a dangling "<question> and " query when the critique names no topics.
    new_query = f"{user_question} and {missing}" if missing else user_question

    hits = retrieve_kb_snippets(new_query, top_k=1)
    if not hits:
        # No additional evidence to refine with; keep the draft answer.
        return initial_answer
    extra_hit = hits[0]

    prompt = f"""
Refine this answer.

Question: {user_question}

Initial Answer:
{initial_answer}

Missing Info: {missing}

Snippet:
[{extra_hit['doc_id']}] {extra_hit['answer_snippet']}

Provide improved answer with citations.
"""
    response = chat_client.chat.completions.create(
        model=os.getenv("OPENAI_CHAT_DEPLOYMENT"),
        temperature=0,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content.strip()


In [58]:
def run_agentic_rag(user_question: str):
    """Run retrieve -> answer -> critique -> (optional) refine for one question.

    Args:
        user_question: The question to answer.

    Returns:
        A dict with keys "question", "initial_answer", "critique" (the raw critique
        text) and "final_answer". "final_answer" is the initial answer when the
        critique is COMPLETE, the refined answer when it starts with REFINE:, and
        the initial answer plus a note when the critique is neither.
    """
    kb_hits = retrieve_kb_snippets(user_question)
    initial = generate_initial_answer(user_question, kb_hits)
    critique = critique_answer(user_question, initial, kb_hits)

    # Normalise once so both branches are judged the same way: ignore surrounding
    # whitespace, a leading list bullet (the critique prompt shows its options as a
    # bulleted list) and letter case.
    verdict = critique.strip().lstrip("-* \t")
    verdict_upper = verdict.upper()

    if verdict_upper.rstrip(" .!") == "COMPLETE":
        final = initial
    elif verdict_upper.startswith("REFINE:"):
        # Hand refine_answer a canonical "REFINE: <topics>" string so its prefix
        # handling works however the model formatted the verdict.
        missing = verdict[len("REFINE:"):].strip()
        final = refine_answer(user_question, initial, f"REFINE: {missing}")
    else:
        final = initial + "\n(Note: unclear critique)"

    return {
        "question": user_question,
        "initial_answer": initial,
        "critique": critique,
        "final_answer": final
    }


In [59]:
# Run the full pipeline on one question and print every field of the result.
result = run_agentic_rag("What are best practices for debugging?")
for label, text in result.items():
    print(f"\n🔹 {label.upper()}:\n{text}")



🔹 QUESTION:
What are best practices for debugging?

🔹 INITIAL_ANSWER:
Best practices for debugging include:

1. **Reproduce the Issue**: Ensure you can consistently replicate the problem to understand its context [KB001].
2. **Isolate the Problem**: Narrow down the code to the smallest section that produces the error [KB011].
3. **Use Debugging Tools**: Utilize built-in debuggers, logging, and breakpoints to track the flow of execution [KB021].
4. **Check for Common Errors**: Look for typical mistakes such as syntax errors, off-by-one errors, or incorrect variable types [KB019].
5. **Review Recent Changes**: Analyze any recent code changes that might have introduced the issue [KB029].
6. **Consult Documentation**: Refer to relevant documentation for libraries or frameworks being used to ensure correct usage [KB001].

Following these patterns can help streamline the debugging process and lead to quicker resolutions.

🔹 CRITIQUE:
COMPLETE

🔹 FINAL_ANSWER:
Best practices for debugging in

In [61]:
def run_multiple_agentic_queries():
    """Collect questions interactively, then run the agentic RAG pipeline on each.

    Questions are read with input() until the user types 'exit', 'quit' or 'done'
    (case-insensitive, surrounding whitespace ignored). Blank lines are skipped.
    For each question the retrieved KB hits and every field of the pipeline result
    are printed. Returns None.
    """
    print("📥 Enter your questions (type 'exit' to finish):")
    queries = []

    # Step 1: Take multiple inputs
    while True:
        # Strip so that " exit " still terminates and queries carry no stray whitespace.
        user_input = input("🔹 Your Query: ").strip()
        if user_input.lower() in ('exit', 'quit', 'done'):
            break
        if not user_input:
            # An empty line is not a question; do not spend API calls on it.
            continue
        queries.append(user_input)

    # Step 2: Run Agentic RAG for each
    for idx, query in enumerate(queries, start=1):
        print(f"\n\n================ Query {idx} ====================")
        print(f"❓ Question: {query}")

        result = run_agentic_rag(query)

        # Optional: Show KB hits separately
        # NOTE: this retrieves again only for display; run_agentic_rag does its own
        # retrieval internally and does not return the hits.
        hits = retrieve_kb_snippets(query)
        print("\n📚 KB Hits:")
        for hit in hits:
            print(f"- [{hit['doc_id']}] {hit['answer_snippet'][:100]}...")

        # Show full result
        for key, value in result.items():
            print(f"\n🔹 {key.upper()}:\n{value}")


In [62]:
# Start the interactive session: prompts for questions until 'exit', 'quit' or 'done' is entered.
run_multiple_agentic_queries()


📥 Enter your questions (type 'exit' to finish):
🔹 Your Query: What are performance tuning tips?
🔹 Your Query: How do I version my APIs
🔹 Your Query: What should I consider for error handling?
🔹 Your Query: exit


❓ Question: What are performance tuning tips?

📚 KB Hits:
- [KB002] When addressing performance tuning, it's important to follow well-defined patterns......
- [KB012] When addressing performance tuning, it's important to follow well-defined patterns......
- [KB022] When addressing performance tuning, it's important to follow well-defined patterns......
- [KB003] When addressing caching, it's important to follow well-defined patterns......
- [KB023] When addressing caching, it's important to follow well-defined patterns......

🔹 QUESTION:
What are performance tuning tips?

🔹 INITIAL_ANSWER:
Here are some performance tuning tips:

1. **Identify Bottlenecks**: Use profiling tools to find slow parts of your application.
2. **Optimize Queries**: Ensure database queries are efficien

**Core Insights**
1. Separation of Embedding and Chat Models Is Crucial
Using two different deployments (for embedding vs. chat) avoids dimension mismatch and model capability errors.

This allows modular tuning and cost control.

2. Embedding Dimension Mismatch Was a Common Pitfall
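The initial `InvalidArgumentError` (expected 1536, got 384) revealed that Chroma falls back to its default embedding function (all-MiniLM-L6-v2, 384 dimensions) when embeddings aren't explicitly provided, while the collection holds 1536-dimensional text-embedding-ada-002 vectors. Passing explicit embeddings for both indexing and querying avoids the mismatch.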

3. Self-Critique Improves Contextual Accuracy
Adding critique (REFINE: vs. COMPLETE) ensures that initial hallucinations or missed citations are caught early.

It creates a pseudo-agent behavior without requiring heavy orchestration frameworks.

4. Citation Format Enforcement ([KBxxx]) Is Key to Traceability
It enables downstream auditing and linkage back to KB entries.

LLM must be prompted explicitly to preserve this format for reliable critique.

5. Critique-Controlled Refinement Increases Precision
Refinement happens only when needed (i.e., based on critique).

It prevents over-generation, reducing token usage and cost.