# Assignment 3: Agentic RAG System with Azure OpenAI & LangGraph

## 1. Setup & Installation
Install required packages for Azure OpenAI, LangGraph, Pinecone/Weaviate, MLflow, and data handling.

In [1]:
!python3 -m pip install langgraph azure-ai-inference pinecone mlflow pydantic
!python3 -m pip install langchain_openai
!python3 -m pip install pinecone



## 2. Load KB Data
Load the KB JSON file and inspect its structure.

In [2]:
import json

# Load KB data from JSON file
with open("self_critique_loop_dataset.json", "r") as f:
    kb_data = json.load(f)

# Display sample entry
print(kb_data[0])

{'doc_id': 'KB001', 'question': 'What are best practices for debugging?', 'answer_snippet': "When addressing debugging, it's important to follow well-defined patterns...", 'source': 'debugging_guide.md', 'confidence_indicator': 'moderate', 'last_updated': '2024-01-10'}


## 3. Generate Embeddings using Azure OpenAI
Use Azure's `text-embedding-3-small` model to generate embeddings for each KB entry.

In [13]:
import os
from dotenv import load_dotenv
from langchain_openai import AzureOpenAIEmbeddings
import os
load_dotenv()
# Optional: kill legacy envs that cause the validation error

for bad in ("OPENAI_API_BASE", "OPENAI_API_TYPE"):
    os.environ.pop(bad, None)

# client = OpenAIClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))

# Generate embeddings for answer_snippet

embedding_model = AzureOpenAIEmbeddings(
    # You can rely on env vars instead; passing explicitly is fine too:
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    openai_api_version=os.environ.get("AZURE_OPENAI_API_VERSION", "2024-02-01"),

    # Use your Azure **deployment name** for the embeddings model
    model="text-embedding-3-small",    # <-- your deployment name
    # dimensions=3072  # optional if you configured custom dims on text-embedding-3*
)


kb_embeddings = []


for entry in kb_data:
    text = entry["answer_snippet"]
    embedding = embedding_model.embed_query(text)
    kb_embeddings.append({
        "id": entry["doc_id"],
        "embedding": embedding,
        "metadata": entry
    })

print(f"Generated {len(kb_embeddings)} embeddings.")


Generated 30 embeddings.


## 4. Index into Pinecone
Store the generated embeddings into Pinecone trial cloud instance.

In [4]:
import os
from pinecone import Pinecone, ServerlessSpec

import certifi
import os
os.environ['SSL_CERT_FILE'] = certifi.where()


# Initialize Pinecone client
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Define index name and embedding dimension
index_name = "agentic-rag-index"
dimension = len(kb_embeddings[0]["embedding"])

# Create index if it doesn't exist
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="cosine",  # or "euclidean"
        spec=ServerlessSpec(
            cloud="aws",       # required even for trial
            region="us-east-1" # trial region
        )
    )

# Connect to the index
index = pc.Index(index_name)

# Upsert embeddings
vectors = [(item["id"], item["embedding"], item["metadata"]) for item in kb_embeddings]
index.upsert(vectors=vectors)

print(f"Upserted {len(vectors)} vectors into Pinecone.")

  from .autonotebook import tqdm as notebook_tqdm


Upserted 30 vectors into Pinecone.


## 5. LangGraph Workflow
Define nodes for Retriever, LLM Answer, Self-Critique, and Refinement using LangGraph.

In [14]:
# -----------------------------
# RAG with LangGraph + Azure OpenAI
# -----------------------------
import os
from typing import List, Optional, TypedDict

from langgraph.graph import StateGraph, END
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings


# --- Configure Azure OpenAI ---
# Ensure these env vars are set:
# AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_KEY, AZURE_OPENAI_API_VERSION
llm = AzureChatOpenAI(
    azure_deployment="gpt4o",  # or "gpt-4-mini"
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    openai_api_version=os.environ.get("AZURE_OPENAI_API_VERSION", "2024-02-01"),
    temperature=0
)

embeddings = embedding_model

# --- Define State ---
class RAGState(TypedDict, total=False):
    query: str
    query_embedding: List[float]
    retrieved: List[dict]  # [{'id': str, 'metadata': {...}, 'score': float, ...}, ...]
    initial_answer: str
    critique: str          # "COMPLETE" or "REFINE"
    final_answer: str


# --- Plug in your retriever / vector index here ---
# Expectation: index.query(vector: List[float], top_k: int, include_metadata: bool)
# returns an object with .matches -> list of objects each with:
#   .id, .score, .metadata (with key "answer_snippet"), and .to_dict()
#
# Example placeholder interface:
class _Match:
    def __init__(self, _id, _metadata, _score=0.0):
        self.id = _id
        self.metadata = _metadata
        self.score = _score

    def to_dict(self):
        return {"id": self.id, "metadata": self.metadata, "score": self.score}

class _QueryResult:
    def __init__(self, matches):
        self.matches = matches

class _Index:
    def query(self, vector: List[float], top_k: int, include_metadata: bool = True):
        # TODO: Replace with your actual vector DB call.
        # For now, return empty to demonstrate guardrails.
        return _QueryResult(matches=[])

index = _Index()


# --- Node functions ---
def retrieve_snippets(state: RAGState) -> RAGState:
    query_vec = state.get("query_embedding", [])
    response = index.query(vector=query_vec, top_k=5, include_metadata=True)
    state["retrieved"] = [m.to_dict() for m in response.matches]
    return state


def _build_context(snippets: List[dict]) -> str:
    context_parts = []
    for m in snippets or []:
        mid = m.get("id", "unknown-id")
        md = m.get("metadata", {}) or {}
        snippet = md.get("answer_snippet")
        if snippet:
            context_parts.append(f"{snippet} [{mid}]")
    return " ".join(context_parts).strip()


def generate_answer(state: RAGState) -> RAGState:
    query = state["query"]
    snippets = state.get("retrieved", [])
    context = _build_context(snippets)

    if not context:
        # Minimal fallback when retrieval is empty
        prompt = (
            "The retrieval returned no helpful context. "
            f"Provide a concise, generally accepted best-practices answer to:\n\n{query}"
        )
    else:
        prompt = f"Answer the question using ONLY the following context.\n\nQuestion: {query}\n\nContext:\n{context}"

    response = llm.invoke(prompt)
    state["initial_answer"] = (getattr(response, "content", None) or "").strip()
    return state


def self_critique(state: RAGState) -> RAGState:
    answer = state.get("initial_answer", "")
    query = state["query"]
    snippets = state.get("retrieved", [])
    context = _build_context(snippets)

    # Ask the model to decide if refinement is needed, with strict output.
    critique_prompt = (
        "You are a rigorous reviewer. Determine if the answer fully addresses the question "
        "given the available context.\n\n"
        f"Question: {query}\n\n"
        f"Context: {context or '[NO CONTEXT]'}\n\n"
        f"Answer: {answer}\n\n"
        "Respond with EXACTLY one word: COMPLETE or REFINE.\n"
        "Use REFINE if the answer is missing important points, lacks evidence from context, "
        "or could be made more precise."
    )
    resp = llm.invoke(critique_prompt)
    raw = (getattr(resp, "content", "") or "").strip().upper()
    state["critique"] = "COMPLETE" if "COMPLETE" in raw and "REFINE" not in raw else "REFINE"
    return state


def refine_answer(state: RAGState) -> RAGState:
    # If COMPLETE, pass through
    if state.get("critique") == "COMPLETE":
        state["final_answer"] = state.get("initial_answer", "")
        return state

    # Otherwise, try retrieving one more snippet (e.g., best match)
    query_vec = state.get("query_embedding", [])
    resp = index.query(vector=query_vec, top_k=1, include_metadata=True)
    extra = resp.matches[0].to_dict() if resp.matches else None

    snippets = state.get("retrieved", [])[:]
    if extra:
        snippets.append(extra)

    context = _build_context(snippets)
    query = state["query"]
    prompt = (
        "Refine and improve the answer using ONLY the following context. "
        "Preserve factuality, add missing key points, and keep the writing concise.\n\n"
        f"Question: {query}\n\n"
        f"Context:\n{context}\n\n"
        f"Current Answer:\n{state.get('initial_answer', '')}"
    )
    response = llm.invoke(prompt)
    state["final_answer"] = (getattr(response, "content", None) or "").strip()
    return state


# --- Build LangGraph ---
graph = StateGraph(RAGState)
graph.add_node("retrieve", retrieve_snippets)
graph.add_node("answer", generate_answer)
graph.add_node("critique", self_critique)
graph.add_node("refine", refine_answer)

graph.set_entry_point("retrieve")
graph.add_edge("retrieve", "answer")
graph.add_edge("answer", "critique")
graph.add_conditional_edges("critique", lambda s: s["critique"], {
    "COMPLETE": END,
    "REFINE": "refine",
})
graph.add_edge("refine", END)

app = graph.compile()

# --- Run ---
query = "What are best practices for debugging?"
query_embedding = embeddings.embed_query(query)

state: RAGState = {"query": query, "query_embedding": query_embedding}
result = app.invoke(state)

# Prefer final_answer; fall back to initial_answer
print(result.get("final_answer") or result.get("initial_answer") or "No answer produced.")

Here are some best practices for debugging:

1. **Understand the Problem**: Clearly define the issue and gather as much information as possible about the symptoms and context.

2. **Reproduce the Issue**: Try to consistently reproduce the bug to understand its conditions and behavior.

3. **Check Logs and Error Messages**: Review logs, error messages, and stack traces for clues about what went wrong.

4. **Isolate the Code**: Narrow down the code section where the bug occurs. Use techniques like commenting out sections or using breakpoints.

5. **Use a Debugger**: Utilize debugging tools to step through the code, inspect variables, and monitor the flow of execution.

6. **Add Logging**: Insert logging statements to track variable values and program flow, especially in complex areas.

7. **Simplify the Problem**: Reduce the complexity of the code to isolate the bug, if possible.

8. **Check for Common Issues**: Look for common pitfalls such as off-by-one errors, null references, or inco

## 6. MLflow Logging
Log retrieved snippets, model outputs, critique results, and final answers.

In [None]:
import mlflow

mlflow.set_experiment("Agentic_RAG")

with mlflow.start_run():
    mlflow.log_param("query", "What are best practices for debugging?")
    mlflow.log_param("retrieved_docs", [match["id"] for match in result["retrieved"]])
    mlflow.log_param("critique", result["critique"])
    mlflow.log_text(result["initial_answer"], "initial_answer.txt")
    mlflow.log_text(result["final_answer"], "final_answer.txt")