# Basic Retrieval-Augmented Generation (RAG) with AIProjectClient

In this notebook, we'll demonstrate a **basic RAG** flow using:
- **`azure-ai-projects`** (AIProjectClient)
- **`azure-ai-inference`** (Embeddings, ChatCompletions)
- **`azure-search-documents`** (the Azure AI Search client library, for vector or hybrid search)
- **RAG Mini Wikipedia dataset** - A curated dataset perfect for learning RAG concepts

We'll use the **RAG Mini Wikipedia** dataset which contains 3,200+ Wikipedia passages and Q&A pairs, providing a rich knowledge base for our RAG system.

## What is RAG?
Retrieval-Augmented Generation (RAG) is a technique where the LLM (Large Language Model) uses relevant retrieved text chunks from your data to craft a final answer. This helps ground the model's response in real data, reducing hallucinations.

## Setup
We'll import libraries, load environment variables, and create an `AIProjectClient`.


In [None]:
import os
import time
import json
from dotenv import load_dotenv

# azure-ai-projects
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential

# For vector search or hybrid search
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.core.credentials import AzureKeyCredential

# Load variables from a local .env file; values there override the process environment.
load_dotenv(override=True)

# Read all configuration once. The model and index names fall back to demo defaults.
conn_string = os.environ.get("AIPROJECT_ENDPOINT")
chat_model = os.environ.get("MODEL_DEPLOYMENT_NAME", "gpt-4o")
embedding_model = os.environ.get("EMBEDDING_MODEL_DEPLOYMENT_NAME", "text-embedding-3-small")
search_index_name = os.environ.get("SEARCH_INDEX_NAME", "rag-mini-wikipedia-index")

if not conn_string:
    # Report the missing setting explicitly instead of surfacing a bare KeyError repr.
    print(
        "❌ Error creating AIProjectClient:",
        "AIPROJECT_ENDPOINT is not set. Add it to your .env file or environment.",
    )
else:
    try:
        project_client = AIProjectClient(
            credential=DefaultAzureCredential(),
            endpoint=conn_string,
        )
        print("✅ AIProjectClient created successfully!")
    except Exception as e:
        # Notebook-friendly: show the problem without a traceback. Later cells
        # need `project_client`, so fix the error and re-run this cell.
        print("❌ Error creating AIProjectClient:", e)

## Load RAG Mini Wikipedia Dataset
We'll load the RAG Mini Wikipedia dataset which contains high-quality Wikipedia passages perfect for RAG learning. This dataset provides:
- **3,200+ text passages** covering diverse topics
- **900+ Q&A pairs** for testing and evaluation
- **Clean, structured content** ready for embedding and search

In [None]:
from datasets import load_dataset

# Fetch the RAG Mini Wikipedia dataset and reshape it into plain Python lists.
print("📥 Loading RAG Mini Wikipedia dataset...")

# Both lists stay empty if loading fails, so the summary below still runs.
wikipedia_docs = []
qa_pairs = []

try:
    # Passage corpus
    print("🔄 Loading text corpus...")
    text_corpus_dataset = load_dataset("rag-datasets/rag-mini-wikipedia", "text-corpus")
    text_corpus = text_corpus_dataset["passages"]

    # Question/answer pairs
    print("🔄 Loading Q&A pairs...")
    qa_dataset = load_dataset("rag-datasets/rag-mini-wikipedia", "question-answer")
    qa_pairs_dataset = qa_dataset["test"]

    # Keep at most the first 100 passages for this demo.
    sample_size = min(100, len(text_corpus))
    for i in range(sample_size):
        doc = text_corpus[i]

        # A row may be a dict (prefer "passage", then "text"), a plain string,
        # or something else that is simply stringified.
        if isinstance(doc, dict):
            content = doc.get("passage", doc.get("text", str(doc)))
        else:
            content = doc if isinstance(doc, str) else str(doc)

        wikipedia_docs.append({"id": f"doc_{i}", "content": content, "source": "Wikipedia"})

    # Keep at most the first 50 Q&A pairs for this demo.
    qa_sample_size = min(50, len(qa_pairs_dataset))
    for i in range(qa_sample_size):
        qa_item = qa_pairs_dataset[i]

        # Rows that are not dicts are not in the expected format; skip them.
        if not isinstance(qa_item, dict):
            continue

        question = qa_item.get("question", "")
        answer = qa_item.get("answer", "")
        qa_pairs.append({"question": question, "answer": answer})

    print("✅ Dataset loaded successfully!")
    print(f"📚 Knowledge base: {len(wikipedia_docs)} documents (from {len(text_corpus)} total)")
    print(f"❓ Q&A pairs: {len(qa_pairs)} pairs available for testing")

    # Preview one document (first 200 characters).
    if wikipedia_docs:
        print("\n📄 Sample document:")
        print(f"Content: {wikipedia_docs[0]['content'][:200]}...")

    # Preview one Q&A pair.
    if qa_pairs:
        print("\n❓ Sample Q&A:")
        print(f"Q: {qa_pairs[0]['question']}")
        print(f"A: {qa_pairs[0]['answer']}")

except Exception as e:
    # Report the failure and carry on; the lists keep whatever was collected so far.
    print(f"❌ Error loading dataset: {e}")
    print(f"🔍 Error type: {type(e)}")


# Summary, printed whether or not loading succeeded.
print(f"\n🎯 Ready to proceed with {len(wikipedia_docs)} documents!")
print(f"📊 Available Q&A pairs: {len(qa_pairs)}")

# Show the first document (150-character preview) if there is one.
if wikipedia_docs:
    print("\n📄 First document preview:")
    print(f"ID: {wikipedia_docs[0]['id']}")
    print(f"Content: {wikipedia_docs[0]['content'][:150]}...")
    print(f"Source: {wikipedia_docs[0]['source']}")

# Show the first Q&A pair if there is one.
if qa_pairs:
    print("\n❓ First Q&A pair:")
    print(f"Q: {qa_pairs[0]['question']}")
    print(f"A: {qa_pairs[0]['answer']}")

## Create Index


In [None]:
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    SearchFieldDataType,
    SimpleField,
    SearchableField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    HnswParameters,
    VectorSearchAlgorithmKind,
    VectorSearchAlgorithmMetric,
    VectorSearchProfile,
)

def create_wikipedia_index(
        endpoint: str, api_key: str, index_name: str,
        dimension: int = 1536  # if using text-embedding-3-small
        ) -> None:
    """Drop and recreate a search index for Wikipedia docs with vector search.

    The index has four fields: ``id`` (key), ``content`` (searchable text),
    ``source`` and ``embedding`` (a vector field of ``dimension`` floats tied
    to an HNSW/cosine profile).

    Args:
        endpoint: Azure AI Search service endpoint.
        api_key: Admin key used to authenticate against the service.
        index_name: Name of the index to reset.
        dimension: Length of the embedding vectors that will be stored.

    Raises:
        ValueError: If ``index_name`` is empty or ``dimension`` is not positive.
            Raised before any existing index is touched.
    """
    # Validate first: this function deletes the existing index, so bad
    # arguments must be rejected before anything destructive happens.
    if not index_name:
        raise ValueError("index_name must be a non-empty string")
    if dimension < 1:
        raise ValueError(f"dimension must be a positive integer, got {dimension!r}")

    # Imported locally so the cell stays self-contained; azure-core is already
    # a dependency of this notebook (see AzureKeyCredential).
    from azure.core.exceptions import ResourceNotFoundError

    # Define vector search configuration
    vector_search = VectorSearch(
        algorithms=[
            HnswAlgorithmConfiguration(
                name="myHnsw",
                kind=VectorSearchAlgorithmKind.HNSW,
                parameters=HnswParameters(
                    m=4,
                    ef_construction=400,
                    ef_search=500,
                    metric=VectorSearchAlgorithmMetric.COSINE
                )
            )
        ],
        profiles=[
            VectorSearchProfile(
                name="myHnswProfile",
                algorithm_configuration_name="myHnsw"
            )
        ]
    )

    # Define fields
    fields = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SearchableField(name="content", type=SearchFieldDataType.String),
        SimpleField(name="source", type=SearchFieldDataType.String),
        SearchField(
            name="embedding",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            # Set explicitly, as in the official vector-search samples.
            searchable=True,
            vector_search_dimensions=dimension,
            vector_search_profile_name="myHnswProfile"
        ),
    ]

    # Create index definition
    index_def = SearchIndex(
        name=index_name,
        fields=fields,
        vector_search=vector_search
    )

    # The context manager closes the client's HTTP transport when done.
    with SearchIndexClient(endpoint=endpoint, credential=AzureKeyCredential(api_key)) as index_client:
        # Reset: remove any previous index so a changed schema or dimension applies.
        try:
            index_client.delete_index(index_name)
            print(f"Deleted existing index: {index_name}")
        except ResourceNotFoundError:
            # Index doesn't exist yet. Other errors (auth, network) propagate.
            pass

        # Create the index
        index_client.create_index(index_def)

    print(f"✅ Created or reset index: {index_name}")

## Upload Wikipedia Documents

Now we'll put our Wikipedia knowledge into action by:
1. **Creating a search connection** to Azure AI Search
2. **Building our index** with vector search capability
3. **Generating embeddings** for each Wikipedia document
4. **Uploading** the documents with their embeddings

This creates our knowledge base that we'll search through later. Think of it as building our 'Wikipedia library' that our AI assistant can reference! 📚🔍

In [None]:
from azure.ai.projects.models import ConnectionType

# Step 0: Fail fast with a clear message if the dataset cell produced nothing.
# Without this guard, `wikipedia_docs[0]` below raises a bare IndexError.
if not wikipedia_docs:
    raise RuntimeError("❌ No documents loaded! Run the dataset cell successfully before indexing.")

# Step 1: Get search connection
search_conn = project_client.connections.get_default(
    connection_type=ConnectionType.AZURE_AI_SEARCH,
    include_credentials=True
)
if not search_conn:
    raise RuntimeError("❌ No default Azure AI Search connection found!")

print("✅ Got search connection")

# Step 2: Get embeddings client and check embedding length
embeddings_client = project_client.inference.get_embeddings_client()

print("✅ Created embeddings client")

# Embed one document to learn the vector size the deployed model produces,
# so the index is created with matching dimensions.
sample_doc = wikipedia_docs[0]
emb_response = embeddings_client.embed(
        model=embedding_model,
        input=[sample_doc["content"]]
    )
embedding_length = len(emb_response.data[0].embedding)
print(f"✅ Got embedding length: {embedding_length}")

# Step 3: Create the index
create_wikipedia_index(
    endpoint=search_conn.target,
    api_key=search_conn.credentials['key'],
    index_name=search_index_name,
    dimension=embedding_length
)

# Step 4: Create search client for uploading documents
search_client = SearchClient(
    endpoint=search_conn.target,
    index_name=search_index_name,
    credential=AzureKeyCredential(search_conn.credentials['key'])
)
print("✅ Created search client")

# Step 5: Embed and upload documents
search_docs = []
for doc in wikipedia_docs:
    # Get embedding for document content
    emb_response = embeddings_client.embed(
        model=embedding_model,
        input=[doc["content"]]
    )
    emb_vec = emb_response.data[0].embedding

    # Create document with embedding
    search_docs.append({
        "id": doc["id"],
        "content": doc["content"],
        "source": doc["source"],
        "embedding": emb_vec,
    })

# Upload documents to index. The call returns one result per document, so
# report what was actually accepted rather than what was sent.
result = search_client.upload_documents(documents=search_docs)
failed_uploads = [item for item in result if not item.succeeded]
uploaded_count = len(result) - len(failed_uploads)
print(f"✅ Uploaded {uploaded_count} documents to search index '{search_index_name}'")
if failed_uploads:
    print(f"❌ {len(failed_uploads)} documents were rejected by the index:")
    for item in failed_uploads[:5]:  # cap the output for readability
        print(f"   - {item.key}: {item.error_message}")

## Basic RAG Flow
### Retrieve
When a user queries, we:
1. Embed user question.
2. Search vector index with that embedding to get top docs.

### Generate answer
We then pass the retrieved docs to the chat model.

> In a real scenario, you'd have a more advanced approach to chunking & summarizing. We'll keep it simple.


In [None]:
from azure.search.documents.models import VectorizedQuery
from azure.ai.inference.models import SystemMessage, UserMessage
def rag_chat(query: str, top_k: int = 3) -> str:
    """Answer ``query`` using the ``top_k`` most similar indexed documents.

    Embeds the question, runs a vector search against the ``embedding`` field,
    and asks the chat model to answer using only the retrieved text.

    Args:
        query: The user's question.
        top_k: Number of nearest-neighbour documents to put in the prompt.

    Returns:
        The chat model's reply text.
    """
    # 1) Embed user query
    user_vec = embeddings_client.embed(
        model=embedding_model,
        input=[query]).data[0].embedding

    # 2) Vector search using VectorizedQuery
    vector_query = VectorizedQuery(
        vector=user_vec,
        k_nearest_neighbors=top_k,
        fields="embedding"
    )

    results = search_client.search(
        search_text=None,  # pure vector search: no keyword query
        vector_queries=[vector_query],
        select=["content", "source"],  # Only retrieve fields we need
        top=top_k,  # cap the result set so the prompt holds at most top_k docs
    )

    # Gather the retrieved docs, each labelled with its source
    top_docs_content = [f"Source: {r['source']} => {r['content']}" for r in results]

    # 3) Chat with retrieved docs
    system_text = (
        "You are a helpful assistant that answers questions using Wikipedia knowledge.\n"
        "Answer user questions using ONLY the text from these docs.\n"
        "Docs:\n"
        + "\n".join(top_docs_content)
        + "\nIf unsure, say 'I'm not sure based on the provided information'.\n"
    )

    # The chat client is created per call and closed when the block exits.
    with project_client.inference.get_chat_completions_client() as chat_client:
        response = chat_client.complete(
            model=chat_model,
            messages=[
                SystemMessage(content=system_text),
                UserMessage(content=query)
            ]
        )
    return response.choices[0].message.content

## Try a Query 🎉
Let's ask a question that might be answered by our Wikipedia knowledge base.

In [None]:
# Ask one sample question through the RAG pipeline, then echo the question
# and the model's grounded answer.
user_query = "What is Uruguay known for?"
answer = rag_chat(user_query)
print(f"🗣️ User Query: {user_query}")
print(f"🤖 RAG Answer: {answer}")

## Conclusion
We've demonstrated a **basic RAG** pipeline with:
- **Loading** the RAG Mini Wikipedia dataset (3,200+ passages, of which the first 100 are indexed in this demo)
- **Embedding** Wikipedia docs & storing them in **Azure AI Search**
- **Retrieving** top relevant docs for user questions
- **Chat** with the retrieved Wikipedia knowledge


## RAG-Enhanced AI Agent 🤖📚

Now let's create an **AI Agent that uses RAG** to answer questions about our Wikipedia knowledge base. This agent will:

1. **Search** our Wikipedia vector index for relevant documents
2. **Use the retrieved knowledge** to provide informed answers
3. **Maintain conversation context** across multiple interactions

This combines the power of AI agents with retrieval-augmented generation!

In [None]:
# Official Azure AI Search Agent Demo
# Based on: https://learn.microsoft.com/en-us/azure/ai-foundry/agents/how-to/tools/azure-ai-search-samples

from azure.ai.agents.models import AzureAISearchTool, ToolSet
# ConnectionType comes from azure.ai.projects.models, the same module the
# upload cell imports it from.
from azure.ai.projects.models import ConnectionType

# Look up the default Azure AI Search connection. Credentials are not
# requested: the tool only refers to the connection by its id.
search_connection = project_client.connections.get_default(
    connection_type=ConnectionType.AZURE_AI_SEARCH,
)

print(f"✅ Using search connection: {search_connection.name}")
print(f"🔍 Index: {search_index_name}")

# Create the Azure AI Search tool with required parameters.
# `index_connection_id` takes the connection's id, not its display name.
search_tool = AzureAISearchTool(
    index_connection_id=search_connection.id,
    index_name=search_index_name
)

# Create toolset and add the search tool
toolset = ToolSet()
toolset.add(search_tool)

# Create agent with search capabilities
agent = project_client.agents.create_agent(
    model=chat_model,
    name="search-agent",
    instructions=f"""You are a helpful assistant that can search for information using Azure AI Search.

You have access to a Wikipedia knowledge base through the '{search_index_name}' index.
    
When users ask questions:
1. Use the Azure AI Search tool to find relevant information
2. Provide clear, accurate answers based on the search results
3. If no relevant information is found, let the user know
4. Be conversational and helpful""",
    toolset=toolset,
)

print(f"✅ Created agent: {agent.id}")

# Create a thread for the conversation; chat_with_agent() reuses it so
# context carries across questions.
thread = project_client.agents.threads.create()
print(f"✅ Created thread: {thread.id}")

# "\n" (not "\\n") so a blank line is printed rather than a literal backslash-n.
print("\n🎯 Agent ready to answer questions using Azure AI Search!")

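## Test the RAG Agent

The Azure AI Search tool is already attached to the agent, so no separate function registration is needed. We'll define a small helper that sends a question on the shared thread and prints the agent's reply, then test our RAG-enhanced agent with some questions!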

In [None]:
# Simple function to chat with the agent
def chat_with_agent(question: str) -> "str | None":
    """Send a question to the agent on the shared thread and print the reply.

    Args:
        question: The user's question.

    Returns:
        The text of the first assistant text message found in the thread
        listing, or ``None`` if the run did not complete or produced no text.
    """
    # Post the user's message to the shared thread
    project_client.agents.messages.create(
        thread_id=thread.id,
        role="user",
        content=question
    )

    # Run the agent and wait for it to finish processing
    run = project_client.agents.runs.create_and_process(
        thread_id=thread.id,
        agent_id=agent.id
    )

    print(f"👤 Question: {question}")
    print(f"🔄 Status: {run.status}")

    if run.status != "completed":
        print(f"❌ Run failed: {run.last_error}")
        return None

    # Take the first assistant text part in the listing.
    # NOTE(review): this relies on the listing returning newest messages first;
    # confirm the default sort order for the installed SDK version.
    for thread_message in project_client.agents.messages.list(thread_id=thread.id):
        if thread_message.role != "assistant":
            continue
        for part in thread_message.content:
            if part.type == "text":
                answer = part.text.value
                print(f"🤖 Answer: {answer}")
                return answer

    # Completed, but nothing printable came back; say so instead of staying silent.
    print("⚠️ Run completed but no assistant text reply was found.")
    return None

# Demo: Test the agent with questions about our Wikipedia knowledge base.
# The newline escapes are "\n" (not "\\n") so blank lines are printed
# instead of a literal backslash-n.
print("\n" + "="*60)
print("🧪 DEMO: Agent with Azure AI Search")
print("="*60)

# Test questions
questions = [
    "Tell me about Uruguay",
    "What information do you have about Abraham Lincoln?",
    "What is the capital of Uruguay?"
]

# All questions go through the same thread, so later ones can build on earlier context.
for i, question in enumerate(questions, 1):
    print(f"\n📋 Test {i}:")
    chat_with_agent(question)
    print("-" * 40)