# 4. Retrieval-Augmented Generation (RAG) with Azure OpenAI and Azure AI Search

In this notebook, we will demonstrate how to implement a Retrieval-Augmented Generation (RAG) system. This involves embedding the question, retrieving relevant context from the indexed documents, augmenting the question with the retrieved context, and using a language model to generate an answer.

## 4.1 Import Libraries and Load Environment Variables

In [None]:
# Import necessary libraries
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv
from openai import AzureOpenAI
import os

# Read the .env file so the settings below are available via the environment
load_dotenv()

# Azure AI Search: service name and admin key
service_name = os.environ.get('AZURE_AI_SEARCH_SERVICE_NAME')
admin_key = os.environ.get('AZURE_AI_SEARCH_ADMIN_KEY')

# Azure OpenAI: connection settings shared by the embedding and chat clients
azure_openai_endpoint = os.environ.get('AZURE_OPENAI_ENDPOINT')
azure_openai_key = os.environ.get('AZURE_OPENAI_KEY')
azure_openai_api_version = os.environ.get('AZURE_OPENAI_API_VERSION')

# Azure OpenAI: embedding model name and deployment
azure_openai_embedding_model = os.environ.get('AZURE_OPENAI_EMBEDDING_MODEL_NAME')
azure_openai_embedding_deployment = os.environ.get('AZURE_OPENAI_EMBEDDING_DEPLOYMENT')

# Azure OpenAI: chat model name and deployment
azure_openai_chat_model = os.environ.get('AZURE_OPENAI_CHAT_MODEL_NAME')
azure_openai_chat_deployment = os.environ.get('AZURE_OPENAI_CHAT_DEPLOYMENT')

# Derive the search endpoint URL and key credential from the values above.
# Each setting is None when its variable is unset; nothing is validated here.
credential = AzureKeyCredential(admin_key)
endpoint = f"https://{service_name}.search.windows.net"

## 4.2 Initialize Azure AI Search and Azure OpenAI Clients

In [None]:
# Import the SearchClient from Azure SDK
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery

# Search client bound to the index that the retrieval step queries
index_name = "example-index"
search_client = SearchClient(
    endpoint=endpoint,
    credential=credential,
    index_name=index_name,
)

# Azure OpenAI client for the embedding API (targets the embedding deployment)
embedding_client = AzureOpenAI(
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key,
    api_version=azure_openai_api_version,
    azure_deployment=azure_openai_embedding_deployment,
)

# Azure OpenAI client for the chat API (targets the chat deployment);
# it differs from the embedding client only in its deployment
chat_client = AzureOpenAI(
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key,
    api_version=azure_openai_api_version,
    azure_deployment=azure_openai_chat_deployment,
)


## 4.3 Function to Generate Embeddings

In [None]:
# Function to generate embeddings using Azure OpenAI API
def get_embedding(text, client, model=None):
    """Return the embedding vector for ``text``.

    Args:
        text: Value passed as ``input`` to the embeddings endpoint.
        client: Object exposing ``embeddings.create`` (in this notebook, the
            AzureOpenAI embedding client).
        model: Value sent as the ``model`` argument. When omitted, the
            module-level ``azure_openai_embedding_model`` is used, so
            existing two-argument calls behave as before.

    Returns:
        The ``embedding`` attribute of the first item in ``response.data``.
    """
    if model is None:
        # Resolved at call time so the notebook-level setting stays the default.
        model = azure_openai_embedding_model
    response = client.embeddings.create(input=text, model=model)
    return response.data[0].embedding


## 4.4 Retrieve Context from Indexed Documents

In [None]:
# Function to retrieve context from indexed documents
def retrieve_context(query, client, search_client, top_k=3):
    """Collect the ``content`` of the search hits returned for ``query``.

    The query is embedded with ``get_embedding`` and submitted as a
    vector-only search (``search_text=None``) against the ``contentVector``
    field.

    Args:
        query: Question text to embed and search with.
        client: Client handed to ``get_embedding`` to create the embedding.
        search_client: Object whose ``search`` method runs the query.
        top_k: Used both as ``k_nearest_neighbors`` and as ``top``.

    Returns:
        A string made of each hit's ``content`` followed by a newline; an
        empty string when the search yields no results.
    """
    query_vector = VectorizedQuery(
        vector=get_embedding(query, client),
        k_nearest_neighbors=top_k,
        fields="contentVector",
    )
    hits = search_client.search(
        search_text=None,
        vector_queries=[query_vector],
        top=top_k,
    )
    return "".join(hit['content'] + "\n" for hit in hits)

## 4.5 Generate Answer using Augmented Context

In [None]:
# Function to generate answer using augmented context
def generate_answer(query, context, client, *, model=None, max_tokens=150, temperature=0.7):
    """Ask the chat model to answer ``query`` using the retrieved ``context``.

    The context is placed in the user prompt together with the question.
    Sending it as an ``assistant`` turn after the question would present it
    to the model as its own previous reply instead of as source material.

    Args:
        query: The user's question.
        context: Retrieved text to ground the answer in.
        client: Object exposing ``chat.completions.create`` (in this
            notebook, the AzureOpenAI chat client).
        model: Value sent as the ``model`` argument. When omitted, the
            module-level ``azure_openai_chat_model`` is used.
        max_tokens: Upper bound on generated tokens (default 150).
        temperature: Sampling temperature (default 0.7).

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the response carries no content.
    """
    if model is None:
        # Resolved at call time so the notebook-level setting stays the default.
        model = azure_openai_chat_model
    messages = [
        {
            "role": "system",
            "content": (
                "You are a knowledgeable assistant that provides accurate and helpful information. "
                "Answer the question using the provided context."
            ),
        },
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"},
    ]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        n=1,
        stop=None,
        temperature=temperature,
    )
    content = response.choices[0].message.content
    # content can be None (no text returned); avoid calling .strip() on None.
    return content.strip() if content else ""


## 4.6 Example Usage of RAG

In [None]:
# Example usage  
query = "Why is the UK revoking the Burundi sanctions regime?"  
context = retrieve_context(query, embedding_client, search_client, top_k=3)  
answer = generate_answer(query, context, chat_client)  
  
print(f"Question: {query}")  
print(f"Context: {context}")  
print(f"Answer: {answer}")  


You can now use the RAG system to ask questions and get answers based on the context retrieved from your indexed documents.