## Multi-Hop RAG

In [1]:
import os
import openai
from openai import AzureOpenAI
from typing import List, Dict, Any
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex,
    SimpleField,
    SearchField,
    SearchFieldDataType,
    SearchableField,
    VectorSearch,
    VectorSearchProfile,
    HnswAlgorithmConfiguration,
    VectorSearchAlgorithmKind,
    VectorSearchAlgorithmMetric,
    HnswParameters,
    SemanticSearch,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField
)
from azure.search.documents.models import VectorizedQuery
import time


# Replace with your actual Azure Cognitive Search endpoint and admin key


AZURE_SEARCH_ENDPOINT = "https://.search.windows.net"
AZURE_SEARCH_KEY =  ""
credential = AzureKeyCredential(AZURE_SEARCH_KEY)

client = AzureOpenAI(
    api_key = "",  
    api_version = "2024-02-01",
    azure_endpoint = "https://.openai.azure.com/" 
)
EMBEDDING_MODEL_NAME = "text-embedding-ada-002"
GPT_DEPLOYMENT_NAME = "gpt-4o"  # or your Azure GPT deployment

# 2. Create the Search Clients
INDEX_NAME = "product-documents-index"
search_index_client = SearchIndexClient(endpoint=AZURE_SEARCH_ENDPOINT, credential=credential)
search_client = SearchClient(endpoint=AZURE_SEARCH_ENDPOINT, index_name=INDEX_NAME, credential=credential)



In [5]:
def generate_embeddings_for_docs(texts: list[str]) -> list[list[float]]:
    # 'client' is your AzureOpenAI or OpenAI client
    response = client.embeddings.create(
        model=EMBEDDING_MODEL_NAME,
        input=texts
    )
    return [item.embedding for item in response.data]


In [7]:
def multi_hop_rag(query: str, max_steps: int = 3) -> str:
    """
    Multi-hop retrieval: 
      1. Decompose the question into sub-questions (chain-of-thought).
      2. Retrieve context for each sub-question.
      3. Generate partial answers and synthesize final answer.
    """

    decomposition_prompt = (
        "You are an expert at breaking down complex questions. "
        "Given the user question, list a few sub-questions (up to {max_steps}) "
        "that need to be answered to arrive at a final answer.\n\n"
        f"Question: {query}\n\n"
        "Sub-questions:"
    )

    decomposition_response = client.chat.completions.create(
        model=GPT_DEPLOYMENT_NAME,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": decomposition_prompt}
        ],
        temperature=0.2,
        max_tokens=300
    )

    decomposition_text = decomposition_response.choices[0].message.content
  
    sub_questions = [line.strip() for line in decomposition_text.split("\n") if line.strip()]
    
    partial_answers = []
    
    for sq_index, subq in enumerate(sub_questions, start=1):
        clean_subq = subq.strip("1234567890). ")
        if not clean_subq:
            continue

        # Vector retrieval
        query_embedding = generate_embeddings_for_docs([clean_subq])[0]  
        vector_query = VectorizedQuery(
            vector=query_embedding,
            k_nearest_neighbors=3,
            fields="content_vector"
        )
        results = search_client.search(
            search_text=None,
            vector_queries=[vector_query],
            select=["title", "text", "url"]
        )

        retrieved_texts = []
        for result in results:
            snippet = f"Title: {result['title']}\nText: {result['text']}\nURL: {result['url']}"
            retrieved_texts.append(snippet)
        context_str = "\n\n".join(retrieved_texts)

        partial_prompt = (
            f"Sub-question: {clean_subq}\n\n"
            "Relevant context:\n"
            f"{context_str}\n\n"
            "Provide a brief factual answer:"
        )

        partial_response = client.chat.completions.create(
            model=GPT_DEPLOYMENT_NAME,
            messages=[
                {"role": "system", "content": "You answer based on context. If unsure, say 'not sure'."},
                {"role": "user", "content": partial_prompt}
            ],
            temperature=0.2,
            max_tokens=200
        )

        partial_answer = partial_response.choices[0].message.content.strip()
        partial_answers.append((clean_subq, partial_answer))

    synthesis_context = "\n".join([f"Q: {sq}\nA: {ans}" for sq, ans in partial_answers])
    final_prompt = (
        f"The user asked: {query}\n\n"
        "We have the following partial answers from each sub-question:\n"
        f"{synthesis_context}\n\n"
        "Now synthesize a final answer to the original question as best as possible."
    )

    synthesis_response = client.chat.completions.create(
        model=GPT_DEPLOYMENT_NAME,
        messages=[
            {"role": "system", "content": "You are a thoughtful assistant. Combine partial answers to respond fully."},
            {"role": "user", "content": final_prompt}
        ],
        temperature=0.2,
        max_tokens=300
    )

    final_answer = synthesis_response.choices[0].message.content.strip()
    return final_answer

complex_question = "How does Azure AI Search use embeddings, and what steps are needed to set it up for semantic search?"
answer = multi_hop_rag(complex_question)
print("Final multi-hop answer:\n", answer)


Final multi-hop answer:
 Azure AI Search uses embeddings to enable semantic search, which enhances search capabilities by understanding the context and meaning behind search queries and documents, rather than just matching keywords. This is achieved through vector search, where text is converted into dense vector representations that capture semantic meaning, allowing for more accurate and relevant search results.

To set up Azure AI Search for semantic search, follow these steps:

1. **Provision Azure Cognitive Search Service**: 
   - Go to the Azure portal.
   - Select "Create a resource" and search for "Azure Cognitive Search".
   - Click "Create" and fill in the required details such as the subscription, resource group, and service name.
   - Choose the pricing tier and region.
   - Review and create the service.

2. **Create and Configure an Index**:
   - Define the schema for your search index, including fields that will store the embeddings.
   - Set up indexes by defining the s

## IRCoT

In [12]:
import openai
from openai import AzureOpenAI
from typing import List, Dict, Any
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery



AZURE_OPENAI_API_KEY = ""
AZURE_OPENAI_ENDPOINT = "https://.openai.azure.com/" 
AZURE_OPENAI_API_VERSION = "2024-02-01"
GPT_MODEL_DEPLOYMENT = "gpt-4o"

# Azure Cognitive Search
INDEX_NAME = "product-documents-index"
search_index_client = SearchIndexClient(endpoint=AZURE_SEARCH_ENDPOINT, credential=credential)
search_client = SearchClient(endpoint=AZURE_SEARCH_ENDPOINT, index_name=INDEX_NAME, credential=credential)

# Initialize the AzureOpenAI client
openai.api_type = "azure" 
client = AzureOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version=AZURE_OPENAI_API_VERSION
)

def generate_embeddings_for_query(query: str) -> List[float]:
    """
    For a single query string, return exactly one embedding list of floats.
    """
    response = client.embeddings.create(
        model="text-embedding-ada-002", 
        input=[query]
    )
    return response.data[0].embedding 

def retrieve_subquestion_context(subquestion: str, top_k: int = 3) -> str:
    """
    Retrieve relevant context from Azure Cognitive Search using vector-based retrieval.
    Returns a concatenated string of top documents.
    """
    embedding = generate_embeddings_for_query(subquestion)
    vector_query = VectorizedQuery(
        vector=embedding,
        k_nearest_neighbors=top_k,
        fields="content_vector"  
    )
    results = search_client.search(
        search_text=None,
        vector_queries=[vector_query],
        select=["title", "text"]
    )

    docs = []
    for r in results:
        doc_text = f"Title: {r['title']}\nText: {r['text']}"
        docs.append(doc_text)
    return "\n\n".join(docs)

# ------------------------------------------------------------------
# 2. IRCoT IMPLEMENTATION
# ------------------------------------------------------------------

def ircot(question: str, max_iterations: int = 5) -> str:
    """
    Iterative Retrieval Chain-of-Thought (IRCoT):
      - The model can explicitly request more info by outputting 'NEED: <subquestion>'
      - Once satisfied, it outputs 'DONE: <final answer>'
      - We feed retrieved context to the model in subsequent iterations until 'DONE' is reached or we exceed max_iterations.
    """
    conversation = []
    system_instructions = (
        "You are a chain-of-thought reasoner with the ability to request additional context.\n"
        "If you need more context, write: 'NEED: <subquestion>'.\n"
        "When you have enough info, finalize with: 'DONE: <final answer>'.\n\n"
        "Important: The user only sees your final answer (after 'DONE:'). "
        "Your chain-of-thought and 'NEED:' lines are hidden from the user.\n"
    )


    conversation.append({"role": "system", "content": system_instructions})

    conversation.append({"role": "user", "content": f"User question: {question}"})

    for iteration in range(max_iterations):
        print("====== ITERATION: " + str(iteration))
        # Call the model
        response = client.chat.completions.create(
            model=GPT_MODEL_DEPLOYMENT,
            messages=conversation,
            temperature=0.2,
            max_tokens=400
        )
        assistant_reply = response.choices[0].message.content.strip()

        conversation.append({"role": "assistant", "content": assistant_reply})

        if "NEED:" in assistant_reply:
            
       
            idx = assistant_reply.find("NEED:") + len("NEED:")
            subquestion = assistant_reply[idx:].strip()
            print("------ NEED: " + subquestion)
            
            # Retrieve context from Azure Search
            retrieved_text = retrieve_subquestion_context(subquestion)

       
            context_msg = (
                f"Here is additional context for your subquestion '{subquestion}':\n\n"
                f"{retrieved_text}\n\n"
                "You can now continue your chain-of-thought."
            )
            conversation.append({"role": "system", "content": context_msg})
            print("*************** conversation: " + str(conversation))

        elif "DONE:" in assistant_reply:
            idx = assistant_reply.find("DONE:") + len("DONE:")
            final_answer = assistant_reply[idx:].strip()
            return final_answer  # End the IRCoT loop

        else:
            pass

    return "I'm sorry, I couldn't arrive at a final answer within the iteration limit."

# ------------------------------------------------------------------
# 3. EXAMPLE USAGE
# ------------------------------------------------------------------

if __name__ == "__main__":
    complex_query = (
        "Explain how Azure AI Search uses embeddings, and if I need to enable vector search. "
        "Then tell me the steps to set up semantic configuration."
    )

    final_answer = ircot(complex_query, max_iterations=5)
    print("===== FINAL ANSWER =====")
    print(final_answer)


------ NEED: How does Azure AI Search use embeddings? 

NEED: Do I need to enable vector search in Azure AI Search? 

NEED: What are the steps to set up semantic configuration in Azure AI Search?
*************** conversation: [{'role': 'system', 'content': "You are a chain-of-thought reasoner with the ability to request additional context.\nIf you need more context, write: 'NEED: <subquestion>'.\nWhen you have enough info, finalize with: 'DONE: <final answer>'.\n\nImportant: The user only sees your final answer (after 'DONE:'). Your chain-of-thought and 'NEED:' lines are hidden from the user.\n"}, {'role': 'user', 'content': 'User question: Explain how Azure AI Search uses embeddings, and if I need to enable vector search. Then tell me the steps to set up semantic configuration.'}, {'role': 'assistant', 'content': 'NEED: How does Azure AI Search use embeddings? \n\nNEED: Do I need to enable vector search in Azure AI Search? \n\nNEED: What are the steps to set up semantic configuration 

In [13]:
if __name__ == "__main__":
    complex_query = (
        "What's the best chunking strategy?"
     
    )

    final_answer = ircot(complex_query, max_iterations=5)
    print("------ FINAL ANSWER ------")
    print(final_answer)


------ NEED: What specific context or application are you referring to for chunking (e.g., text processing, memory improvement, data analysis)?
*************** conversation: [{'role': 'system', 'content': "You are a chain-of-thought reasoner with the ability to request additional context.\nIf you need more context, write: 'NEED: <subquestion>'.\nWhen you have enough info, finalize with: 'DONE: <final answer>'.\n\nImportant: The user only sees your final answer (after 'DONE:'). Your chain-of-thought and 'NEED:' lines are hidden from the user.\n"}, {'role': 'user', 'content': "User question: What's the best chunking strategy?"}, {'role': 'assistant', 'content': 'NEED: What specific context or application are you referring to for chunking (e.g., text processing, memory improvement, data analysis)?'}, {'role': 'system', 'content': "Here is additional context for your subquestion 'What specific context or application are you referring to for chunking (e.g., text processing, memory improveme