In [1]:
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
import os

# Load settings from the .env file; override=True lets values from .env replace
# variables that are already set in the process environment.
load_dotenv(override=True)

# Variables not used here do not need to be updated in your .env file

# Required settings: a missing variable raises KeyError right here.
endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
index_name = os.environ["AZURE_SEARCH_INDEX"]
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_openai_embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
azure_openai_api_version = os.environ["AZURE_OPENAI_API_VERSION"]

# Optional API keys: an unset OR empty variable becomes None, so the
# credential-based fallback is reachable without defining an empty key.
key_credential = os.environ.get("AZURE_SEARCH_ADMIN_KEY") or None
azure_openai_key = os.environ.get("AZURE_OPENAI_API_KEY") or None

# Use the search admin key when present, otherwise a DefaultAzureCredential.
credential = key_credential or DefaultAzureCredential()

In [2]:
from langsmith import Client
# LangSmith client, built with no explicit arguments
# (presumably configured from environment variables — TODO confirm).
client = Client()

# Project name for traces; note that a later cell assigns a different value
# to this same variable.
os.environ['LANGCHAIN_PROJECT'] = "contoso-coffee-chain"

In [3]:
from langchain_openai import AzureOpenAIEmbeddings
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# Bearer-token provider built from a DefaultAzureCredential, for the case where
# no API key is configured.
openai_credential = DefaultAzureCredential()
token_provider = get_bearer_token_provider(openai_credential, "https://cognitiveservices.azure.com/.default")

# Use API key if provided, otherwise use RBAC authentication.
# azure_openai_key is None when the key variable is empty; only in that case is
# the token provider handed to the client.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=azure_openai_embedding_deployment,
    openai_api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key,
    azure_ad_token_provider=token_provider if not azure_openai_key else None
)   

In [4]:
from langchain.vectorstores.azuresearch import AzureSearch

# Vector store over the configured search index. Queries are embedded with
# embeddings.embed_query; the semantic configuration name is fixed here.
# NOTE(review): key_credential may be None when no admin key is set — confirm
# the store then authenticates some other way.
vector_store = AzureSearch(
    azure_search_endpoint=endpoint,
    azure_search_key=key_credential,
    index_name=index_name,
    embedding_function=embeddings.embed_query,
    semantic_configuration_name="contoso-semantic-config"
)

### Similarity Vs. Relevance Test

In [13]:
# Perform a vector search for the multi-item order query, requesting k=10 results.
docs = vector_store.vector_search(
    # Alternative queries kept for experimentation:
    # query = "What are your famous bakery items?", 
    # query = "I am in mood for French today. What are your french options?",
    query = "Can I place an order for 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?",  
    k=10,
)

# Print the text of every returned document, in the order returned.
for doc in docs:
    print(doc.page_content)

 
        Item Name: Mocha
        Mocha details:
            Price: 4.5$
            Category: Coffees
            Description: Delicious combination of coffee, milk and chocolate.
    
 
        Item Name: Blueberry Smoothie
        Blueberry Smoothie details:
            Price: 5.5$
            Category: Smoothies
            Description: Delicious smoothie made with fresh blueberries.
    
 
        Item Name: Lemon Cake
        Lemon Cake details:
            Price: 4.0$
            Category: Bakery
            Description: Sweet and tangy lemon cake with a moist crumb.
    
 
        Item Name: Chocolate Smoothie
        Chocolate Smoothie details:
            Price: 6.0$
            Category: Smoothies
            Description: Rich and creamy smoothie made with chocolate.
    
 
        Item Name: Blueberry Muffin
        Blueberry Muffin details:
            Price: 2.5$
            Category: Bakery
            Description: Soft and moist muffin packed with fresh blueberries and

In [15]:
# Perform a hybrid search with the same order query and k=10, for comparison
# with the vector-search results.
docs = vector_store.hybrid_search(
    # Alternative queries kept for experimentation:
    # query = "What are your famous bakery items?", 
    # query = "I am in mood for French today. What are your french options?",
    query = "Can I place an order for 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?",  
    k=10,
)

# Print the text of every returned document, in the order returned.
for doc in docs:
    print(doc.page_content)

 
        Item Name: Lemon Cake
        Lemon Cake details:
            Price: 4.0$
            Category: Bakery
            Description: Sweet and tangy lemon cake with a moist crumb.
    
 
        Item Name: Chicken Sandwich
        Chicken Sandwich details:
            Price: 5.5$
            Category: Sandwiches
            Description: Delicious sandwich filled with chicken and veggies.
    
 
        Item Name: Strawberry Banana Smoothie
        Strawberry Banana Smoothie details:
            Price: 5.0$
            Category: Smoothies
            Description: Refreshing smoothie made with strawberries and bananas.
    
 
        Item Name: Mocha
        Mocha details:
            Price: 4.5$
            Category: Coffees
            Description: Delicious combination of coffee, milk and chocolate.
    
 
        Item Name: Chocolate Smoothie
        Chocolate Smoothie details:
            Price: 6.0$
            Category: Smoothies
            Description: Rich and creamy smoot

In [16]:
# Perform a semantic hybrid search
# The semantic configuration name is (re)assigned on the store before searching;
# it is the same value that was passed when the store was constructed.
vector_store.semantic_configuration_name = "contoso-semantic-config"
docs = vector_store.semantic_hybrid_search(
    # Alternative query kept for experimentation:
    # query = "What are your famous bakery items?", 
    query = "Can I place an order for 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?",  
    k=10,
)

# Print the text of every returned document, in the order returned.
for doc in docs:
    print(doc.page_content)

 
        Item Name: Lemon Cake
        Lemon Cake details:
            Price: 4.0$
            Category: Bakery
            Description: Sweet and tangy lemon cake with a moist crumb.
    
 
        Item Name: Mocha
        Mocha details:
            Price: 4.5$
            Category: Coffees
            Description: Delicious combination of coffee, milk and chocolate.
    
 
        Item Name: Bagel
        Bagel details:
            Price: 1.5$
            Category: Bakery
            Description: Delicious, dense and chewy bread product, perfect for a quick breakfast.
    
 
        Item Name: Lemon Tea
        Lemon Tea details:
            Price: 2.5$
            Category: Tea or Chai
            Description: A tea with a lemon flavor.
    
 
        Item Name: Carrot Cake
        Carrot Cake details:
            Price: 4.0$
            Category: Bakery
            Description: Moist, sweet cake with a rich carrot and cinnamon flavor.
    
 
        Item Name: Blueberry Muffin
   

In [41]:
# Wrap the vector store as a retriever configured with k=10, send the order
# query through it, and display how many documents came back.
retriever = vector_store.as_retriever(k=10)
query = "Can I place an order for 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?"
docs = retriever.invoke(query)
len(docs)

10

## Query Transformations

Query transformations are a set of approaches that rewrite or otherwise modify the user's question before it is used for retrieval.

In [19]:
# Tracing settings for the query-transformation experiments below.
# NOTE: this replaces any LANGCHAIN_PROJECT value set earlier in the session.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "coffee-retrieval-techniques"

In [42]:
from langchain_core.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
# Prompt asking the model for five rewordings of the user's question, one per
# line; {question} is the only template variable.
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database using hybrid search. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai.chat_models import AzureChatOpenAI
# Chat model shared by the chains in this notebook.
# Authentication follows the same rule as the embeddings client: use the API
# key when one is configured; azure_openai_key is None otherwise, and then the
# RBAC token provider is used instead.
llm = AzureChatOpenAI(
    api_key=azure_openai_key,
    azure_ad_token_provider=token_provider if not azure_openai_key else None,
    azure_endpoint=azure_openai_endpoint,
    api_version=azure_openai_api_version,
    azure_deployment="gpt-35-turbo",
    streaming=True,
)

# Multi-query chain: prompt -> chat model -> plain string -> list of queries.
# Blank lines in the model output are dropped and surrounding whitespace is
# trimmed, so an empty string is never passed on as a query.
generate_queries = (
    prompt_perspectives
    | llm
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

In [43]:
# Run the query-generation chain on the order question and display the
# resulting list of rewritten questions.
question = "Can I place an order for 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?"
generate_queries.invoke(question)

['1. How can I order 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?',
 '2. What is the process for placing an order for 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?',
 '3. Is it possible to order 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?',
 '4. How do I go about ordering 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?',
 '5. What are the steps to place an order for 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?']

In [52]:
retriever = vector_store.as_retriever(k=10) # Even with Query Transformation, K = 8 or 9 is not enough to get relevant results

# Generated queries are fed through retriever.map(), which yields one result
# list per query; the loop prints the size of each list.
mid_chain = generate_queries | retriever.map() 
output  = mid_chain.invoke(question)
for docs in output:
    print(len(docs))

9
9
9
9
9


In [53]:
from langchain_core.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the de-duplicated union of several retrieval result lists.

    Documents are compared by their serialised form (``dumps``). The first
    occurrence of each document is kept and the original encounter order is
    preserved, so the result is the same on every run.

    Args:
        documents: one list of retrieved documents per query.

    Returns:
        A flat list of documents with duplicates removed.
    """
    # Flatten the list of lists and serialise each Document so it is hashable.
    serialized_docs = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys de-duplicates while keeping insertion order (a set would not).
    unique_docs = dict.fromkeys(serialized_docs)
    return [loads(doc) for doc in unique_docs]

# Retrieve
# Full multi-query pipeline: generate query variants, retrieve for each one,
# then merge the result lists into a single de-duplicated list.
question = "Can I place an order for 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
# The bare expression displays the composed chain's repr as the cell output.
retrieval_chain

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='You are an AI language model assistant. Your task is to generate five \ndifferent versions of the given user question to retrieve relevant documents from a vector \ndatabase using hybrid search. By generating multiple perspectives on the user question, your goal is to help\nthe user overcome some of the limitations of the distance-based similarity search. \nProvide these alternative questions separated by newlines. Original question: {question}'))])
| AzureChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001C12B913F90>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001C127ABDBD0>, openai_api_key=SecretStr('**********'), openai_proxy='', streaming=True, azure_endpoint='https://openaishiva.openai.azure.com/', deployment_name='gpt-35-turbo', openai_api_version='2024-02-01', opena

In [54]:
# Run the multi-query retrieval pipeline and print each merged document's text.
docs = retrieval_chain.invoke({"question":question})

for doc in docs:
    print(doc.page_content)

 
        Item Name: Pineapple Smoothie
        Pineapple Smoothie details:
            Price: 5.5$
            Category: Smoothies
            Description: Refreshing smoothie made with fresh pineapple.
    
 
        Item Name: Blueberry Muffin
        Blueberry Muffin details:
            Price: 2.5$
            Category: Bakery
            Description: Soft and moist muffin packed with fresh blueberries and a crumb topping.
    
 
        Item Name: Peach Smoothie
        Peach Smoothie details:
            Price: 5.5$
            Category: Smoothies
            Description: Delicious smoothie made with fresh peaches.
    
 
        Item Name: Blueberry Smoothie
        Blueberry Smoothie details:
            Price: 5.5$
            Category: Smoothies
            Description: Delicious smoothie made with fresh blueberries.
    
 
        Item Name: Strawberry Banana Smoothie
        Strawberry Banana Smoothie details:
            Price: 5.0$
            Category: Smoothies
       

In [55]:
# Number of unique documents left after merging the per-query results.
len(docs)

10

## RAG Fusion

In [59]:
from langchain_core.prompts import ChatPromptTemplate

# RAG-Fusion: Related
# Prompt asking for four search queries related to {question} without changing
# its meaning. NOTE: rebinds the notebook-global name `template`.
template = """You are a helpful assistant that generates multiple search queries based on a single input query. Do NOT change the meaning of the question \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [60]:
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# RAG-Fusion query chain: prompt -> chat model -> plain string -> list of queries.
# NOTE: this rebinds `generate_queries`; from here on the name refers to the
# RAG-Fusion variant rather than the multi-query variant.
# Blank lines in the model output are dropped and surrounding whitespace is
# trimmed, so an empty string is never passed on as a query.
generate_queries = (
    prompt_rag_fusion
    | llm
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

In [61]:
# Display the queries generated for the current `question`.
generate_queries.invoke({"question": question})

['1. How to place an order for a Mocha?',
 '2. Where can I order a lemon cake?',
 '3. Best places to get a blueberry smoothie?',
 '4. How can I order a Chicken sandwich?']

In [63]:
from langchain_core.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists into one list via Reciprocal Rank Fusion.

    Every appearance of a document at zero-based position ``rank`` in one of
    the lists adds ``1 / (rank + k)`` to that document's fused score. Documents
    are identified by their serialised form (``dumps``), so the same document
    returned for several queries accumulates a single score.

    Args:
        results: ranked document lists, one per query (each assumed to be
            ordered best match first).
        k: constant added to the rank in the RRF denominator.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by score, highest
        first. Documents with equal scores keep their first-seen order.
    """
    # Serialised document -> accumulated fused score.
    fused_scores = {}

    for docs in results:
        for rank, doc in enumerate(docs):
            # The serialised string serves as a hashable identity for the document.
            doc_str = dumps(doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    # sorted() is stable, so ties retain insertion (first-seen) order.
    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    return [(loads(doc_str), score) for doc_str, score in ranked]

In [67]:
# RAG-Fusion with k=1 on the retriever: generate queries, retrieve for each
# one, fuse the ranked lists, then display how many fused results there are.
retriever = vector_store.as_retriever(k=1)

retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

4

In [68]:
# Each fused result is a (document, score) tuple, so index 0 is the document.
for doc in docs:
    print(doc[0].page_content)

 
        Item Name: Mocha
        Mocha details:
            Price: 4.5$
            Category: Coffees
            Description: Delicious combination of coffee, milk and chocolate.
    
 
        Item Name: Lemon Cake
        Lemon Cake details:
            Price: 4.0$
            Category: Bakery
            Description: Sweet and tangy lemon cake with a moist crumb.
    
 
        Item Name: Blueberry Smoothie
        Blueberry Smoothie details:
            Price: 5.5$
            Category: Smoothies
            Description: Delicious smoothie made with fresh blueberries.
    
 
        Item Name: Chicken Sandwich
        Chicken Sandwich details:
            Price: 5.5$
            Category: Sandwiches
            Description: Delicious sandwich filled with chicken and veggies.
    


In [70]:
# Same RAG-Fusion pipeline with k=5 on the retriever, tried on an open-ended
# question instead of the multi-item order.
retriever = vector_store.as_retriever(k=5)

retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
question = "what french options do you have?"
docs = retrieval_chain_rag_fusion.invoke({"question": question})
# Each fused result is a (document, score) tuple, so index 0 is the document.
for doc in docs:
    print(doc[0].page_content)

 
        Item Name: Pain au Chocolat
        Pain au Chocolat details:
            Price: 3.5$
            Category: Bakery
            Description: Classic French pastry filled with chocolate.
    
 
        Item Name: Croissant
        Croissant details:
            Price: 2.0$
            Category: Bakery
            Description: Buttery, flaky pastry, freshly baked to golden perfection.
    
 
        Item Name: Chamomile Tea
        Chamomile Tea details:
            Price: 2.5$
            Category: Tea or Chai
            Description: A herbal tea made from the flowers of the chamomile plant.
    
 
        Item Name: Cortado
        Cortado details:
            Price: 3.5$
            Category: Coffees
            Description: A beverage consisting of espresso mixed with a roughly equal amount of warm milk to reduce the acidity.
    
 
        Item Name: Cafe au Lait
        Cafe au Lait details:
            Price: 3.5$
            Category: Coffees
            Description: A 

## Decomposition

### Answer recursively  

* https://arxiv.org/pdf/2205.10625.pdf
* https://arxiv.org/pdf/2212.10509.pdf

### Answer individually 


In [71]:
from langchain_core.prompts import ChatPromptTemplate

# Decomposition
# Prompt asking the model to break {question} into three sub-questions.
# NOTE: rebinds the notebook-global name `template`.
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [72]:
# Chain: decomposition prompt -> chat model -> plain string -> list of sub-questions.
# Blank lines in the model output are dropped and surrounding whitespace is
# trimmed, so the list contains only real sub-questions.
generate_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

# Run on the multi-item order question and display the sub-questions.
question = "Can I place an order for 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?"
questions = generate_queries_decomposition.invoke({"question":question})
questions

['1. Where can I order a Mocha?',
 '2. Where can I order a lemon cake?',
 '3. Where can I order a blueberry smoothie and a Chicken sandwich?']

In [73]:
# Run the decomposition chain on an open-ended question and display the
# generated sub-questions.
question = "What french options do you have?"
questions = generate_queries_decomposition.invoke({"question":question})
questions

['1. What are some French cuisine options?',
 '2. What are some French language learning options?',
 '3. What are some French travel options?']

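Decomposition may not be well suited to this use case: for an open-ended question, the generated sub-questions drift away from the coffee-shop menu (for example, French language learning and French travel).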

## Step Back

## Part 8: Step Back

Paper: 

* https://arxiv.org/pdf/2310.06117.pdf

### HyDE

In [74]:
from langchain_core.prompts import ChatPromptTemplate

# HyDE document generation
# Prompt asking the model to write a passage that answers {question}; the
# generated passage is what later gets sent to the retriever.
# NOTE: rebinds the notebook-global name `template`.
template = """Please write a scientific paper passage to answer the question
Question: {question}
Passage:"""
prompt_hyde = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import AzureChatOpenAI

# HyDE generation chain: prompt -> chat model -> plain string (the
# hypothetical passage).
generate_docs_for_retrieval = (
    prompt_hyde | llm | StrOutputParser() 
)

In [77]:
# Run
# NOTE: `docs` here holds the generated passage text (the chain ends in
# StrOutputParser), not a list of documents as in other cells.
question = "Can I place an order for 1 Mocha, 1 lemon cake, 1 blueberry smoothie, and 1 Chicken sandwich?"
docs = generate_docs_for_retrieval.invoke({"question":question})
print(docs)

Title: Analysis of Placing an Order for Multiple Food Items

Abstract:
This scientific paper aims to investigate the feasibility of placing an order for one Mocha, one lemon cake, one blueberry smoothie, and one Chicken sandwich. By examining the compatibility of these food items, their nutritional content, and potential interactions, we can determine whether this order is suitable for consumption.

Introduction:
In recent years, the popularity of ordering multiple food items has risen significantly. However, it is essential to consider the compatibility and potential health implications of consuming different food items together. This paper delves into the nutritional aspects and potential interactions of a specific order consisting of one Mocha, one lemon cake, one blueberry smoothie, and one Chicken sandwich.

Methods:
To evaluate the compatibility of the chosen food items, we conducted a thorough analysis of their nutritional content and potential interactions. We gathered data fro

In [84]:
# Retrieve
# The generated passage, rather than the raw question, is piped into the
# retriever (configured with k=8).
retriever = vector_store.as_retriever(k=8)

# NOTE: rebinds `retrieval_chain`, which named the multi-query pipeline earlier.
retrieval_chain = generate_docs_for_retrieval | retriever 
retrieved_docs = retrieval_chain.invoke({"question":question})

# Print the text of every retrieved document.
for doc in retrieved_docs:
    print(doc.page_content)

 
        Item Name: Chicken Sandwich
        Chicken Sandwich details:
            Price: 5.5$
            Category: Sandwiches
            Description: Delicious sandwich filled with chicken and veggies.
    
 
        Item Name: Mocha
        Mocha details:
            Price: 4.5$
            Category: Coffees
            Description: Delicious combination of coffee, milk and chocolate.
    
 
        Item Name: Strawberry Banana Smoothie
        Strawberry Banana Smoothie details:
            Price: 5.0$
            Category: Smoothies
            Description: Refreshing smoothie made with strawberries and bananas.
    
 
        Item Name: Chocolate Smoothie
        Chocolate Smoothie details:
            Price: 6.0$
            Category: Smoothies
            Description: Rich and creamy smoothie made with chocolate.
    
 
        Item Name: Mango Banana Smoothie
        Mango Banana Smoothie details:
            Price: 5.5$
            Category: Smoothies
            Descriptio

## Contextual Compression

In [87]:
# Helper function for printing docs


def pretty_print_docs(docs):
    """Print every document's text under a numbered "Document N:" heading.

    Consecutive documents are separated by a rule of 100 dashes on its own
    line. Each item in ``docs`` must expose a string ``page_content``.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for number, document in enumerate(docs, start=1):
        sections.append("Document " + str(number) + ":\n\n" + document.page_content)
    print(divider.join(sections))

In [88]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# Contextual compression with an LLM-based extractor: documents from the base
# retriever (k=10) are passed through the compressor before being printed.
retriever = vector_store.as_retriever(k=10)
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)
# Uses whatever `question` currently holds in the kernel.
compressed_docs = compression_retriever.invoke(question)
pretty_print_docs(compressed_docs)



Document 1:

Item Name: Lemon Cake
Lemon Cake details:
    Price: 4.0$
    Category: Bakery
    Description: Sweet and tangy lemon cake with a moist crumb.
----------------------------------------------------------------------------------------------------
Document 2:

Chicken Sandwich
Price: 5.5$
Category: Sandwiches
Description: Delicious sandwich filled with chicken and veggies.
----------------------------------------------------------------------------------------------------
Document 3:

Item Name: Strawberry Banana Smoothie
Strawberry Banana Smoothie details:
Price: 5.0$
Category: Smoothies
Description: Refreshing smoothie made with strawberries and bananas.
----------------------------------------------------------------------------------------------------
Document 4:

Item Name: Mocha
----------------------------------------------------------------------------------------------------
Document 5:

Item Name: Chocolate Smoothie
Price: 6.0$
Category: Smoothies
Description: Rich a

In [91]:
from langchain.retrievers.document_compressors import LLMChainFilter
# from warnings import filterwarnings
# filterwarnings("ignore")

# Contextual compression with an LLM-based filter in place of the extractor.
# Reuses the `retriever` defined in an earlier cell.
_filter = LLMChainFilter.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=_filter, base_retriever=retriever
)

compressed_docs = compression_retriever.invoke(question)
pretty_print_docs(compressed_docs)

Document 1:

 
        Item Name: Lemon Cake
        Lemon Cake details:
            Price: 4.0$
            Category: Bakery
            Description: Sweet and tangy lemon cake with a moist crumb.
    
----------------------------------------------------------------------------------------------------
Document 2:

 
        Item Name: Chicken Sandwich
        Chicken Sandwich details:
            Price: 5.5$
            Category: Sandwiches
            Description: Delicious sandwich filled with chicken and veggies.
    
----------------------------------------------------------------------------------------------------
Document 3:

 
        Item Name: Mocha
        Mocha details:
            Price: 4.5$
            Category: Coffees
            Description: Delicious combination of coffee, milk and chocolate.
    
----------------------------------------------------------------------------------------------------
Document 4:

 
        Item Name: Blueberry Smoothie
        Blueberry

In [100]:
from langchain.retrievers.document_compressors import EmbeddingsFilter

# Contextual compression with an embeddings-based filter. The 0.42 similarity
# threshold was chosen by trial and error (see the note at the end of the cell).
# Reuses the `retriever` defined in an earlier cell.
embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.42)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter, base_retriever=retriever
)

compressed_docs = compression_retriever.invoke(question)
pretty_print_docs(compressed_docs)

## Observation: this does not work very well here, and a good
## similarity_threshold value is hard to determine.

Document 1:

 
        Item Name: Mocha
        Mocha details:
            Price: 4.5$
            Category: Coffees
            Description: Delicious combination of coffee, milk and chocolate.
    
----------------------------------------------------------------------------------------------------
Document 2:

 
        Item Name: Blueberry Smoothie
        Blueberry Smoothie details:
            Price: 5.5$
            Category: Smoothies
            Description: Delicious smoothie made with fresh blueberries.
    
----------------------------------------------------------------------------------------------------
Document 3:

 
        Item Name: Lemon Cake
        Lemon Cake details:
            Price: 4.0$
            Category: Bakery
            Description: Sweet and tangy lemon cake with a moist crumb.
    
----------------------------------------------------------------------------------------------------
Document 4:

 
        Item Name: Chocolate Smoothie
        Chocolate 