# Introduction

In this tutorial, we'll demonstrate how to use semantic caching of LLM responses with Azure Cosmos DB for MongoDB and LangChain.

[Learn more here from the LangChain docs.](https://python.langchain.com/docs/integrations/llms/llm_caching#azure-cosmos-db-semantic-cache)

In [None]:
# ! pip install langchain
# ! pip install langchain_openai
# ! pip install langchain_community
# ! pip install pymongo
# ! pip install python-dotenv
# ! pip install azure-core
# ! pip install azure-cosmos

In [None]:
from langchain_community.cache import AzureCosmosDBSemanticCache
from langchain.globals import set_llm_cache
import urllib 

AzureCosmosDBSemanticCache
from langchain_community.vectorstores.azure_cosmos_db import (
    CosmosDBSimilarityType,
    CosmosDBVectorSearchType,
)

from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.prompts import PromptTemplate

from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain.globals import set_llm_cache

import pymongo
from dotenv import load_dotenv

In [None]:
from dotenv import dotenv_values

# Name of the .env file to load; copy the example.env template and point this at your own file
env_name = "example.env"
config = dotenv_values(env_name)

# Azure Cosmos DB for MongoDB connection settings, read from the .env file
COSMOS_MONGO_USER, COSMOS_MONGO_PWD, COSMOS_MONGO_SERVER = (
    config[setting]
    for setting in (
        "cosmos_db_mongo_user",
        "cosmos_db_mongo_pwd",
        "cosmos_db_mongo_server",
    )
)

# Embedding dimension count, converted to int for the embeddings client and the cache
DIMENSIONS = int(config["openai_embeddings_dimensions"])

In [None]:
from urllib.parse import quote_plus

INDEX_NAME = "VectorSearchIndex"
NAMESPACE = "ExampleDB.CachingCollection"

# Percent-encode the credentials before embedding them in the URI: a user name
# or password containing reserved characters such as "@", ":", "/" or "%"
# would otherwise yield a malformed (or misparsed) connection string.
# NOTE: the .env values are expected to be the raw, un-escaped credentials.
CONNECTION_STRING = (
    f"mongodb+srv://{quote_plus(COSMOS_MONGO_USER)}:{quote_plus(COSMOS_MONGO_PWD)}"
    f"@{COSMOS_MONGO_SERVER}"
    "?tls=true&authMechanism=SCRAM-SHA-256&retrywrites=false&maxIdleTimeMS=120000"
)

# NAMESPACE has the form "<database>.<collection>"
DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
mongo_client = pymongo.MongoClient(CONNECTION_STRING)


In [None]:
def init_llm_chain():
    """Build the demo LLM chain and register a Cosmos DB semantic cache.

    The function performs these steps, in order:

    1. Drops the indexes and the collection named by ``DB_NAME`` /
       ``COLLECTION_NAME`` so each run starts from an empty cache.
    2. Creates the Azure OpenAI chat model and embeddings clients from the
       values in ``config``.
    3. Builds an ``LLMChain`` around a prompt whose only input is
       ``question``.
    4. Creates an ``AzureCosmosDBSemanticCache`` and installs it as the
       global LLM cache with ``set_llm_cache``.

    Returns:
        The configured ``LLMChain``.
    """

    # Clear old cache if it exists
    mongo_client[DB_NAME][COLLECTION_NAME].drop_indexes()
    mongo_client[DB_NAME].drop_collection(COLLECTION_NAME)

    # Define a template for the LLM prompt
    prompt_template = """
    You are an upbeat AI assistant who is excited to help answer questions. 

    Question: {question}
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    """
    # The template has exactly one placeholder, so declare only "question".
    # Declaring an extra, unused variable (previously "context") can fail
    # template validation or make the chain reject single-string inputs.
    chatbot_prompt = PromptTemplate(
        template=prompt_template, input_variables=["question"])

    # Requires model version 0301 or more recent
    # Point to completions model deployed in Azure OpenAI
    llm = AzureChatOpenAI(
        deployment_name=config['openai_completions_deployment'],
        model_name=config['openai_completions_model'],
        api_key=config['openai_api_key'],
        azure_endpoint=config['openai_api_endpoint'],
        api_version=config['openai_api_version'],
        cache=True,  # opt in to the cache registered via set_llm_cache below
        n=1)

    # Point to embeddings model deployed in Azure OpenAI
    embeddings = AzureOpenAIEmbeddings(
        azure_deployment=config['openai_embeddings_deployment'],
        model=config['openai_embeddings_model'],
        api_key=config['openai_api_key'],
        azure_endpoint=config['openai_api_endpoint'],
        dimensions=DIMENSIONS)

    # Setup simple LLM chain
    llm_chain = LLMChain(llm=llm, prompt=chatbot_prompt)

    # Setup semantic cache for LLM
    num_lists = 1
    similarity_algorithm = CosmosDBSimilarityType.COS
    kind = CosmosDBVectorSearchType.VECTOR_IVF

    # Threshold passed to the cache for deciding whether a stored prompt is
    # similar enough to count as a hit
    score_threshold = 0.9

    sem_cache = AzureCosmosDBSemanticCache(
            cosmosdb_connection_string=CONNECTION_STRING,
            cosmosdb_client=None,
            embedding=embeddings,
            database_name=DB_NAME,
            collection_name=COLLECTION_NAME,
            num_lists=num_lists,
            similarity=similarity_algorithm,
            kind=kind,
            dimensions=DIMENSIONS,
            score_threshold=score_threshold)

    # Register the cache globally so LLM calls made through the chain use it
    set_llm_cache(sem_cache)

    return llm_chain

In [None]:
# Initialize the LLM chain; this also drops any existing cache collection
# and registers the semantic cache as the global LLM cache
llm_chain = init_llm_chain()

In [None]:
%%time
# The first time, the question/response is not yet cached in Cosmos DB, so retrieval should be slower
llm_chain.invoke("Tell me something interesting about beer making")

In [None]:
%%time
# This question/response is not yet cached in Cosmos DB, so retrieval should be slower
llm_chain.invoke("Tell me a joke about tomatoes and food.")

In [None]:
%%time
# The second time, the question/response is cached in Cosmos DB, so retrieval should be faster
llm_chain.invoke("Tell me something interesting about beer making")

In [None]:
%%time
# This question is semantically similar to an earlier, already cached one (within score_threshold), so retrieval should be faster
llm_chain.invoke("How do I make beer?")