**Azure Cache for Redis Enterprise - Smeantic Caching**
This workbook showcases the usage of Azure Cache for Redis enterprise as a smeantic caching layer



In [None]:
#Installing the required packages
#This script is used to install the required packages for the project
%pip install requests
%pip install redis
%pip install numpy
%pip install python-dotenv
%pip install redisvl

In [None]:
import json
import requests
import os
import getpass
import time
import redis
import numpy as np
from dotenv import load_dotenv
from redisvl.extensions.llmcache import SemanticCache

# load environment variables from .env file
load_dotenv(override=True)
# make sure you have a .env file in the same directory as this notebook with the following variables set:
AZURE_OPENAI_ENDPOINT=os.getenv("AZURE_OPENAI_ENDPOINT")
API_KEY=os.getenv("AZURE_OPENAI_API_KEY")
API_VERSION=os.getenv("OPENAI_API_VERSION")
CHATCOMPLETION_DEPLOYMENT_NAME=os.getenv("AZURE_OPENAI_COMPLETION_DEPLOYMENT_NAME")
CHATCOMPLETION_MODEL_NAME=os.getenv("AZURE_OPENAI_COMPLETION_MODEL_NAME")
EMBEDDINGS_DEPLOYMENT_NAME=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
EMBEDDINGS_MODEL_NAME=os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_NAME")
  
API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
API_VERSION = os.getenv("OPENAI_API_VERSION")
RESOURCE_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
DEPLOYMENT_ID = os.getenv("AZURE_OPENAI_COMPLETION_DEPLOYMENT_NAME")
url = RESOURCE_ENDPOINT + "/openai/deployments/" + DEPLOYMENT_ID + "/chat/completions?api-version=" + API_VERSION

#print(url)

AZURE_OPENAI_ENDPOINT:  https://skd-apim-test.azure-api.net/aigatewaylandingzonesample/
https://skd-apim-test.azure-api.net/aigatewaylandingzonesample//openai/deployments/skd-openaichat-gpt35/chat/completions?api-version=2024-02-01


In [10]:
# Create redis url for creation of a new cache index
REDIS_ENDPOINT = os.getenv("REDIS_ENDPOINT")
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD")

if REDIS_ENDPOINT and REDIS_PASSWORD:
	redis_url = "rediss://:" + REDIS_PASSWORD + "@" + REDIS_ENDPOINT
	#print("Redis URL: " + redis_url)
else:
	print("Error: REDIS_ENDPOINT or REDIS_PASSWORD environment variable is not set.")
	redis_url = None

In [4]:
# Create the semantic cache index, do not run this multiple times
llmcache = SemanticCache(
    name="llmcache-demo-cts1234",                     # underlying search index name
    redis_url=redis_url,                             # redis connection url string
    distance_threshold=0.3,                          # semantic cache distance threshold
    embedding_provider="azure_openai",               # specify embedding provider as Azure OpenAI
    embedding_config={
        "api_key": API_KEY,
        "api_version": API_VERSION,
        "azure_endpoint": AZURE_OPENAI_ENDPOINT,
        "deployment_id": EMBEDDINGS_DEPLOYMENT_NAME
    }
)

  from tqdm.autonotebook import tqdm, trange


In [None]:
import time

# Invoke Azure OpenAI Chat Completion API without semantic cache and note the response time.

# Get user prompt
userprompt = input("Enter your prompt: ")

# Check if response exists in semantic cache
start_time = time.time()
if r := llmcache.check(prompt=userprompt, return_fields=["response"]):
    print("Cache hit, returning response from cache")
    print(f"Prompt: {userprompt}\nResponse: {r}")
else:
    print("Empty cache, calling LLM to generate response")
    # Call Azure OpenAI API for new response
    r = requests.post(
        url, 
        headers={"api-key": API_KEY}, 
        json={
            "messages":[
                {"role": "assistant", "content": "You are an AI assistant that helps people find information. "}, 
                {"role": "user", "content": userprompt}
            ]
        }
    )
    # Extract the response
    response_content = r.json()["choices"][0]["message"]["content"]
    # Store the response in semantic cache
    print("Store the response from LLM in cache")
    llmcache.store(
        prompt=userprompt,
        response=response_content
    )
    # Display the response
    print(f"Prompt: {userprompt}\nResponse: {response_content}")

end_time = time.time()
execution_time = end_time - start_time
print(f"\nExecution time: {execution_time:.4f} seconds")


Cache hit, returning response from cache
Prompt: What are the top 5 scientific breakthroughs of 19th century
Response: [{'response': '1. Theory of Evolution by Natural Selection (Charles Darwin, 1859)\n2. Discovery of the Electron (J.J. Thomson, 1897)\n3. Germ Theory of Disease (Louis Pasteur, 1861)\n4. Periodic Table of Elements (Dmitri Mendeleev, 1869)\n5. Conservation of Energy (James Joule, 1843)', 'key': 'llmcache-demo-cts1234:c24005385aceff5e0083c0ca97aca4ec5631856dd468007144072fad7c65390b'}]
