- [Optimize Azure OpenAI Applications with Semantic Caching](https://techcommunity.microsoft.com/blog/azurearchitectureblog/optimize-azure-openai-applications-with-semantic-caching/4106867)
- [How to cache chat model responses](https://python.langchain.com/docs/how_to/chat_model_caching/)
- [Tutorial: Use Azure Cache for Redis as a semantic cache](https://learn.microsoft.com/en-us/azure/azure-cache-for-redis/cache-tutorial-semantic-cache)
- [Enable semantic caching for Azure OpenAI APIs in Azure API Management](https://learn.microsoft.com/en-us/azure/api-management/azure-openai-enable-semantic-caching)

In [1]:
# Logging configuration for the notebook
import logging

# Notebook-level logger, named after the current module.
logger = logging.getLogger(__name__)

# Root logger at DEBUG: the httpx / httpcore / openai client debug records
# then appear in cell output, which is how a real HTTP request (cache miss)
# can be told apart from a cached response further down.
logging.basicConfig(level=logging.DEBUG)

In [2]:
# Load environment variables from .env
from dotenv import load_dotenv

# Use an explicit check instead of `assert`: assert statements are removed
# when Python runs with -O, which would silently skip the load_dotenv() call
# itself (not just the check) and leave the environment unconfigured.
# override=True lets values from .env replace variables already set in the
# process environment.
if not load_dotenv(override=True):
    raise RuntimeError("Failed to load .env file")

In [3]:
# Set up LLM
from workshop_llm_agents.llms.azure_openai import AzureOpenAIWrapper

# Project-local wrapper. Presumably it reads the Azure OpenAI endpoint, key and
# deployment settings from the environment loaded from .env — TODO confirm
# against the wrapper implementation.
azure_openai_wrapper = AzureOpenAIWrapper()
# Chat model used for the caching demonstration; `llm.invoke(...)` returns an
# AIMessage (see the output of the last cell).
llm = azure_openai_wrapper.get_azure_chat_openai()

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='/home/stakenaka/src/github.com/ks6088ts-labs/workshop-llm-agents/.venv/lib/python3.10/site-packages/certifi/cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='/home/stakenaka/src/github.com/ks6088ts-labs/workshop-llm-agents/.venv/lib/python3.10/site-packages/certifi/cacert.pem'


In [4]:
from langchain_core.caches import InMemoryCache
from langchain_core.globals import set_llm_cache

# Create the in-memory cache first, then register it through LangChain's
# global cache setter so that subsequent `llm.invoke(...)` calls can reuse
# stored responses. The cache lives only in this kernel's memory.
llm_cache = InMemoryCache()
set_llm_cache(llm_cache)

In [5]:
%%time

# First call with this prompt: nothing is cached yet, so the request goes to
# Azure OpenAI (see the HTTP debug records in the output) and %%time reports
# the full round-trip latency.
response = llm.invoke("Tell me a joke")

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/deployments/gpt-4o/chat/completions', 'headers': {'api-key': '<redacted>'}, 'files': None, 'json_data': {'messages': [{'content': 'Tell me a joke', 'role': 'user'}], 'model': 'gpt-4o', 'stream': False, 'temperature': 0.0}}
DEBUG:openai._base_client:Sending HTTP Request: POST https://aoaiplaygroundsnih8leastus.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-10-21
DEBUG:httpcore.connection:connect_tcp.started host='aoaiplaygroundsnih8leastus.openai.azure.com' port=443 local_address=None timeout=None socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x7f47f8fa3400>
DEBUG:httpcore.connection:start_tls.started ssl_context=<ssl.SSLContext object at 0x7f47f91d6fc0> server_hostname='aoaiplaygroundsnih8leastus.openai.azure.com' timeout=None
DEBUG:httpcore.connection:start_tls.complete return_value=<httpcore._backends.sync

CPU times: user 110 ms, sys: 733 μs, total: 111 ms
Wall time: 1.4 s


In [6]:
%%time

# Same prompt again: the output shows no HTTP debug records and a wall time
# far below the first call, i.e. the response is served from the cache.
# The prompt string must match the first call for this comparison to hold.
llm.invoke("Tell me a joke")

CPU times: user 651 μs, sys: 50 μs, total: 701 μs
Wall time: 692 μs


AIMessage(content="Sure! Here's one for you:\n\nWhy don’t skeletons fight each other?\n\nBecause they don’t have the guts! 😄", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 11, 'total_tokens': 38, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-11-20', 'system_fingerprint': 'fp_ded0d14823', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 