## 1. Set environment variables for the MemoryDB cluster (`MEMORYDB_HOST`, `MEMORYDB_PORT`)

## 2. Install packages

In [None]:
# Install the packages this notebook imports into the current Jupyter kernel.
# pip is run through `sys.executable` so it uses the same interpreter as the
# kernel rather than whichever `pip` happens to be first on PATH.
import sys
# langchain_core: imported below for set_llm_cache
!{sys.executable} -m pip install langchain_core
# langchain_aws: imported below for ChatBedrock, BedrockEmbeddings and InMemorySemanticCache
!{sys.executable} -m pip install langchain_aws
# redis: imported below for redis.cluster.RedisCluster
!{sys.executable} -m pip install redis

In [None]:
import os
from langchain_core.globals import set_llm_cache
from langchain_aws import InMemorySemanticCache
from langchain_aws import ChatBedrock
from langchain_aws.embeddings import BedrockEmbeddings
import redis
from redis.cluster import RedisCluster as MemoryDB

## Initialize the ChatBedrock model and the embeddings client

In [None]:
# Inference parameters handed to the Anthropic model through ChatBedrock's
# `model_kwargs` argument.
model_kwargs = {
    "temperature": 0,
    "top_k": 250,
    "top_p": 1,
    # Two real newline characters followed by "Human:". The previous value
    # escaped the backslashes ("\\n\\nHuman:"), which is the literal text
    # backslash-n and therefore never matched a turn boundary.
    "stop_sequences": ["\n\nHuman:"],
}

In [None]:
# Build the Bedrock chat client for Anthropic Claude 3 Sonnet, applying the
# inference parameters collected in `model_kwargs`.
llm = ChatBedrock(
    model_kwargs=model_kwargs,
    model_id="anthropic.claude-3-sonnet-20240229-v1:0",
)


In [None]:
# Create the Bedrock embeddings client with the library defaults (no model_id
# is passed). It is later supplied as `embedding=` to InMemorySemanticCache.
# NOTE(review): the original comment calls this a Titan Embeddings client —
# presumably that is the default model; confirm against langchain_aws.
embeddings = BedrockEmbeddings()

## Connect to MemoryDB

In [None]:
%%time
memorydb_host = os.environ.get("MEMORYDB_HOST", "localhost")
memorydb_port = os.environ.get("MEMORYDB_PORT", 6379)
# print(f"MemoryDB Url = {memorydb_host}:{memorydb_port}")
rc = MemoryDB(host=memorydb_host, port=memorydb_port, ssl=False, decode_responses=False, ssl_cert_reqs="none")
rc.ping()
#rc.flushall()

## Submit a query without setting up the cache

In [None]:
%%time
response=llm.invoke("Tell me about mission to moon")
print(response.content)

## Enable MemoryDB for durable semantic caching 

In [None]:
# Register a MemoryDB-backed semantic cache as the global LangChain LLM cache,
# using the embeddings client to embed prompts.
# NOTE(review): the "/ssl=True&ssl_cert_reqs=none" suffix sits in the URL path
# rather than a "?query" string, and it says ssl=True while the direct client
# above connects with ssl=False over a "redis://" scheme — confirm how
# langchain_aws interprets this URL and whether TLS is actually intended.
set_llm_cache(
    InMemorySemanticCache(
        embedding=embeddings,
        redis_url=f"redis://{memorydb_host}:{memorydb_port}/ssl=True&ssl_cert_reqs=none",
    )
)

### Submit a query to the LLM, then re-run the same cell to see the improvement in response time.

In [None]:
%%time
response=llm.invoke("Tell me about mission to moon")
print(response.content)

In [None]:
%%time
response=llm.invoke("Who first invented a telescope")
print(response.content)

In [None]:
%%time
response=llm.invoke("Who first invented a car")
print(response.content)

In [None]:
%%time
respone3=llm.invoke("Who  first  a Telescope")
print(respone3.content)