## Initialize environment

In [1]:
from dotenv import load_dotenv

# Load variables from the local ".env" file into the process environment so the
# os.environ / os.getenv lookups in the cells below can read them.
load_dotenv(dotenv_path=".env")

True

## Initialize Embeddings & Vector Store

We use AWS Bedrock embeddings and Azure AI Search as the vector store.

In [2]:
import os
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.vectorstores.azuresearch import AzureSearch


class AWSBedrockEmbeddings:
    """Build a ``BedrockEmbeddings`` client from settings held in environment variables.

    Construction checks that the required AWS variables are present, reads the
    region from ``AWS_REGION`` and the model id from ``AWS_LLM_EMBEDDINGS_ID``,
    and creates the client straight away. The resulting values are exposed
    through read-only properties.
    """

    def __init__(self):
        self._embeddings = None
        # Validate first so the os.environ[...] reads below cannot raise KeyError.
        self._validate_aws_env_variables()
        self._region_name = os.environ["AWS_REGION"]
        self._model_id = os.environ["AWS_LLM_EMBEDDINGS_ID"]
        self.initialize_embeddings()

    def _validate_aws_env_variables(self):
        """Raise ``ValueError`` naming the first required AWS variable that is missing.

        Only presence in ``os.environ`` is checked; the values themselves are not inspected.
        """
        # Checked in this order; the first absent name is the one reported.
        required_names = (
            "AWS_REGION",
            "AWS_LLM_EMBEDDINGS_ID",
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        )
        for env_name in required_names:
            if env_name not in os.environ:
                raise ValueError(f"{env_name} environment variable not set")

    def initialize_embeddings(self):
        """(Re)create the ``BedrockEmbeddings`` client from the stored region and model id."""
        client = BedrockEmbeddings(region_name=self._region_name, model_id=self._model_id)
        self._embeddings = client

    @property
    def embeddings(self):
        """The ``BedrockEmbeddings`` client created by ``initialize_embeddings``."""
        return self._embeddings

    @property
    def model_id(self):
        """Embedding model id read from ``AWS_LLM_EMBEDDINGS_ID``."""
        return self._model_id

    @property
    def region_name(self):
        """AWS region read from ``AWS_REGION``."""
        return self._region_name


def get_azure_search_vector_store(embeddings):
    """
    Create an ``AzureSearch`` vector store that embeds queries with the given embeddings.

    The endpoint, API key and index name are taken from the
    ``AZURE_SEARCH_ENDPOINT``, ``AZURE_SEARCH_API_KEY`` and ``AZURE_SEARCH_INDEX``
    environment variables; an unset variable is passed through as ``None``.

    :param embeddings: Embeddings object whose ``embed_query`` method is handed
        to the store as its embedding function.
    :return: The constructed ``AzureSearch`` vector store.
    """
    # AzureSearch keyword -> value read from the environment.
    connection_settings = {
        "azure_search_endpoint": os.getenv("AZURE_SEARCH_ENDPOINT"),
        "azure_search_key": os.getenv("AZURE_SEARCH_API_KEY"),
        "index_name": os.getenv("AZURE_SEARCH_INDEX"),
    }
    return AzureSearch(**connection_settings, embedding_function=embeddings.embed_query)

In [3]:
# Build the Bedrock embeddings client once; the same object is reused for the
# vector store here and for the semantic cache configured further down.
embeddings = AWSBedrockEmbeddings().embeddings
vectorstore = get_azure_search_vector_store(embeddings)

## Set Up Caching with a Custom Cache

### PostgreSQL Semantic Caching

In [4]:
from typing import Any, Optional, List
import os

import psycopg
from langchain_core.caches import RETURN_VAL_TYPE, BaseCache
from langchain_core.embeddings import Embeddings
from langchain_core.load.dump import dumps
from langchain_core.load.load import loads


# Schema bootstrap script executed by _init_db(). It is idempotent (IF NOT EXISTS)
# and creates:
#   * the cache table: prompt text, prompt embedding, serialized response and a
#     creation timestamp;
#   * an HNSW index with cosine ops on the embedding column.
# NOTE(review): the `vector` column type and `hnsw` index method presumably come
# from the pgvector extension, which this script does not create — confirm it is
# already installed in the target database.
# NOTE(review): the embedding width is fixed at 1536 and must match the output size
# of the embedding model in use — verify against AWS_LLM_EMBEDDINGS_ID.
DB_INIT_SQL = """
CREATE TABLE IF NOT EXISTS public.langchain_semantic_cache(
    id SERIAL PRIMARY KEY,
    prompt TEXT,
    prompt_embedding vector(1536),
    response TEXT,
    created_at timestamp default current_timestamp
);

CREATE INDEX IF NOT EXISTS langchain_semantic_cache_hnsw_index
    ON public.langchain_semantic_cache
    USING hnsw
    (prompt_embedding vector_cosine_ops)
    WITH (m = 8, ef_construction = 20);
"""



def get_postgres_conn_url():
    """
    Build a ``postgresql://`` connection URL from the ``POSTGRES_*`` environment variables.

    The user name, password and database name are percent-encoded so that
    reserved characters inside them (``@``, ``:``, ``/``, ``#`` ...) cannot
    corrupt the URL structure. Values without such characters yield the same
    URL as plain interpolation.

    :return: URL of the form ``postgresql://user:password@host:port/dbname``.
    :raises KeyError: If ``POSTGRES_HOST``, ``POSTGRES_PORT``, ``POSTGRES_USER``,
        ``POSTGRES_PASSWORD`` or ``POSTGRES_DB`` is not set.
    """
    # Function-scope import: only this helper needs it.
    from urllib.parse import quote

    # os.environ[...] rather than os.getenv(): a missing setting must fail here
    # instead of silently producing a URL containing the literal text "None".
    postgres_host = os.environ["POSTGRES_HOST"]
    postgres_port = os.environ["POSTGRES_PORT"]
    # safe="" so that "/" is escaped too (quote() leaves it untouched by default).
    postgres_user = quote(os.environ["POSTGRES_USER"], safe="")
    postgres_password = quote(os.environ["POSTGRES_PASSWORD"], safe="")
    postgres_db = quote(os.environ["POSTGRES_DB"], safe="")

    return f"postgresql://{postgres_user}:{postgres_password}@{postgres_host}:{postgres_port}/{postgres_db}"


def get_postgres_conn():
    """
    Open a new psycopg connection configured from the ``POSTGRES_*`` environment variables.

    :return: Whatever ``psycopg.connect`` returns for the configured server.
    :raises KeyError: If one of the required environment variables is not set.
    """
    # psycopg.connect keyword -> environment variable supplying its value.
    env_name_by_option = {
        "user": "POSTGRES_USER",
        "password": "POSTGRES_PASSWORD",
        "host": "POSTGRES_HOST",
        "port": "POSTGRES_PORT",  # The port you exposed in docker-compose.yml
        "dbname": "POSTGRES_DB",
    }
    connect_options = {
        option: os.environ[env_name]
        for option, env_name in env_name_by_option.items()
    }
    return psycopg.connect(**connect_options)


def get_prompt_content(prompt: str):
    """
    Return the ``content`` of the first element of a serialized prompt.

    :param prompt: String that ``loads`` can deserialize into an indexable
        collection of objects exposing a ``content`` attribute.
    :return: The ``content`` attribute of the element at index 0.
    """
    # NOTE(review): only the first element is used; if a prompt can carry several
    # messages, the later ones do not influence the cache key — confirm this is intended.
    deserialized = loads(prompt)
    first_entry = deserialized[0]
    return first_entry.content


def _init_db():
    """Run the ``DB_INIT_SQL`` bootstrap script on a fresh connection from ``get_postgres_conn``."""
    with get_postgres_conn() as connection, connection.cursor() as cur:
        cur.execute(DB_INIT_SQL)


class PostgreSQLSemanticCache(BaseCache):
    """LLM cache that matches prompts by embedding similarity stored in PostgreSQL.

    Every cached generation is written to ``langchain_semantic_cache`` together
    with the embedding of its prompt. A lookup embeds the incoming prompt and
    returns the stored responses whose cosine similarity to it reaches
    ``score_threshold``, closest first.

    NOTE(review): ``llm_string`` is accepted by ``lookup``/``update`` but is
    neither stored nor filtered on, so entries are shared across all model
    configurations — confirm this is intended.
    """

    def __init__(self, embeddings: Embeddings, score_threshold: float = 0.7, top_k: int = 3):
        """
        Store the settings and make sure the cache table exists.

        :param embeddings: Embeddings client; its ``embed_query`` embeds prompts.
        :param score_threshold: Minimum cosine similarity (``1 - cosine distance``)
            a stored prompt must reach to count as a hit.
        :param top_k: Maximum number of cached rows a lookup returns.
        """
        self.embeddings = embeddings
        self.score_threshold = score_threshold
        self.top_k = top_k
        _init_db()

    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
        """
        Return cached responses for prompts similar to ``prompt``.

        :param prompt: Serialized prompt as understood by ``get_prompt_content``.
        :param llm_string: Unused; see the class-level note.
        :return: List of deserialized responses (at most ``top_k``, most similar
            first), or ``None`` when nothing reaches ``score_threshold``.
        """
        # Embed before opening the connection so it is not held open while
        # waiting on the embeddings call.
        prompt_vector = self.embeddings.embed_query(get_prompt_content(prompt))
        with get_postgres_conn() as conn:
            with conn.cursor() as cursor:
                cursor.execute("""
                    SELECT 
                        prompt,
                        response,
                        prompt_embedding <=> %(prompt_vector)s::vector as cosine_distance,
                        (1 - (prompt_embedding <=> %(prompt_vector)s::vector)) as cosine_similarity
                    FROM
                        langchain_semantic_cache
                    WHERE
                        prompt_embedding is not null
                        and (1 - (prompt_embedding <=> %(prompt_vector)s::vector)) >= %(match_threshold)s
                    ORDER BY 
                        cosine_distance ASC
                    LIMIT %(match_cnt)s
                """, {'prompt_vector': prompt_vector,
                      'match_threshold': self.score_threshold,
                      'match_cnt': self.top_k})
                rows = cursor.fetchall()

        if not rows:
            return None
        # Column 1 of each row is the serialized response.
        return [loads(row[1]) for row in rows]

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """
        Store every generation in ``return_val`` under the embedding of ``prompt``.

        :param prompt: Serialized prompt as understood by ``get_prompt_content``.
        :param llm_string: Unused; see the class-level note.
        :param return_val: Generations to cache; one row is inserted per element.
        """
        if not return_val:
            # Nothing to store: skip the embedding call and the connection.
            return

        prompt_content = get_prompt_content(prompt)
        # All rows share the same prompt, so embed it once instead of once per generation.
        prompt_vector = self.embeddings.embed_query(prompt_content)
        with get_postgres_conn() as conn:
            with conn.cursor() as cursor:
                for gen in return_val:
                    cursor.execute("""
                        INSERT INTO
                            langchain_semantic_cache
                        (prompt, response, prompt_embedding)
                        VALUES
                            (%(prompt)s, %(response)s, %(prompt_vector)s::vector)
                    """, {'prompt': prompt_content,
                          'response': dumps(gen),
                          'prompt_vector': prompt_vector})

    def clear(self, **kwargs: Any) -> None:
        """Remove every cached entry by truncating ``langchain_semantic_cache``; ``kwargs`` are ignored."""
        with get_postgres_conn() as conn:
            with conn.cursor() as cursor:
                cursor.execute("""
                        TRUNCATE langchain_semantic_cache
                    """)


### Configure caching via LangChain

In [5]:
from langchain_core.globals import set_llm_cache

# Create the semantic cache with the shared embeddings client and register it
# through set_llm_cache as the global LLM cache.
# Constructing PostgreSQLSemanticCache calls _init_db(), so the PostgreSQL server
# must be reachable when this cell runs.
postgresql_semantic_cache = PostgreSQLSemanticCache(embeddings=embeddings)
set_llm_cache(postgresql_semantic_cache)

In [6]:
from langchain_openai import AzureChatOpenAI

# Azure OpenAI connection settings read from the environment; os.getenv returns
# None for any variable that is not set.
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
deployment_name = os.getenv("AZURE_LLM_MODEL_DEPLOYMENT_NAME")
openai_api_key = os.getenv("AZURE_API_KEY")
openai_api_version= os.getenv("AZURE_API_VERSION")

# Chat model used for the demo call below, created after the global cache was registered.
llm = AzureChatOpenAI(azure_endpoint=azure_endpoint, 
                  deployment_name=deployment_name,
                  openai_api_key=openai_api_key,
                  openai_api_version=openai_api_version,
                  temperature=0)

In [22]:
%%time
# Send a sample prompt; the %%time magic above reports CPU and wall time for the
# call, e.g. to compare a first run with a repeated (presumably cached) one.
llm.invoke("Hello")

CPU times: total: 31.2 ms
Wall time: 424 ms


AIMessage(content='Hello! How can I assist you today?')

Clear the cache

In [8]:
# Uncomment the next line to remove every entry from the semantic cache table.
# postgresql_semantic_cache.clear()