In [1]:
import os
import time
import hashlib
import numpy as np
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.messages import HumanMessage
from sklearn.metrics.pairwise import cosine_similarity

# Load variables from a .env file into the process environment.
# override=True lets values from the file replace ones that are already set.
load_dotenv(override=True)

# Index os.environ directly so a missing key raises KeyError immediately
# instead of surfacing later as an authentication failure.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Chat model that rag_pipeline uses to generate answers.
llm = ChatOpenAI(
    model="gpt-5-nano",
    temperature=0,
    api_key=OPENAI_API_KEY,
)

# Embedding model that get_embedding uses for both queries and documents.
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=OPENAI_API_KEY,
)

In [3]:
# Tiny in-memory corpus searched by retrieve_documents: one passage per entry.
DOCUMENTS = [
    "Artificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making.",
    "It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals.",
    "Various subfields of AI research are centered around particular goals and the use of particular tools.",
    "The traditional goals of AI research include learning, reasoning, knowledge representation, planning, natural language processing, perception, and support for robotics.",
]


In [4]:
def get_embedding(text: str):
    """Embed ``text`` and return the vector as a single-row 2-D NumPy array.

    The vector comes from ``embedding_model.embed_query``. It is reshaped to
    ``(1, n)`` so callers can pass it straight to ``cosine_similarity``,
    which operates on 2-D inputs.
    """
    return np.array(embedding_model.embed_query(text)).reshape(1, -1)

In [5]:
# Memo of document text -> embedding, so each document is embedded at most
# once per definition of this cell rather than once per query. Re-run this
# cell to clear the memo if ``embedding_model`` is swapped for another model.
_DOC_EMBEDDING_CACHE = {}


def retrieve_documents(query, top_k=2):
    """Return the ``top_k`` entries of ``DOCUMENTS`` most similar to ``query``.

    Similarity is the cosine similarity between the query embedding and each
    document embedding (both obtained through ``get_embedding``). Document
    embeddings are memoized by document text, so repeated calls only pay for
    embedding the query.

    Args:
        query: Text to search for.
        top_k: Maximum number of documents to return. ``None`` returns every
            document; zero or a negative value returns an empty list.

    Returns:
        A list of document strings ordered from most to least similar.
        Documents with equal scores keep their ``DOCUMENTS`` order.
    """
    # Nothing can be returned, so skip the embedding request altogether.
    # (A negative top_k would otherwise slice "all but the last N" results.)
    if not DOCUMENTS or (top_k is not None and top_k <= 0):
        return []

    query_vec = get_embedding(query)

    scored = []
    for doc in DOCUMENTS:
        doc_vec = _DOC_EMBEDDING_CACHE.get(doc)
        if doc_vec is None:
            doc_vec = get_embedding(doc)
            _DOC_EMBEDDING_CACHE[doc] = doc_vec
        # cosine_similarity returns a 1x1 matrix for two single-row inputs.
        scored.append((doc, cosine_similarity(query_vec, doc_vec)[0][0]))

    # list.sort is stable, including with reverse=True, so ties keep corpus order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    return [doc for doc, _ in scored[:top_k]]

In [6]:
def hash_context(query, documents):
    """Return a SHA-256 hex digest identifying a (query, documents) pair.

    Each part is fed to the hash preceded by its encoded byte length, so the
    boundaries between the query and the individual documents are
    unambiguous. Joining the parts with a plain separator would let
    ``("q||x", ["y"])`` and ``("q", ["x||y"])`` collapse to the same key.

    Args:
        query: The query string.
        documents: Iterable of document strings; order is significant.

    Returns:
        A 64-character lowercase hexadecimal string.
    """
    digest = hashlib.sha256()
    for part in (query, *documents):
        encoded = part.encode()
        # Fixed-width length prefix: no content can imitate a part boundary.
        digest.update(len(encoded).to_bytes(8, "big"))
        digest.update(encoded)
    return digest.hexdigest()

In [7]:
class ContextAwareRAGCache:
    """Two-level in-memory cache for a retrieval-augmented generation pipeline.

    * Retrieval cache: a list of ``{"embedding", "documents"}`` entries. A
      lookup embeds the query and matches it against stored query embeddings
      by cosine similarity, so differently worded but similar queries can
      share one retrieval result.
    * Generation cache: a dict from ``hash_context(query, documents)`` to the
      generated response, so a response is reused only for the same query
      text together with the same documents.

    Args:
        similarity_threshold: Minimum cosine similarity for a stored query to
            count as a retrieval-cache hit.
    """

    def __init__(self, similarity_threshold=0.85):
        self.retrieval_cache = []
        self.generation_cache = {}
        self.similarity_threshold = similarity_threshold
        # (query, embedding) of the most recently embedded query, so a lookup
        # miss followed by set_retrieval for the same query embeds only once.
        self._last_embedded = None

    def _embed_query(self, query):
        """Return the embedding for ``query``, reusing the previous one if the text is unchanged."""
        last = self._last_embedded
        if last is not None and last[0] == query:
            return last[1]
        vector = get_embedding(query)
        self._last_embedded = (query, vector)
        return vector

    # ---------- Retrieval Cache ----------
    def get_cached_retrieval(self, query):
        """Return the documents cached for the stored query most similar to ``query``.

        Every stored entry is scored, and the highest-scoring entry at or
        above ``similarity_threshold`` wins (the earliest entry on a tie).

        Returns:
            A new list holding the cached documents, or ``None`` on a miss.
        """
        query_vec = self._embed_query(query)

        best_entry = None
        best_score = None
        for entry in self.retrieval_cache:
            score = cosine_similarity(query_vec, entry["embedding"])[0][0]
            if score < self.similarity_threshold:
                continue
            # Strict '>' keeps the earliest entry when scores are equal.
            if best_score is None or score > best_score:
                best_entry, best_score = entry, score

        if best_entry is not None:
            print("Retrieval Cache HIT")
            # Hand out a copy so callers cannot mutate the cached list.
            return list(best_entry["documents"])

        print("Retrieval Cache MISS")
        return None

    def set_retrieval(self, query, documents):
        """Store ``documents`` as the retrieval result for ``query``."""
        self.retrieval_cache.append({
            "embedding": self._embed_query(query),
            # Snapshot the list so later caller-side mutation does not leak in.
            "documents": list(documents),
        })

    # ---------- Generation Cache ----------
    def get_cached_generation(self, query, documents):
        """Return the response cached for this query and documents, or ``None`` on a miss."""
        context_key = hash_context(query, documents)

        if context_key in self.generation_cache:
            print("Generation Cache HIT")
            return self.generation_cache[context_key]

        print("Generation Cache MISS")
        return None

    def set_generation(self, query, documents, response):
        """Store ``response`` for this query and documents."""
        context_key = hash_context(query, documents)
        self.generation_cache[context_key] = response

In [8]:
# Shared cache instance used by rag_pipeline; the 0.9 threshold passed here
# replaces the class default of 0.85.
rag_cache = ContextAwareRAGCache(similarity_threshold=0.9)

In [9]:
def rag_pipeline(query):
    """Answer ``query`` from the document corpus, using ``rag_cache`` at both stages.

    Steps:
        1. Look up documents in the retrieval cache; on a miss, call
           ``retrieve_documents`` and store the result.
        2. Look up a response in the generation cache for this exact query
           and document set; return it if one is stored.
        3. Otherwise build a prompt from the documents, call ``llm``, store
           the response in the generation cache and return it.

    Args:
        query: The user question.

    Returns:
        The ``content`` of the model response, or the cached value stored for
        the same query and documents.
    """
    docs = rag_cache.get_cached_retrieval(query)

    if docs is None:
        docs = retrieve_documents(query)
        rag_cache.set_retrieval(query, docs)

    cached_response = rag_cache.get_cached_generation(query, docs)
    # Compare against None rather than truthiness: a cached empty response is
    # still a hit and must not trigger another model call.
    if cached_response is not None:
        return cached_response

    context = "\n".join(docs)

    prompt = f"""
    Answer the question using context:

    Context:
    {context}

    Question:
    {query}
    """

    response = llm.invoke([HumanMessage(content=prompt)])
    output = response.content

    rag_cache.set_generation(query, docs, output)

    return output

In [10]:
# Two differently worded queries on the same topic, used to exercise the
# similarity-based retrieval cache.
query1 = "Explain AI"
query2 = "Describe AI"

# The return value of this call is discarded; only the cache log lines print.
print("First Call")
rag_pipeline(query1)

# Last expression in the cell, so its return value is what the notebook displays.
print("\nSecond Call (Similar Query)")
rag_pipeline(query2)

First Call
Retrieval Cache MISS
Generation Cache MISS

Second Call (Similar Query)
Retrieval Cache HIT
Generation Cache MISS


'AI is the ability of computational systems to perform tasks that are typically associated with human intelligence. These tasks include learning, reasoning, problem-solving, perception, and decision-making.\n\nTraditional goals of AI research include:\n- Learning\n- Reasoning\n- Knowledge representation\n- Planning\n- Natural language processing\n- Perception\n- Support for robotics'

In [13]:
# A third wording of the same question; the text differs from query1 and
# query2, so the generation-cache key (which includes the query) differs too.
query3 = "Explain about AI"

print("\nThird Call")
rag_pipeline(query3)


Third Call
Retrieval Cache HIT
Generation Cache MISS


'Artificial intelligence (AI) is the capability of computational systems to perform tasks that are typically associated with human intelligence. These tasks include:\n\n- Learning: improving performance with experience\n- Reasoning: drawing conclusions from information\n- Problem-solving: finding solutions to complex issues\n- Perception: interpreting sensory data\n- Decision-making: choosing actions to take\n\nHistorically, the traditional goals of AI research have included:\n\n- Learning\n- Reasoning\n- Knowledge representation\n- Planning\n- Natural language processing\n- Perception\n- Support for robotics\n\nIn short, AI aims to simulate or assist human cognitive functions to automate tasks, analyze information, and enable intelligent interactions.'

In [14]:
# Same text as query3, so both the retrieval and the generation lookups
# see a query they have already stored.
query4 = "Explain about AI"

# This is the fourth rag_pipeline call in the notebook.
print("\nFourth Call (Repeated Query)")
rag_pipeline(query4)


Third Call
Retrieval Cache HIT
Generation Cache HIT


'Artificial intelligence (AI) is the capability of computational systems to perform tasks that are typically associated with human intelligence. These tasks include:\n\n- Learning: improving performance with experience\n- Reasoning: drawing conclusions from information\n- Problem-solving: finding solutions to complex issues\n- Perception: interpreting sensory data\n- Decision-making: choosing actions to take\n\nHistorically, the traditional goals of AI research have included:\n\n- Learning\n- Reasoning\n- Knowledge representation\n- Planning\n- Natural language processing\n- Perception\n- Support for robotics\n\nIn short, AI aims to simulate or assist human cognitive functions to automate tasks, analyze information, and enable intelligent interactions.'