In [2]:
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_core.documents import Document
from dotenv import load_dotenv

In [3]:
# Read key/value pairs from a .env file into the process environment.
# NOTE(review): presumably this is where the Gemini API key used by
# ChatGoogleGenerativeAI comes from — confirm the .env contents.
load_dotenv()

True

In [4]:
# Sample corpus: every passage pairs one machine-learning statement with an
# unrelated general-knowledge sentence. The mapping key becomes the
# document's "source" metadata; insertion order is preserved in `docs`.
docs = [
    Document(page_content=passage, metadata={"source": label})
    for label, passage in {
        "Doc1": "Gradient Descent is fundamental for optimizing machine learning models. Interestingly, Mount Everest is the tallest mountain on Earth, showing how natural extremes can mirror optimization in algorithms.",
        "Doc2": "Overfitting happens when models memorize training data instead of generalizing. Similarly, the Amazon rainforest maintains a delicate balance, producing 20% of Earth's oxygen while supporting immense biodiversity.",
        "Doc3": "Regularization methods like L1 and L2 prevent models from becoming too complex. Just as the FIFA World Cup gathers millions of fans globally, ML models require careful coordination to perform optimally under pressure.",
        "Doc4": "Support Vector Machines find optimal hyperplanes for classification. Meanwhile, black holes warp spacetime with massive gravity, illustrating the extremes that nature can achieve, akin to optimization challenges in high-dimensional spaces.",
        "Doc5": "Principal Component Analysis reduces dimensions while capturing maximum variance. Similarly, the Great Wall of China, stretching thousands of kilometers, was built strategically to cover maximum area efficiently.",
        "Doc6": "Reinforcement Learning agents learn by trial and error to maximize rewards. In parallel, ecosystems like the Amazon rainforest adapt continuously, showing natural reinforcement over centuries.",
        "Doc7": "Neural Networks excel at pattern recognition inspired by the human brain. Meanwhile, FIFA tournaments create patterns of fan engagement and team performance, showing how structured networks can emerge in both AI and human systems.",
        "Doc8": "Transfer Learning allows knowledge reuse across tasks in AI. Similarly, knowledge from history, like the construction of the Great Wall, can be transferred to modern engineering for efficiency and durability.",
        "Doc9": "Decision Trees split data for predictions, reflecting structured decision-making. Likewise, Mount Everest's climbing routes have been mapped and optimized, showing how natural challenges can be navigated systematically.",
        "Doc10": "Ensemble methods combine multiple models for better accuracy. Similarly, the Amazon rainforest relies on interdependent species working together to maintain stability and resilience.",
    }.items()
]


In [6]:
# Embed the sample passages with the MiniLM sentence-transformer and index
# them in a Chroma collection.
vector_store = Chroma.from_documents(
    documents=docs,
    embedding=HuggingFaceEmbeddings(
        # `model_name` is the declared field name; `model` is only an alias
        # that older langchain_huggingface releases reject.
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    ),
    # Stable ids taken from each document's "source" label, so re-running
    # this cell in the same kernel overwrites the existing entries instead
    # of adding a second copy of every passage under fresh random ids.
    ids=[doc.metadata["source"] for doc in docs],
)

In [20]:
# Base retriever over the vector store; "k": 5 asks for five passages per
# query, which are later handed to the compressor.
base_ret = vector_store.as_retriever(
    search_kwargs={"k": 5},
)

In [21]:
# Document compressor backed by the Gemini chat model; it is plugged into
# the contextual-compression retriever as `base_compressor`.
compressor = LLMChainExtractor.from_llm(
    llm=ChatGoogleGenerativeAI(model="gemini-2.0-flash"),
)

In [22]:
# Combine the base retriever with the LLM compressor.
# The variable name (spelled "retriver") is kept as-is because later cells
# refer to it.
compression_retriver = ContextualCompressionRetriever(
    base_retriever=base_ret,
    base_compressor=compressor,
)

In [27]:
# Question sent to the compression retriever.
# NOTE(review): the text has a leading space and awkward wording; it is left
# untouched because it is runtime input and the captured output below was
# produced with this exact string.
query=" ON what amazon rain forest reliant on?"

In [28]:
# Run the query through the compression retriever; `res` is iterated below
# and each item's `page_content` is printed.
res=compression_retriver.invoke(query)

In [29]:
# Show every returned passage, numbered from 1 and framed by divider lines.
divider = "_________________________________________________________________________________________"
for position, doc in enumerate(res, start=1):
    print(divider)
    print("Result:", position)
    print(doc.page_content)
    print(divider)

_________________________________________________________________________________________
Result: 1
Amazon rainforest relies on interdependent species working together to maintain stability and resilience.
_________________________________________________________________________________________
_________________________________________________________________________________________
Result: 2
Amazon rainforest
_________________________________________________________________________________________
