# Package Installation and Imports

In [78]:
!pip install faiss-cpu llama-index python-dotenv
!pip install llama-index faiss-cpu openai transformers sentence-transformers python-dotenv



In [80]:
from typing import List
from langchain_community.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
import faiss
import os
import sys
from dotenv import load_dotenv

EMBED_DIMENSION = 512

CHUNK_SIZE = 200
CHUNK_OVERLAP = 50

load_dotenv()

os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv('HUGGINGFACEHUB_API_TOKEN')

Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

### Read Docs

In [90]:
import os
os.makedirs('data', exist_ok=True)

!wget -O data/Understanding_Climate_Change.pdf https://raw.githubusercontent.com/NirDiamant/RAG_TECHNIQUES/main/data/Understanding_Climate_Change.pdf

--2025-06-20 15:27:56--  https://raw.githubusercontent.com/NirDiamant/RAG_TECHNIQUES/main/data/Understanding_Climate_Change.pdf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 206372 (202K) [application/octet-stream]
Saving to: ‘data/Understanding_Climate_Change.pdf’


2025-06-20 15:27:56 (5.64 MB/s) - ‘data/Understanding_Climate_Change.pdf’ saved [206372/206372]



In [91]:
path = "data/"
node_parser = SimpleDirectoryReader(input_dir=path, required_exts=['.pdf'])
documents = node_parser.load_data()
print(documents[0])

Doc ID: b44da83b-9b8d-4b08-bf5e-9001265aa6df
Text: Understanding Climate Change  Chapter 1: Introduction to Climate
Change  Climate change refers to significant, long-term changes in the
global climate. The term  "global climate" encompasses the planet's
overall weather patterns, including temperature,  precipitation, and
wind patterns, over an extended period. Over the past century, human
acti...


### Vector Store

In [92]:
faiss_index = faiss.IndexFlatL2(EMBED_DIMENSION)
vector_store = FaissVectorStore(faiss_index=faiss_index)

### Text Cleaner Transformation

In [93]:
class TextCleaner(TransformComponent):
    """
    Transformation to be used within the ingestion pipeline.
    Cleans clutters from texts.
    """
    def __call__(self, nodes, **kwargs) -> List[BaseNode]:

        for node in nodes:
            node.text = node.text.replace('\t', ' ')
            node.text = node.text.replace(' \n', ' ')

        return nodes

### Ingestion Pipeline

In [94]:
text_splitter = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

pipeline = IngestionPipeline(
    transformations=[
        text_splitter,
        TextCleaner(),
    ],
    vector_store=vector_store,
)

In [95]:
nodes = pipeline.run(documents=documents)

### Create retriever

In [96]:
vector_store_index = VectorStoreIndex(nodes)
retriever = vector_store_index.as_retriever(similarity_top_k=2)

### Test retriever

In [97]:
def show_context(context):
    for i, c in enumerate(context):
        print(f"Context {i+1}:")
        print(c.text)
        print("\n")

In [102]:
query = "What are the effects of climate change?"
context = retriever.retrieve(query)
show_context(context)

Context 1:
Practices such as precision farming and organic fertilizers can mitigate these emissions. The development of eco-friendly fertilizers and farming techniques is essential for reducing the agricultural sector's carbon footprint. Chapter 3: Effects of Climate Change The effects of climate change are already being felt around the world and are projected to intensify in the coming decades. These effects include: Rising Temperatures Global temperatures have risen by about 1.2 degrees Celsius (2.2 degrees Fahrenheit) since the late 19th century. This warming is not uniform, with some regions experiencing more significant increases than others. Heatwaves Heatwaves are becoming more frequent and severe, posing risks to human health, agriculture, and infrastructure. Cities are particularly vulnerable due to the "urban heat island" effect.


Context 2:
Heatwaves Heatwaves are becoming more frequent and severe, posing risks to human health, agriculture, and infrastructure. Cities are pa