In [30]:
from pathlib import Path

# pip install sentence-transformers
# pip install tf-keras

In [31]:
def clean_text(text):
    """Collapse all whitespace in ``text`` to single spaces.

    Newlines, tabs and runs of spaces are treated alike: the text is split on
    any whitespace and the pieces are re-joined with one space, which also
    drops leading and trailing whitespace.

    Args:
        text: The raw string to normalize.

    Returns:
        The whitespace-normalized string.
    """
    # str.split() with no separator already splits on newlines, so no
    # separate newline replacement is needed.
    tokens = text.split()
    return " ".join(tokens)


def chunk_text(text, chunk_size=250, overlap=50):
    """Split ``text`` into overlapping chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart, so each
    chunk repeats the last ``overlap`` words of the previous one.

    Args:
        text: The string to split; words are obtained with ``str.split()``.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared between consecutive chunks. Must be
            smaller than ``chunk_size``.

    Returns:
        A list of chunk strings (words re-joined with single spaces). Empty
        or whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size`` (the window would never advance).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []

    for start in range(0, len(words), step):
        end = start + chunk_size
        chunks.append(" ".join(words[start:end]))
        # Once a chunk reaches the last word, any further chunk would only
        # repeat words already covered by this one, so stop here.
        if end >= len(words):
            break

    return chunks


def load_documents(folder_path):
    """Read every ``*.txt`` file directly inside ``folder_path``.

    Files are read as UTF-8 and returned in sorted path order so that the
    resulting document (and therefore chunk) order is the same on every run
    and every filesystem.

    Args:
        folder_path: Directory to scan; sub-directories are not searched.

    Returns:
        A list with the full text of each matching file.
    """
    return [
        path.read_text(encoding="utf-8")
        # glob() yields entries in filesystem order, which is not stable
        # across platforms; sort for reproducibility.
        for path in sorted(Path(folder_path).glob("*.txt"))
        # Skip anything that merely looks like a text file (e.g. a directory
        # whose name ends in .txt).
        if path.is_file()
    ]

In [32]:
# Step 1 - Prepare your documents

documents = load_documents("docs")

# Normalize whitespace in each document, split it into overlapping word
# chunks, and collect the chunks from all documents into one flat list.
all_chunks = []
for doc in documents:
    all_chunks += chunk_text(clean_text(doc))

In [33]:
# Show the chunk list followed by the number of chunks, one per line.
print(all_chunks, len(all_chunks), sep="\n")

['Muhammad Ali Jinnah (born December 25, 1876, in Karachi, British India; died September 11, 1948, in Karachi, Pakistan) was a lawyer, politician, and statesman who became the founder and first Governor-General of Pakistan. He is widely known in Pakistan as Quaid-i-Azam, meaning “Great Leader,” and is regarded as the father of the nation. Jinnah was the eldest of seven children born to Jinnahbhai Poonja, a prosperous merchant, and Mithibai. His family belonged to the Khoja community, which had converted to Islam centuries earlier. There has been historical debate regarding Jinnah’s exact date of birth, with some school records suggesting October 20, 1875, although Jinnah himself consistently maintained that he was born on December 25, 1876. Jinnah received his early education in Karachi at the Sind Madrasat al-Islam and later at the Christian Missionary Society High School. At the age of sixteen, he passed the matriculation examination of the University of Bombay. In 1892, he traveled 

In [34]:
# Step 2 - Create embeddings

from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model by name. The same `model` object is used
# later to encode both the document chunks and each user question, so the two
# kinds of vectors are directly comparable.
model = SentenceTransformer("all-MiniLM-L6-v2")

In [35]:
# Create embeddings for all chunks in a single encode call.
# convert_to_numpy=True requests a NumPy array; retrieval later indexes
# `all_chunks` with row positions from this array, so the two must stay aligned.

embeddings = model.encode(all_chunks, convert_to_numpy=True)

In [36]:
# Inspect the embedding vector of the first chunk (row 0).
embeddings[0]

array([-2.23530978e-02,  1.17044158e-01, -1.09616108e-01,  1.31062772e-02,
       -2.02971064e-02, -4.49897628e-03,  5.03331423e-02, -1.22961001e-02,
       -2.07347125e-02,  7.01877251e-02,  8.64436552e-02, -2.88956091e-02,
        1.22888006e-01,  2.26842817e-02,  5.63617162e-02,  2.71614343e-02,
       -4.17505167e-02,  2.95500793e-02, -2.88321301e-02, -6.10446185e-02,
       -6.43875152e-02,  4.58866954e-02,  1.12733841e-02, -5.36945723e-02,
       -3.71633433e-02,  6.40500942e-03,  6.12775758e-02, -5.26736490e-02,
       -1.18308514e-02,  7.42415115e-02, -6.74502272e-03, -5.09647951e-02,
       -1.47729972e-02,  1.25702936e-02, -4.44020815e-02, -3.44108194e-02,
        1.27394302e-02,  1.15118973e-01,  1.75540317e-02, -4.88204956e-02,
        7.63906091e-02,  3.69355530e-02, -4.37329449e-02, -2.14645285e-02,
        1.05999589e-01, -1.09444363e-02,  1.76148359e-02,  4.32081521e-03,
       -7.48065300e-03, -2.09700014e-03, -8.21138918e-02,  3.49580012e-02,
       -8.23437236e-03, -

In [37]:
# Sanity check: one row per chunk, one column per embedding dimension.
print(embeddings.shape)

# Expected: (number_of_chunks, 384)

(10, 384)


In [38]:
# Save the embedding array to "chunk_embeddings.npy" (relative to the current
# working directory) so it can be reloaded later without re-encoding.

import numpy as np

np.save("chunk_embeddings.npy", embeddings)

### Step 3 - Build a simple similarity search


In [39]:
# Recap of what Steps 1 and 2 produced, printed one object per line:
print(
    all_chunks,  # List[str] - the text chunks
    embeddings,  # numpy.ndarray, shape = (num_chunks, 384)
    model,  # SentenceTransformer("all-MiniLM-L6-v2")
    sep="\n",
)

['Muhammad Ali Jinnah (born December 25, 1876, in Karachi, British India; died September 11, 1948, in Karachi, Pakistan) was a lawyer, politician, and statesman who became the founder and first Governor-General of Pakistan. He is widely known in Pakistan as Quaid-i-Azam, meaning “Great Leader,” and is regarded as the father of the nation. Jinnah was the eldest of seven children born to Jinnahbhai Poonja, a prosperous merchant, and Mithibai. His family belonged to the Khoja community, which had converted to Islam centuries earlier. There has been historical debate regarding Jinnah’s exact date of birth, with some school records suggesting October 20, 1875, although Jinnah himself consistently maintained that he was born on December 25, 1876. Jinnah received his early education in Karachi at the Sind Madrasat al-Islam and later at the Christian Missionary Society High School. At the age of sixteen, he passed the matriculation examination of the University of Bombay. In 1892, he traveled 

In [40]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [41]:
# Convert User Question into Embedding
def embed_question(question, model):
    """Encode a single question with the given embedding model.

    The question is wrapped in a one-element list so the model is called on a
    batch of size one; the result is whatever ``model.encode`` returns for
    that batch with ``convert_to_numpy=True``.

    Args:
        question: The question text.
        model: An object exposing ``encode(texts, convert_to_numpy=...)``.

    Returns:
        The encoder output for the one-question batch
        (presumably shape ``(1, dim)`` - confirm against the encoder).
    """
    single_item_batch = [question]
    return model.encode(single_item_batch, convert_to_numpy=True)


# Compute Similarity Scores
def compute_similarity(question_embedding, chunk_embeddings):
    """Score every chunk embedding against the question embedding.

    Args:
        question_embedding: Embedding(s) of the question, as accepted by
            ``cosine_similarity`` for its first argument.
        chunk_embeddings: Embeddings of all chunks, one per row.

    Returns:
        The first row of the cosine-similarity matrix, i.e. the scores of the
        first (only) question against each chunk.
    """
    # cosine_similarity returns a matrix; row 0 belongs to the single question.
    return cosine_similarity(question_embedding, chunk_embeddings)[0]


# Retrieve Top 3 Relevant Chunks
def retrieve_top_chunks(question, chunks, embeddings, model, top_k=3):
    """Return the ``top_k`` chunks most similar to ``question``.

    The question is embedded with ``model``, compared against ``embeddings``
    by cosine similarity, and the chunks are returned from most to least
    similar.

    Args:
        question: The question text.
        chunks: Sequence of chunk strings; position ``i`` must correspond to
            row ``i`` of ``embeddings``.
        embeddings: Chunk embeddings, one row per chunk.
        model: Embedding model passed on to ``embed_question``.
        top_k: Maximum number of chunks to return. Fewer are returned when
            there are fewer chunks available.

    Returns:
        A list of at most ``top_k`` chunk strings, best match first. Returns
        an empty list when ``top_k`` is not positive or there are no chunks.
    """
    # Guard first: a slice of [-0:] would otherwise return *every* chunk, and
    # there is nothing to rank when no chunks exist.
    if top_k <= 0 or len(chunks) == 0:
        return []

    question_embedding = embed_question(question, model)
    similarity_scores = compute_similarity(question_embedding, embeddings)

    # argsort is ascending, so reverse it and keep the leading top_k indices.
    top_indices = np.argsort(similarity_scores)[::-1][:top_k]

    return [chunks[i] for i in top_indices]

In [42]:
# Example query: fetch the best-matching chunks and show them in rank order.
question = "What is name of Quaid-e-azam's father?"

top_chunks = retrieve_top_chunks(question, all_chunks, embeddings, model)

for i, chunk in enumerate(top_chunks, start=1):
    print(f"\n--- Chunk {i} ---", chunk, sep="\n")


--- Chunk 1 ---
Muhammad Ali Jinnah (born December 25, 1876, in Karachi, British India; died September 11, 1948, in Karachi, Pakistan) was a lawyer, politician, and statesman who became the founder and first Governor-General of Pakistan. He is widely known in Pakistan as Quaid-i-Azam, meaning “Great Leader,” and is regarded as the father of the nation. Jinnah was the eldest of seven children born to Jinnahbhai Poonja, a prosperous merchant, and Mithibai. His family belonged to the Khoja community, which had converted to Islam centuries earlier. There has been historical debate regarding Jinnah’s exact date of birth, with some school records suggesting October 20, 1875, although Jinnah himself consistently maintained that he was born on December 25, 1876. Jinnah received his early education in Karachi at the Sind Madrasat al-Islam and later at the Christian Missionary Society High School. At the age of sixteen, he passed the matriculation examination of the University of Bombay. In 189

### Step 4 - Build the RAG prompt


In [43]:
def build_rag_prompt(question, context_chunks):
    """Assemble the prompt sent to the LLM for a retrieval-augmented answer.

    The context chunks are joined with blank lines and inserted, together
    with the question, into a fixed instruction template.

    Note that the template lines carry the four-space indentation of this
    function body, and that only the first line of the joined context
    receives that indentation.

    Args:
        question: The user's question.
        context_chunks: Iterable of chunk strings to use as context.

    Returns:
        The complete prompt string.
    """
    template = """
    You are a helpful assistant. Answer the question **using the context provided**.
    - Be concise and direct.
    - Use only the context, but infer answers if enough information is present.
    - If the answer is not in the context, say "Not enough information in the provided context."

    Context:
    {context}

    Question: {question}
    Answer:
    """

    return template.format(
        context="\n\n".join(context_chunks),
        question=question,
    )


In [44]:
# Question used for the prompt-building demo in the next cell.
question = "What is name of Quaid-e-azam's father?"

In [45]:
# Combine the question with the retrieved chunks into the final LLM prompt.
rag_prompt = build_rag_prompt(question, top_chunks)

# Print it so the exact text sent to the model can be inspected.
print(rag_prompt)


    You are a helpful assistant. Answer the question **using the context provided**.
    - Be concise and direct.
    - Use only the context, but infer answers if enough information is present.
    - If the answer is not in the context, say "Not enough information in the provided context."

    Context:
    Muhammad Ali Jinnah (born December 25, 1876, in Karachi, British India; died September 11, 1948, in Karachi, Pakistan) was a lawyer, politician, and statesman who became the founder and first Governor-General of Pakistan. He is widely known in Pakistan as Quaid-i-Azam, meaning “Great Leader,” and is regarded as the father of the nation. Jinnah was the eldest of seven children born to Jinnahbhai Poonja, a prosperous merchant, and Mithibai. His family belonged to the Khoja community, which had converted to Islam centuries earlier. There has been historical debate regarding Jinnah’s exact date of birth, with some school records suggesting October 20, 1875, although Jinnah himself cons

In [46]:
import ollama

# Send the RAG prompt as a single user message and print the model's reply.
response = ollama.chat(
    model="llama3.2",
    messages=[dict(role="user", content=rag_prompt)],
)

print(response["message"]["content"])

Jinnahbhai Poonja.


### Step 5 - Test RAG vs No-RAG


In [47]:
# Evaluation questions. Each one is asked twice further down the notebook,
# once with retrieved context and once without, so the answers can be compared.
questions = [
    "What was the name of Muhammad Ali Jinnah's father?",
    "When and where was Muhammad Ali Jinnah born, and what title is he commonly known by in Pakistan?",
    "What was the Lahore Resolution of 1940, and what major outcome did it lead to?",
    "What device is used to capture solar energy and convert it into electricity?",
    "Why are batteries often used in solar energy systems?",
    "When did World War II begin, and which countries declared war on Germany in response to the invasion of Poland?",
    "Which two Japanese cities were targeted by atomic bombs, and what was the outcome of these bombings?",
    "What are the three main types of volcanoes, and give one example of each type?",
    "Name two famous volcanic eruptions mentioned in the text and describe one major consequence of each.",
    "What is the main advantage of using a Retrieval-Augmented Generation (RAG) system compared to relying solely on a language model?",
]

In [48]:
# LLaMA Without RAG
def ask_llama_no_rag(question, llm_model="llama3.2"):
    """Ask the LLM a question directly, with no retrieved context.

    Args:
        question: The question text, sent as a single user message.
        llm_model: Name of the Ollama model to query. Defaults to the model
            used throughout this notebook.

    Returns:
        The text content of the model's reply message.
    """
    response = ollama.chat(
        model=llm_model, messages=[{"role": "user", "content": question}]
    )
    return response["message"]["content"]

In [49]:
def ask_llama_with_rag(
    question, chunks, embeddings, model, top_k=3, llm_model="llama3.2"
):
    """Answer a question with the LLM using retrieved chunks as context.

    The most similar chunks are retrieved, combined with the question into a
    RAG prompt, and sent to the LLM as a single user message.

    Args:
        question: The question text.
        chunks: Sequence of chunk strings aligned with ``embeddings``.
        embeddings: Chunk embeddings, one row per chunk.
        model: Embedding model used to encode the question (this is NOT the
            LLM; see ``llm_model``).
        top_k: Number of chunks to retrieve as context.
        llm_model: Name of the Ollama model to query.

    Returns:
        The text content of the model's reply message.
    """
    top_chunks = retrieve_top_chunks(
        question=question,
        chunks=chunks,
        embeddings=embeddings,
        model=model,
        top_k=top_k,
    )

    prompt = build_rag_prompt(question, top_chunks)

    response = ollama.chat(
        model=llm_model, messages=[{"role": "user", "content": prompt}]
    )
    return response["message"]["content"]


# sample usage

# answer = ask_llama_with_rag(
#     question=questions[0], chunks=all_chunks, embeddings=embeddings, model=model
# )
# answer

In [None]:
# Ask every evaluation question with retrieval and log each Q&A pair both to
# "rag_qa_results.txt" and to the console.
with open("rag_qa_results.txt", "w", encoding="utf-8") as f:
    for idx, question in enumerate(questions, start=1):
        # Only the model call is guarded: a failure for one question is
        # recorded as its answer so the remaining questions still run, while
        # genuine file-write problems are not mislabelled as answer errors.
        try:
            answer = ask_llama_with_rag(
                question=question, chunks=all_chunks, embeddings=embeddings, model=model
            )
        except Exception as e:
            answer = f"Error occurred - {e}"

        # One record, emitted identically to the file and the console.
        record = f"Question {idx}: {question}\nAnswer {idx}: {answer}\n" + "-" * 80
        f.write(record + "\n")
        print(record)

print("RAG Q&A results saved to 'rag_qa_results.txt'.")


Question 1: What was the name of Muhammad Ali Jinnah's father?
Answer 1: Muhammad Ali Jinnah's father was Jinnahbhai Poonja.
--------------------------------------------------------------------------------
Question 2: When and where was Muhammad Ali Jinnah born, and what title is he commonly known by in Pakistan?
Answer 2: Muhammad Ali Jinnah was born on December 25, 1876, in Karachi, British India. He is widely known in Pakistan as Quaid-i-Azam, meaning “Great Leader,” and is regarded as the father of the nation.
--------------------------------------------------------------------------------
Question 3: What was the Lahore Resolution of 1940, and what major outcome did it lead to?
Answer 3: The Lahore Resolution of 1940 was a resolution adopted by the Muslim League under Muhammad Ali Jinnah's leadership, calling for the creation of independent states for Muslims in the Indian subcontinent. This demand eventually led to the establishment of Pakistan in 1947.
--------------------------

In [52]:
# File to store answers from LLaMA without RAG
# Ask every evaluation question without retrieval and log each Q&A pair both
# to "llama_no_rag_results.txt" and to the console.
with open("llama_no_rag_results.txt", "w", encoding="utf-8") as f:
    for idx, question in enumerate(questions, start=1):
        # Only the model call is guarded: a failure for one question is
        # recorded as its answer so the remaining questions still run, while
        # genuine file-write problems are not mislabelled as answer errors.
        try:
            answer = ask_llama_no_rag(question)
        except Exception as e:
            answer = f"Error occurred - {e}"

        # One record, emitted identically to the file and the console.
        record = f"Question {idx}: {question}\nAnswer {idx}: {answer}\n" + "-" * 80
        f.write(record + "\n")
        print(record)

print("LLaMA no-RAG Q&A results saved to `llama_no_rag_results.txt`.")


Question 1: What was the name of Muhammad Ali Jinnah's father?
Answer 1: Muhammad Ali Jinnah's father was Jinnahbhai Pirabhoi.
--------------------------------------------------------------------------------
Question 2: When and where was Muhammad Ali Jinnah born, and what title is he commonly known by in Pakistan?
Answer 2: Muhammad Ali Jinnah was born on December 25, 1876, in Karachi, British India (now Pakistan). He is commonly known as "Quaid-e-Azam" in Pakistan.
--------------------------------------------------------------------------------
Question 3: What was the Lahore Resolution of 1940, and what major outcome did it lead to?
Answer 3: The Lahore Resolution of 1940 was a resolution adopted by the All-India Muslim League at its annual session in Lahore, Punjab, British India. The resolution was proposed by Muhammad Ali Jinnah, who later became the founder of Pakistan.

The resolution stated that the Muslims of the Indian subcontinent had been subject to discrimination and marg