Usage of this notebook: 
1. Install the hugging face embedding models Locally.  
2. Crate Embedding Vector Data Base with one of the 3 Chunking Methods. (Sementic,Recursive,Character)

* <b> HuggingFace Models </b>

    1.    "sentence-transformers/all-mpnet-base-v2"

    2.    "sentence-transformers/all-MiniLM-L6-v2"

    3.    "BAAI/bge-small-en-v1.5"

    4.    "nomic-ai/nomic-embed-text-v1.5"


In [1]:
%pip install -r requiremnets.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip install einops

Note: you may need to restart the kernel to use updated packages.


## Path

In [3]:
# Set paths in your notebook
model_path = r"E:/CSE299/chatbot/llm"
embedding_save_path = r"E:/CSE299/chatbot/Embedding"
pdfs_path = r"E:/CSE299/chatbot/docs"

## Import Dependencies

In [4]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.llms.ollama import Ollama
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


## Chunking Functions & PDF loading

Load the PDF of the Book.

In [8]:
#@title Function to load all PDFs from a given directory using PyPDFDirectoryLoader.

def load_pdfs_from_directory(directory_path):

    loader = PyPDFDirectoryLoader(directory_path)
    docs = loader.load()
    return docs


### Recursive Chunking

In [9]:
#@title recursive Chuking of the documents

def recursive_chunks(documents, chunk_size, chunk_overlap):

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, add_start_index=True)
    all_splits = text_splitter.split_documents(documents)

    return all_splits

### Character Chunking

In [7]:
#@title character Chuking of the documents
from langchain_text_splitters import CharacterTextSplitter

def character_chunks(documents, chunk_size, chunk_overlap):

    # Initialize the CharacterTextSplitter with specified parameters
    splitter = CharacterTextSplitter(
        separator="\n\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )

    # Split the text into chunks for each document
    # all_chunks = []
    # for doc in documents:
    #     # Split each document's content and extend the list
    #     chunks = splitter.split_text(doc.page_content)
    #     all_chunks.extend(chunks)
    all_chunks = splitter.split_documents(documents)

    return all_chunks



### Semantic Chunking

In [None]:
#@title semantic Chuking of the documents
from langchain_experimental.text_splitter import SemanticChunker

def semantic_chunks(documents, embed_model):

    semantic_chunker = SemanticChunker(embed_model, breakpoint_threshold_type="percentile")

    # Create semantic chunks from the documents' content
    semantic_chunks = semantic_chunker.create_documents([d.page_content for d in documents])

    return semantic_chunks


## Intializing Models

Embedding Model

In [5]:
from langchain_huggingface import HuggingFaceEmbeddings


def get_embedding_model(model_name, model_kwargs, path):
    encode_kwargs = {'normalize_embeddings': True}

    # Initialize HuggingFaceEmbeddings with model name and kwargs
    hf = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
        cache_folder=path
    )

    return hf

LLM (Ollama model)

In [7]:
from langchain_community.llms.ollama import Ollama
import subprocess
import os
# Function to stream output of the subprocess
def stream_output(process):
    for line in process.stdout:
        print(line, end='')  # Output is already a string, so no need to decode

# Function to start Ollama
def start_ollama():
    try:
        # Redirect stdout and stderr to os.devnull
        with open(os.devnull, 'w') as devnull:
            ollama_process = subprocess.Popen(
                ["ollama", "serve"], 
                stdout=devnull,  # Discard stdout
                stderr=devnull,  # Discard stderr
                shell=True       # Required for Windows
            )
            print("Ollama is starting...", flush=True)
            return ollama_process
    except Exception as e:
        print(f"Error starting Ollama: {e}", flush=True)
        return None



# When Backend starts
ollama_process = start_ollama()

Ollama is starting...


Check Kernel Path of the Notebook.

In [8]:
import os
os.getcwd()


'e:\\CSE299\\chatbot\\rag_tests'

Initialize Ollama Model.

In [9]:
# @title get Ollama model

from langchain_community.llms import Ollama

def get_ollama_model(model):
    llm = Ollama(model=model)
    return llm


#  Embedding 

In [10]:
%pip install transformers

Note: you may need to restart the kernel to use updated packages.


Initilize the embedding model

- Change the **'device' : 'cuda'** if GPU is available.
- **embedding_model_save_path** : Local path of the embedding model (where it will be installed and run from). 
- **embedding_model_name** : At the beggining of this notebook you'll find 4 different model path.<br/>
 You can also use other Embedding models from HuggingFace website.
- You can find Embedding Model of your task from **MTEB Leaderboard**: [Massive Text Embedding Benchmark](https://huggingface.co/spaces/mteb/leaderboard)\
'Embedding Models Dimension Must match Ollama Model's Dimension.'



In [11]:
# import os
from transformers import AutoModel


embedding_model_save_path = r"E:\CSE299\chatbot\llm\mpnet"



embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu', 'trust_remote_code': True}

# Assuming get_embedding_model is defined to support a 'path' argument for saving locally
embedding = get_embedding_model(embedding_model_name, model_kwargs, path=embedding_model_save_path)

  from .autonotebook import tqdm as notebook_tqdm


Dimension

In [12]:
# Access the underlying model from the HuggingFaceEmbeddings instance
hf_model = embedding.client  # 'client' holds the underlying Hugging Face model

# Get the embedding dimension using the dedicated method
embedding_dim = hf_model.get_sentence_embedding_dimension()

# Print the embedding dimension
print(f"Embedding Dimension: {embedding_dim}")

Embedding Dimension: 768


## Make Chunks 

- Choose one of the Chunking Function.

In [21]:
# #@title chunking

docs = load_pdfs_from_directory(pdfs_path)

# all_splits = recursive_chunks(docs, 1000, 200)
# all_splits = semantic_chunks(docs, sem_embedding)
# all_splits = character_chunks(docs, 1000, 200)

**Making new vector database with the splits(chunks).** <br/>
To make the Embedding Vector Database run the following Cell. <br/>
(If already exists skip it & run the next cell to initialize.)

In [45]:
# #@title dbstore
# embedding_vector_db_path = r"E:\CSE299\chatbot\Embedding\miniml\recursive"
# #vectorstoring
# vectorstore = Chroma.from_documents(documents = all_splits, embedding = embedding, persist_directory=embedding_vector_db_path)
# retriever = vectorstore.as_retriever(search_type = "similarity", search_kwargs = {"k" : 6})


# Existing Vector DB load

In [13]:
from langchain_chroma import Chroma
embedding_vector_db_path = r"E:\CSE299\chatbot\Embedding\mpnet\recursive"

vectorstore = Chroma(persist_directory=embedding_vector_db_path, embedding_function=embedding)

retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

To run in Google Colab

In [None]:
# from langchain.vectorstores import Chroma

# # Load existing vector store (you don't need to recreate it)
# vectorstore = Chroma(persist_directory='db/nomic/semantic/',embedding_function=embedding)
# retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})


Prompt

In [14]:
#@title prompt template

PROMPT_TEMPLATE = """
Use the following context to answer the question clearly and simply:
{context}
Answer the question based on the above context: {question}.
Provide a detailed answer.
Do not justify your answers.
Please explain using simple language, relevant examples, and avoid jargon.
"""



Test if the Olllam Models runs in this notebook kernel.

In [15]:
llm = get_ollama_model("llama3.2:1b")
# prompt = process_query("explain newtons first law with example")
# response = llm(prompt)
# print(response)


In [16]:
llm.invoke("explain newtons first law with example")

'Newton\'s First Law of Motion, also known as the Law of Inertia, is a fundamental concept in physics that describes the behavior of objects at rest or in motion. It states:\n\n**"An object at rest will remain at rest, and an object in motion will continue to move with a constant velocity, unless acted upon by an external force."**\n\nIn simpler terms, this law says that:\n\n* If an object is not moving (at rest), it will stay at rest.\n* If an object is already moving (in motion), it will keep moving with the same speed and direction unless:\n* An external force acts on the object.\n\nLet\'s consider an example to illustrate this concept:\n\n**Example: A Ball Rolling Down a Hill**\n\nImagine you\'re standing near a hill, and you throw a ball down the slope. What happens? The ball will roll down the hill, but it will continue to move until something stops it.\n\nThe key point here is that the ball is initially at rest (not moving). However, as soon as it\'s thrown down the hill, an ext

In [17]:
# from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

llm = get_ollama_model("gemma2:2b")

prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

retrieval_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

for chunk in retrieval_chain.stream("explain centrufugal force with example"):
    print(chunk, end="", flush=True)

Centrifugal force is a type of force that pulls something away from its center. Imagine you're sitting in a circle on the floor, spinning around it. You might feel like you want to get out of the circle because you don't want to move towards the center where everyone else is.

The reason for this feeling is centrifugal force. It's as if your body is being pulled away from the middle point of the circle, making you want to move in that direction. This happens because there are more people in the outer parts of the circle than in the middle, so they exert a greater pull on each other.

For example, when you're riding a bike and going around a corner, centrifugal force makes your body feel like it's being pulled away from the center of the turn. It's not because you want to go that way, but because there are more people in front of you making the force stronger.

Centrifugal force can be seen in many things, like when you're spinning around a central point or when a car is moving in a cir

Check Retriver of the Vector Database works.

In [18]:
# Get the retrieved documents and print them
retrieved_docs = retriever.get_relevant_documents("explain centrifugal force with example")

# Print the retrieved documents
for doc in retrieved_docs:
    print(doc.metadata, doc.page_content)  # Or just doc.page_content if metadata is not needed

{'page': 57, 'source': 'data/Physics Classes 9-10 (English Version) - National Curriculum and Textbook Board of Bangladesh - PDF Room.pdf', 'start_index': 824} to gravity. Here, the force of gravity or weight of the 
object is the unbalanced force. If the body is displaced slightly along one side, the tension of the 
thread T and the weight W will not be in a straight 
line. Then a resultant force will act on the body without creating balanced condition. Due to this, the 
body will oscillate. This is an example of unbalanced 
force. 
Another example of balanced and unbalanced forces 
can be seen in the game of tug-of-war competition. In this game, a handkerchief is tied to the center of the rope. In this competition equal 
numbers of competitors pull on the rope in two sides and try to move the handkerchief to 
their side. If the handkerchief does not move then it is understood that either the team are applying equal amount of force and the rope or the handkerchief is in balanced 
cond

  retrieved_docs = retriever.get_relevant_documents("explain centrifugal force with example")


# Recall + Quality

In [46]:
# import time
# import numpy as np
# from sklearn.metrics.pairwise import cosine_similarity

# # Embedding size - assuming you are using the 'embedding_function' from Chroma
# embedding_size = embedding_dim  # For transformers-based models

# # Step 1: Measure recall time of chunks
# start_time = time.time()
# retrieved_docs = retriever.get_relevant_documents("explain newtons first law with example")
# end_time = time.time()

# recall_time = end_time - start_time
# print(f"Recall time: {recall_time:.4f} seconds")

# # Step 2: Calculate embedding quality (Cosine similarity)
# def calculate_embedding_quality(query_embedding, retrieved_embeddings):
#     # Calculate cosine similarity between the query embedding and the retrieved embeddings
#     similarities = cosine_similarity([query_embedding], retrieved_embeddings)

#     # Average similarity score
#     avg_similarity = np.mean(similarities)
#     return avg_similarity

# # Simulating query embedding and retrieved embeddings for this example
# query_embedding = embedding.embed_query("explain newtons first law with example")
# retrieved_embeddings = [doc.embedding for doc in retrieved_docs]

# # Step 3: Calculate embedding quality
# embedding_quality = calculate_embedding_quality(query_embedding, retrieved_embeddings)
# print(f"Embedding Quality (Average Cosine Similarity): {embedding_quality:.4f}")
