# Importing Libraries

In [1]:
# updated version
import warnings
from time import time

import torch
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import Ollama
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings

warnings.filterwarnings('ignore')


In [39]:
# Ollama model tags that can be handed to `Ollama(model=...)`.
GEMMA, LLAMA, QWEN = (
    "gemma:2b",
    "llama3",
    "qwen:4b",
)

# Getting the Language Models

In [44]:
# LLM used for answer generation; swap GEMMA for LLAMA or QWEN to try another model tag.
llm = Ollama(model=GEMMA)

# Data Ingestion

In [27]:
# Data ingestion using PDF loader.
# The path is relative to the notebook's working directory; change PDF_PATH
# to index a different document.
PDF_PATH = "pdf/Physics Classes 9-10.pdf"

loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

# Fail here, with a clear message, rather than later in the pipeline when
# there is nothing to split or embed.
assert documents, f"No pages were loaded from {PDF_PATH!r}"

In [28]:
# Chunking parameters, named so they can be tuned in one place.
# Sizes are measured by the splitter's length function (characters by default).
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

# Split the loaded documents into overlapping chunks for embedding/retrieval.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
all_splits = text_splitter.split_documents(documents)

In [29]:
# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# NOTE(review): this checkpoint name is only defined here; the embeddings cell
# below passes "sentence-transformers/all-MiniLM-L6-v2" explicitly instead.
model_name = "sentence-transformers/all-mpnet-base-v2"

# Keyword arguments forwarded to the embedding model constructor.
model_kwargs = dict(device=device)

Using device: cpu


In [48]:
def create_embeddings(model_name, model_kwargs):
    """Build a HuggingFace embedding model and report the device requested for it.

    Args:
        model_name: Name of the sentence-transformers checkpoint to load.
        model_kwargs: Keyword arguments forwarded to the underlying model,
            e.g. ``{"device": "cpu"}``. ``None`` is treated as an empty dict.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.
    """
    # Work on a copy so the caller's dict is never shared with the model object.
    model_kwargs = dict(model_kwargs or {})

    # Report the device actually handed to the model. Reading a notebook-level
    # global here could disagree with `model_kwargs` and makes the function
    # fail on a fresh kernel where that global has not been defined yet.
    requested_device = model_kwargs.get("device")
    if requested_device is None:
        print("Using device: default (not set in model_kwargs)")
    else:
        print(f"Using device: {requested_device}")

    return HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
    )

# Load the MiniLM sentence-embedding model used to index and query the chunks.
embeddings = create_embeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs=model_kwargs,
)

Using device: cpu


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [49]:
# Inspect the loaded embedding object (pipeline modules, model name, model_kwargs).
print(embeddings)

client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
) model_name='sentence-transformers/all-MiniLM-L6-v2' cache_folder=None model_kwargs={'device': 'cpu'} encode_kwargs={} multi_process=False show_progress=False


In [50]:
# Directory where Chroma persists the index built with the MiniLM embeddings.
PERSIST_DIRECTORY = "minilm_db"

# Open the persisted collection first. Calling Chroma.from_documents on every
# run would add the same chunks again, so each re-run of this cell would leave
# duplicate entries in the store and in the retrieval results.
vectordb = Chroma(
    persist_directory=PERSIST_DIRECTORY,
    embedding_function=embeddings,
)
if not vectordb.get(limit=1)["ids"]:
    # Empty store (first run): embed and index every chunk.
    vectordb = Chroma.from_documents(
        documents=all_splits,
        embedding=embeddings,
        persist_directory=PERSIST_DIRECTORY,
    )
# NOTE: delete PERSIST_DIRECTORY to force a rebuild after changing the PDF,
# the chunking parameters, or the embedding model.

# Retrieval

In [51]:
# Retriever over the vector store, configured with the
# "similarity_score_threshold" search type and a score threshold of 0.4.
retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=dict(score_threshold=0.4),
)

In [58]:
# Sanity-check retrieval on a sample question before wiring up the full chain.
retrieved_docs = retriever.invoke("What is motion?")
# Displayed as the cell result: how many chunks the retriever returned.
len(retrieved_docs)

2

In [59]:
# Dump the text of every retrieved chunk, leaving two blank lines after each.
for doc in retrieved_docs:
    print(f"Docs: {doc.page_content}", end="\n\n\n")

Docs: 26  Physics 
Chapter Two  
MOTION 
 
 
[The object, that we see around us either are stationery or in motion. What do we 
actually understand by the words ``rest’’ and ``motion’’. We need different quantities regarding motion to express the characteristics of motion of a moving object. In this chapter we will discuss different quantities regarding motion, their dimensions, units, the 
relations among them etc.] 
By the end of this chapter we will be able to -  
1. Explain the rest and motion  
2. Find out the difference among different types of motion.  
3. Explain the scalar and vector quantities  
4. Analyze the relation among the quantities regarding motion  5. Explain the motion of freely falling bodies  
6. Analyze the relations among the quantities regarding motion with the help of graph 
7. Realize the effect of motion in our life


Docs: Physics  29 
 
Fig-2.1(a)                                                             Fig-2.1(b) 
In the first example, the position of 

# Text Generation 

In [54]:
# Pull the "rlm/rag-prompt" template from the LangChain Hub.
# (`hub` is imported with the other dependencies in the import cell at the top.)
prompt = hub.pull("rlm/rag-prompt")

In [55]:
# Render the prompt with placeholder values to see the exact message layout
# the LLM will receive.
example_messages = prompt.invoke(
    dict(context="filler context", question="filler question")
).to_messages()
example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:")]

In [56]:
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string; empty when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)


# RAG pipeline: the input question is sent to the retriever (whose documents are
# flattened to text by format_docs) and also passed through unchanged; both values
# fill the prompt, which is sent to the LLM, and the result is parsed to a string.
rag_chain = (
    # "context" <- retrieved chunks joined into one string; "question" <- the raw input.
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [57]:
# Ask the same sample question end-to-end through the RAG chain; the answer string is the cell result.
rag_chain.invoke("What is motion?")

"Sure, here's the answer to the question:\n\nMotion is the change in position of an object with respect to time. It can be described in terms of either the distance traveled or the displacement undergone."