# Importing Libraries

In [1]:
# updated version
import warnings
from pathlib import Path
from time import time

import torch
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import Ollama
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings

warnings.filterwarnings('ignore')


# Loading the Llama 3 Model

In [3]:
# Text-generation model used at the end of the RAG chain.
# NOTE(review): presumably requires a reachable Ollama server with this model pulled — confirm.
LLM_MODEL_NAME = "llama3"
llm = Ollama(model=LLM_MODEL_NAME)

# Data Ingestion

In [4]:
# Read the source PDF into LangChain Document objects.
pdf_path = "pdf/Physics Classes 9-10.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()

In [5]:
# Cut the loaded documents into overlapping chunks so that each piece is small
# enough to embed and retrieve on its own.
splitter_settings = {"chunk_size": 1000, "chunk_overlap": 100}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)
all_splits = text_splitter.split_documents(documents)

In [6]:
# Run the embedding model on the GPU when torch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device=device)
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

print(f"Using device: {device}")

2024-09-18 19:33:07.055521: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-18 19:33:07.151601: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-18 19:33:07.174483: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-18 19:33:07.316788: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Using device: cpu


In [7]:
# Show the configured embedding wrapper (model pipeline, device and encode options).
print(embeddings)

client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
) model_name='sentence-transformers/all-mpnet-base-v2' cache_folder=None model_kwargs={'device': 'cpu'} encode_kwargs={} multi_process=False show_progress=False


In [8]:
# Build the Chroma index once and reuse it on later runs.
# Chroma.from_documents assigns fresh ids on every call, so pointing it at an
# existing persist_directory appends the same chunks again; each re-run of this
# cell would then leave duplicate documents in the store and duplicate hits in
# retrieval.
persist_directory = "chroma_db"
persist_path = Path(persist_directory)

if persist_path.is_dir() and any(persist_path.iterdir()):
    # A populated store already exists on disk: reopen it instead of re-embedding.
    # NOTE(review): delete the directory to force a rebuild after changing the
    # PDF, the splitter settings, or the embedding model.
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )
else:
    # First run (or empty directory): embed every chunk and persist the index.
    vectordb = Chroma.from_documents(
        documents=all_splits,
        embedding=embeddings,
        persist_directory=persist_directory,
    )

# Retrieval

In [16]:
# Expose the vector store as a retriever that filters hits by a minimum
# relevance score instead of returning every nearest neighbour.
retriever_settings = {
    "search_type": "similarity_score_threshold",
    "search_kwargs": {"score_threshold": 0.4},
}
retriever = vectordb.as_retriever(**retriever_settings)

In [17]:
# Smoke-test retrieval with a sample question and show how many chunks came back.
sample_question = "What is force?"
retrieved_docs = retriever.invoke(sample_question)
len(retrieved_docs)

4

In [18]:
# Dump the text of every retrieved chunk, leaving blank lines between chunks.
for doc in retrieved_docs:
    print(f"Docs: {doc.page_content}", end="\n\n\n")

Docs: law of motion. Newton’s first law of motion is- 
‘Every object will continue in its state of rest or of uniform motion in a straight line 
unless an external force is applied to it.’ 
Newton’s first law expresses the property of inertia of matter. From Newton’s first law of motion we observe that a body cannot change its state on its 
own. If the body is at rest, it tends to remain at rest forever and if it is in motion it tends to keep on motion with uniform speed for all time. This property of a body is termed as 
inertia. Thus from the Newton’s first law of motion we get the concept of inertia. 
Again from Newton’s first law we see that to change the state of a body something 
external must be applied. That is, the external cause which changes or tends to change 
the state of an object is called force. Thus from Newton’s first law we get qualitative definition of force. According to Newton’s first law, a force is that which acting on a


Docs: law of motion. Newton’s first law

# Text Generation 

In [19]:
# Fetch the shared RAG prompt template from the LangChain Hub.
# `hub` is imported in the notebook's top import cell so that every dependency
# is declared in one place.
prompt = hub.pull("rlm/rag-prompt")

In [20]:
# Render the prompt with placeholder values to inspect the final message layout.
filler_inputs = {"context": "filler context", "question": "filler question"}
example_messages = prompt.invoke(filler_inputs).to_messages()
example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:")]

In [21]:
def format_docs(docs):
    """Concatenate the text of several documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with a blank line between
        consecutive documents; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)


# Assemble the RAG pipeline: the incoming question is sent both to the
# retriever (whose hits are flattened by format_docs into the prompt's
# "context") and, unchanged, into the prompt's "question"; the filled prompt
# then goes to the LLM and the reply is parsed to a plain string.
chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

In [22]:
# Run the full chain on a sample question and display the generated answer.
answer = rag_chain.invoke("What is force?")
answer

"According to Newton's first law of motion, a force is an external cause that changes or tends to change the state of an object. In other words, it's something that acts on a body and causes its state (rest or motion) to be altered."