In [13]:
#Importing required Libraries
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
import os
import time

In [7]:
# Using the Groq framework to load the LLM.
# The API key is read from the GROQ_API_KEY environment variable so the secret
# is never stored in the notebook or committed to version control.
# NOTE(review): a key was previously hardcoded in this cell; it should be
# treated as leaked and revoked.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    # Fail early with a clear message instead of an authentication error
    # surfacing later, on the first request.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "export it before running this notebook."
    )
llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama3-70b-8192")

In [8]:

# Using Hugging Face sentence-transformers for embedding documents.
# The model below (all-mpnet-base-v2) is not a BGE model, so the generic
# HuggingFaceEmbeddings wrapper is used. HuggingFaceBgeEmbeddings prepends a
# BGE-specific retrieval instruction to every query, which this model was not
# trained with and which makes query vectors inconsistent with document vectors.
# NOTE(review): document embeddings should be unaffected by this switch (same
# model, same normalization), so previously saved indexes should still be
# usable -- verify against the installed langchain_community version.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={'device': 'cpu'},  # run the encoder on CPU
    encode_kwargs={'normalize_embeddings': True},  # request normalized embedding vectors
)



In [9]:
# Build the vector index with FAISS from the PDFs and save it to disk.
FAISS_INDEX_PATH = "faiss_index_temp"  # Path for saving and loading vectors

# Slice bounds applied to the loaded documents; 5 and 54 select Chap 1-2.
CHAPTER_START = 5
CHAPTER_END = 54

loader = PyPDFDirectoryLoader(r"PDF_DOC")  # Data ingestion from the given directory
docs = loader.load()  # Document loading
print(f"Loaded {len(docs)} documents.")

# Chunk creation: 1000-character chunks with a 100-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
final_documents = text_splitter.split_documents(docs[CHAPTER_START:CHAPTER_END])
print(f"Splited into {len(final_documents)} chunks.")

if not final_documents:
    # Stop here: building an index from zero documents cannot succeed, and
    # letting it run would only surface a less helpful error further down.
    raise ValueError("No final documents available for embeddings.")

print("Vector embeddings Started.")

vectors = FAISS.from_documents(final_documents, embeddings)  # Vector embeddings
print("Vector embeddings created.")

vectors.save_local(FAISS_INDEX_PATH)
print("Vector embeddings Saved.")

Loaded 615 documents.
Splited into 155 chunks.
Vector embeddings Started.
Vector embeddings created.
Vector embeddings Saved.


In [17]:
# Load previously saved vector embeddings from disk.
# NOTE(review): this directory ("faiss_index") differs from the
# "faiss_index_temp" directory written by the index-building cell -- confirm
# which index is meant to be queried, and that "faiss_index" exists on a
# fresh run.
FAISS_INDEX_PATH = r"faiss_index"
vectors = FAISS.load_local(
    folder_path=FAISS_INDEX_PATH,
    embeddings=embeddings,
    # Explicit opt-in to deserializing the stored index files; only load
    # indexes that come from a trusted source.
    allow_dangerous_deserialization=True,
)

In [18]:
# LLM input prompt template.
# {context} is filled with the retrieved document text and {input} with the
# user's question. The context section is wrapped in a matching
# <context>...</context> pair so the model can tell where it ends.
prompt = ChatPromptTemplate.from_template(
"""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
</context>
Questions: {input}
"""
)

In [19]:
# Combine the LLM and the input prompt into a single "stuff documents" chain.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Create a retriever from the FAISS vector database. A retriever is an
# interface that returns documents given an unstructured query.
retriever = vectors.as_retriever()

In [21]:
# Chain the retriever and the document chain together so that a single call
# runs the whole sequence.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [15]:
# Invoke the retrieval chain with the input question (lower-cased before it
# is sent) and display the generated answer.
ques = "What are isotopes?"
response = retrieval_chain.invoke({"input": ques.lower()})
response["answer"]

'Isotopes are different forms of the same element that have the same number of protons, but a different number of neutrons.'

In [22]:
# Ask a two-part question through the same retrieval chain (lower-cased
# before it is sent) and display the generated answer.
ques = "What is the atomic number of hydrogen. ALso calsulate the atomic mass of the carbon."
response = retrieval_chain.invoke({"input": ques.lower()})
response["answer"]

"Based on the provided context, I'll do my best to answer your questions:\n\n1. What is the atomic number of hydrogen?\n\nThe context doesn't explicitly mention the atomic number of hydrogen. However, it does explain that the atomic number of an element is equal to the number of protons it contains. Since hydrogen is not discussed in the context of its atomic structure, we cannot determine its atomic number from the provided information.\n\n2. Calculate the atomic mass of carbon.\n\nAccording to the context, the mass number (or atomic mass) of an element is the sum of its protons and neutrons. For carbon-12, the context states that it has 6 protons and 6 neutrons, resulting in a mass number of 12. Therefore, the atomic mass of carbon-12 is 12.\n\nFor carbon-14, it has 6 protons and 8 neutrons, resulting in a mass number of 14. Therefore, the atomic mass of carbon-14 is 14.\n\nNote that the context doesn't provide a general formula or a single value for the atomic mass of carbon, as it 