In [None]:
# !pip install faiss-cpu  # not required if you use Chroma as the vector store
# !pip install pypdf
# !pip install langchain_community
# !pip install langchain_huggingface
#!pip install chromadb

In [None]:
from langchain_community.document_loaders import PyPDFLoader

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma

In [None]:
from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings

In [None]:
# PDF that feeds the retrieval pipeline. To index a different document,
# point the loader at another file, e.g. "mukti_legal_land_verdict.pdf".
loader = PyPDFLoader(file_path="Nestle_Annual-Report-2023-24_abstract.pdf")
# Alternative (page-wise): pages = loader.load_and_split()

In [None]:
# pages

In [None]:
# Sentence-embedding model used to vectorise the document chunks.
model_name = 'sentence-transformers/all-mpnet-base-v2'

# Extra keyword arguments forwarded to the underlying embedding model.
# `trust_remote_code` is deliberately NOT enabled: this checkpoint uses a
# standard architecture and does not need custom code from the model
# repository, so there is no reason to allow arbitrary remote code to run.
# To embed on a GPU, add {"device": "cuda"} here.
model_kwargs = {}

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    show_progress=True,  # display a progress bar while embedding
)

In [None]:
# Read the PDF through the loader created above and keep the result in `docs`.
# NOTE(review): presumably one Document per PDF page — confirm against the loader's docs.
docs = loader.load()

In [None]:
# embeddings = ""

In [None]:
# Chunking configuration: target chunk size of 1000 with an overlap of 20
# between neighbouring chunks.
text_splits = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)

# Apply the splitter to the loaded documents; `all_splits` is what gets indexed.
all_splits = text_splits.split_documents(documents=docs)

In [None]:
# Embed every chunk and index it in a Chroma vector store.
# No persist_directory is passed here; supply one (e.g. persist_directory="chroma_db")
# to keep the index on disk.
# NOTE(review): re-running this cell in the same kernel may append the same
# chunks to the existing collection again — confirm and restart the kernel if so.
vectordb = Chroma.from_documents(
    embedding=embeddings,
    documents=all_splits,
)

In [None]:
# Retriever over the vector store using MMR search, returning 4 chunks per query.
# Optionally add 'fetch_k' (e.g. 50) to search_kwargs.
retriever = vectordb.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 4},
)

In [None]:
# Try the retriever on its own, before any LLM is involved, to inspect
# which chunks it returns for the question.
query = "what is total liabilities for the year 31 March 2024?"
retriever.invoke(input=query)

In [None]:
# Query the vector store directly (bypassing the MMR retriever) for the
# top 5 matches together with their scores.
# NOTE(review): the score is presumably a distance (lower = closer) — confirm.
vectordb.similarity_search_with_score(query=query, k=5)

In [None]:
from transformers import AutoModelForCausalLM,AutoTokenizer,pipeline,BitsAndBytesConfig
import torch

In [None]:
# Local snapshot directory of the Llama 3.2 3B Instruct checkpoint.
# NOTE(review): this is a relative path with Windows-style separators, so it
# only resolves on Windows when the notebook runs from the directory that
# contains it — confirm before running elsewhere.
model_id = 'models--meta-llama--Llama-3.2-3B-Instruct\\snapshots\\0cb88a4f764b7a12671c53f0838cd831a0843b95'

# Quantization settings for loading the model weights.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,  # 8-bit quantization (use load_in_4bit=True for 4-bit)
)

# The tokenizer has no device placement, so no device_map is passed to it.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# trust_remote_code is not enabled: the checkpoint uses an architecture that
# ships with transformers, so no repository-provided code needs to be executed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    # torch_dtype=torch.bfloat16,  # alternative: load unquantized in bfloat16
    device_map='cuda',
)

# Use the end-of-sequence token as the padding token.
tokenizer.pad_token_id = tokenizer.eos_token_id


In [None]:
# Text-generation pipeline around the loaded model: sampling disabled,
# at most 512 newly generated tokens, and a repetition penalty of 1.03.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
)

# Expose the pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
## Retrieval-augmented QA chain (for LLMs)
# The "stuff" chain type is used with the retriever and LLM defined above;
# verbose=True is set on the chain.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    verbose=True,
)

# Ask the question end-to-end: retrieval followed by generation.
query = "what is total liabilities for the year 31 March 2024?"
qa.invoke({"query": query})