## Importing the necessary libraries

In [1]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_community.embeddings import OllamaEmbeddings
from langchain.document_loaders import DirectoryLoader

In [2]:
# Check whether PyTorch can see a CUDA-capable GPU; the resulting boolean is
# shown as the cell output.
import torch
torch.cuda.is_available()

False

## Loading the PDF files

In [3]:
from langchain_community.document_loaders import PyMuPDFLoader

# Gather every PDF matched by the recursive glob below the current directory,
# parsing each file with PyMuPDFLoader and showing a progress bar while loading.
loader = DirectoryLoader(
    "./",
    glob="**/*.pdf",
    loader_cls=PyMuPDFLoader,
    show_progress=True,
)
docs = loader.load()

100%|██████████| 2/2 [00:00<00:00, 13.07it/s]


In [4]:
# Display the loaded documents to inspect what the loader produced.
# NOTE(review): this echoes the whole list; a slice such as docs[:1] would keep the output short.
docs

[Document(page_content=' \n \n1/16 \n \nSuppliers Agreement \nTHIS AGREEMENT is made effective on 1st of June 2020 (the “Effective Date”) between:            \n(1) \nMarcas International Ltd, incorporated at Baltic House, Station Road, Ballasalla, Isle \nof Man, IM9 2AE  and registered in the UK at 1st Floor, 63 Queen Victoria Street, \nLondon EC4N 4UA, United Kingdom for corporation tax purposes (“Marcas”); and \n(2) \nFuruno [XXX] with registered office [XXX] (“Supplier”). \nRECITALS: \n(A) \nMarcas carries on business negotiating terms for the supply of Services and Products \nfor its Members; \n(B) \nMarcas, on behalf of its Members, desires to agree the terms and conditions on which \nSupplier will supply Services and Products. \n(C) \nSupplier wishes to agree such terms and conditions with Marcas. \nOPERATIVE PROVISIONS: \n1 \nINTERPRETATION \n1.1 \nDefinitions. In this Agreement, the following definitions apply: \nData Protection Laws: all applicable laws, rules, regulation, dir

## Splitting the PDF documents into chunks and building the vector store

In [None]:
# Cut the loaded documents into chunks (chunk_size=1000, chunk_overlap=20).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
splits = text_splitter.split_documents(docs)

# Embed every chunk with the Ollama "mistral" model and index the result in Chroma.
# NOTE(review): re-running this cell in the same kernel may add the chunks to the
# store a second time — confirm against the Chroma client in use.
vectorstore = Chroma.from_documents(
    embedding=OllamaEmbeddings(model="mistral"),
    documents=splits,
)

## Creating the retriever, the prompt and the RAG chain

In [None]:
# Building blocks for retrieval-augmented generation over the indexed PDF chunks:
# a retriever backed by the vector store, the "rlm/rag-prompt" prompt pulled from
# the hub, and a local Ollama chat model run at temperature 0.0.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")
llm_model = "mistral"
llm = ChatOllama(temperature=0.0, model=llm_model)


def format_docs(docs):
    """Merge the text of several documents into one string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank line
        (two newline characters). An empty input yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)


# Plain question -> answer chain. The incoming question is passed through
# unchanged under "question", while "context" is filled by running the retriever
# and flattening its documents to text; the prompt, the chat model and the
# string output parser are then applied in sequence.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Pulls the "rlm/rag-prompt" prompt from the hub and binds it to `prompt`.
# NOTE(review): the chain-setup cell above already runs this same pull, so this
# looks like a redundant hub request — confirm whether the cell is still needed.
prompt = hub.pull("rlm/rag-prompt")

In [None]:
# Display the pulled prompt object for inspection.
prompt

## Finalizing the rag chain

In [None]:
from langchain_core.runnables import RunnableParallel

# Answer-generation step. It receives a dict that already holds "context"
# (the retrieved documents) and "question"; the documents are flattened to text
# with format_docs before the prompt, chat model and string parser run.
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=lambda inputs: format_docs(inputs["context"]))
    | prompt
    | llm
    | StrOutputParser()
)

# Full chain: retrieve the documents and pass the question through in parallel,
# then attach the generated text under "answer" so the result keeps its sources.
rag_chain_with_source = RunnableParallel(
    {
        "context": retriever,
        "question": RunnablePassthrough(),
    }
).assign(answer=rag_chain_from_docs)

In [None]:
# Ask a first question and display the full result, which is built with the
# "context", "question" and "answer" keys by rag_chain_with_source.
response = rag_chain_with_source.invoke("What is the goal of the study?")
response

## Running the rag chain

In [None]:
# Run the chain on another question and keep the full result for the next cell.
# NOTE(review): "betweeness" looks like a misspelling of "betweenness"; the
# query is passed to the chain exactly as written.
answer = rag_chain_with_source.invoke("What is betweeness centrality?")

In [None]:
# Show only the generated answer from the chain result, leaving out the other
# entries (such as the retrieved context).
answer["answer"]

In [None]:
# Invoke the chain directly; the full result is displayed as the cell output
# and is not stored in a variable.
rag_chain_with_source.invoke("Who is the Supervising Lecturer?")

## Evaluating the rag chain

In [None]:
from langchain.evaluation.qa import QAGenerateChain

# Question/answer generator backed by the same Ollama model name as the RAG
# chain, but sampled at temperature 0.5.
example_gen_chain = QAGenerateChain.from_llm(
    ChatOllama(temperature=0.5, model=llm_model)
)

In [None]:
# Generate evaluation examples from the first five loaded documents, each passed
# to the generator under the "doc" key.
new_examples = example_gen_chain.apply([{"doc": page} for page in docs[:5]])

In [None]:
# Inspect the raw output of the example generator.
new_examples

In [None]:
# Flatten each generated item: lift "query" and "answer" out of its nested
# "qa_pairs" entry into a plain two-key dict, then display the result.
reformatted_examples = [
    {field: item["qa_pairs"][field] for field in ("query", "answer")}
    for item in new_examples
]
reformatted_examples

In [None]:
from langchain.chains import RetrievalQA

# Retrieval-QA chain of type "stuff", reusing the retriever and chat model
# defined earlier. Verbose logging is on, and a custom document separator
# string is passed through chain_type_kwargs.
qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type="stuff",
    chain_type_kwargs={"document_separator": "<<<<>>>>>"},
    verbose=True,
)

In [None]:
# Run the RetrievalQA chain over every generated example and display the output.
# The reporting loop further down reads the 'query', 'answer' and 'result' keys
# of each prediction.
predictions = qa.batch(reformatted_examples)
predictions

In [None]:
from langchain.evaluation.qa import QAEvalChain

# Grader model and evaluation chain.
# NOTE(review): this rebinds the notebook-level name `llm`, which earlier cells
# bound to a temperature-0.0 model; cells re-run after this one will pick up the
# temperature-1 model instead.
# NOTE(review): temperature=1 presumably makes grading vary between runs — confirm
# this is intended for an evaluator.
llm = ChatOllama(model=llm_model, temperature=1)
eval_chain = QAEvalChain.from_llm(llm)

In [None]:
# Evaluate the predictions against the generated reference examples; the
# reporting loop below reads the 'results' key of each graded item.
graded_outputs = eval_chain.evaluate(reformatted_examples, predictions)

In [None]:
# Inspect the raw grading output.
graded_outputs

In [None]:
# Print a per-example report: the generated question, its reference answer, the
# chain's predicted answer, the grader's raw text and a Yes/No verdict.
# Predictions and grades are paired positionally.
for i, (prediction, graded) in enumerate(zip(predictions, graded_outputs)):
    grade_text = graded['results']
    # Test for "INCORRECT" rather than "CORRECT", because "CORRECT" is a substring
    # of "INCORRECT". The verdict is computed before concatenation: a conditional
    # expression binds more loosely than `+`, so writing it inline made the
    # "Is correct: " label disappear whenever the verdict was "Yes".
    verdict = "No" if "INCORRECT" in grade_text else "Yes"

    print(f"Example {i}:")
    print("Question: " + prediction['query'])
    print("Real Answer: " + prediction['answer'])
    print("Predicted Answer: " + prediction['result'])
    print("Result: " + grade_text)
    print("Is correct: " + verdict)
    print()