In [7]:
doc_path = r'Your document path'
api_key ="Your API KEY"

## Creating a Vector database and Retriever

In [8]:
from langchain.document_loaders import PyPDFLoader 
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_ai21.embeddings import AI21Embeddings


loader = PyPDFLoader(doc_path)
doc_pages = loader.load_and_split()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=300,chunk_overlap=10)
all_splits = text_splitter.split_documents(doc_pages)

vector_store = Chroma.from_documents(documents=all_splits,embedding=AI21Embeddings(api_key=api_key))

retriever = vector_store.as_retriever(k=2)

#Function to retrive top k documents for a user query
def doc_retriver(retriever,query=''):
    retriever = vector_store.as_retriever(k=3)
    docs=retriever.invoke(query)
    context = docs
    docs=[]
    for doc in context:
        docs.append(doc.page_content)
    return docs


## Usefull Prompts

In [20]:
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder

prompt_ = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant that answers the question when you know the answerfrom the context, humbly reply that you do not know the answer if you have no information in the context related to the question."),
        MessagesPlaceholder(variable_name='history',optional=True),
        MessagesPlaceholder(variable_name='context',optional=True),
        ("human", "{query}"),
    ]
)


def format_prompt(prompt_template,history=[''],context=[''],query=['rocket?']):
    
    prompt = prompt_template.invoke({
            'history':history,
            'context':context,
            'query':query
        })
    return prompt

format_prompt(prompt_template=prompt_)


ChatPromptValue(messages=[SystemMessage(content='You are a helpful assistant that answers the question when you know the answerfrom the context, humbly reply that you do not know the answer if you have no information in the context related to the question.'), HumanMessage(content=''), HumanMessage(content=''), HumanMessage(content="['rocket?']")])

## Chat History


In [10]:
from langchain.memory import ConversationSummaryMemory

def add_to_history(chat_history_demo,type='',message=''):
    if type=='user':
        chat_history_demo.add_user_message(message=message)
    elif type=='ai':
        chat_history_demo.add_ai_message(message=message)
    return chat_history_demo


# Function to generate summary of the chat history using the chat model
def chat_summariser(chat_history,chat_model):
    memory = ConversationSummaryMemory.from_messages(
    llm=chat_model,
    chat_memory=chat_history,
    return_messages=True
    )
    summary = [memory.buffer]
    return summary

## Chat System

In [24]:
from langchain.memory import ChatMessageHistory
from langchain_ai21 import ChatAI21

chat_model = ChatAI21(model="j2-ultra",api_key="Your API KEY",temperature=0.2,max_tokens=500)
chat_history = ChatMessageHistory()

initiate_flag=True

while True:
    if initiate_flag:
        print('Hi, Welcome !!!')
        print('\n','*'*80,'\n')
        initiate_flag=False
    else:
        query = input()
        if query.lower()=="stop":
            break
        print('User : ',query,'\n')
        chat_history = add_to_history(chat_history_demo=chat_history,type='user',message=query)
        context = doc_retriver(retriever=retriever,query=query)
        
        if len(chat_history.messages)>4:
            summary = chat_summariser(chat_model=chat_model,chat_history=chat_history)
            prompt = format_prompt(prompt_template=prompt_,history=summary,context=context,query=query)
        else:
            prompt = format_prompt(prompt_template=prompt_,history=chat_history.messages,context=context,query=query)
        
        response = chat_model(prompt.messages)

        print('CHAT BOT : ',response.content)
        print('\n','*'*80,'\n')
        chat_history = add_to_history(chat_history_demo=chat_history,type='ai',message=response)
        

Hi, Welcome !!!

 ******************************************************************************** 

User :  what are engine oil properties? 

CHAT BOT :  Engine oil properties include viscosity, thermal stability, detergency, dispersancy, anti-wear, corrosion protection, and foaming tendency. Engine oil also enhances the performance of the engine by reducing friction and to reduce wear. It also cleans, inhibits corrosion, improves sealing, and cools the engine by carrying heat away from moving parts. Engine oils should not enhance the properties of the fuel nor energize the combustion.

 ******************************************************************************** 



## RAGAS EVALUATION

In [48]:
import pandas as pd
from datasets import Dataset

ground_truth_data =pd.read_csv('Path to ground truth data')

quesiton_generating_prompt_ = """
You are a question generator designed to create questions based on provided context. Follow these guidelines to generate questions:
Context: You will be given a paragraph of text, scenario, or relevant information.
Question Requirements: should include specific keywords, target a certain aspect, be clear and unambiguous

Generate a question for below context :
{context_}"""

answer_generating_prompt_ = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant that answers the question when you know the answerfrom the context, humbly reply that you do not know the answer if you have no information in the context related to the question."),
        MessagesPlaceholder(variable_name='context',optional=True),
        ("human", "{query}"),
    ]
)

def create_ragas_dataset(quesiton_generating_prompt_,answer_generating_prompt,golden_truth_data):
    rag_dataset=[]
    for truth in golden_truth_data.ground_truth.tolist():
        quesiton_generating_prompt=quesiton_generating_prompt_.format(context_=truth) 
        question = chat_model.invoke(quesiton_generating_prompt).content
        context = doc_retriver(retriever=retriever,query=question)
        prompt = format_prompt(prompt_template=answer_generating_prompt,context=context,query=question)
        response = chat_model(prompt.messages).content
        rag_dataset.append({
            "question":question,
            "answer":response,
            "contexts":context,
            "ground_truth":truth
        })
        rag_df=pd.DataFrame(rag_dataset)
        eval_dataset = Dataset.from_pandas(rag_df)
    return eval_dataset,rag_df
eval_dataset = create_ragas_dataset(quesiton_generating_prompt_=quesiton_generating_prompt_,
                                    answer_generating_prompt=answer_generating_prompt_,golden_truth_data=ground_truth_data)


Dataset({
    features: ['question', 'answer', 'contexts', 'ground_truth'],
    num_rows: 10
})

In [50]:
from ragas.metrics import (answer_relevancy,faithfulness,context_recall,context_precision)
from ragas.metrics.critique import harmfulness
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

#Use a different model to create the dataset
evaluation_chat_model = LangchainLLMWrapper(ChatAI21(model="Model Name",api_key="Your API KEY",max_tokens=500))
evaluation_embeddings = LangchainEmbeddingsWrapper(AI21Embeddings(api_key="Your API KEY"))

def evaluate_ragas_dataset(ragas_dataset):
    result = evaluate(ragas_dataset,metrics=[context_precision,faithfulness,answer_relevancy,context_recall],
                      llm=evaluation_chat_model,embeddings=evaluation_embeddings)
    return result


result = evaluate_ragas_dataset(eval_dataset)
print(result)