In [40]:
import os
import json
import time
import re
import numpy as np
from tqdm import tqdm
from operator import itemgetter
from PyPDF2 import PdfReader
from ragas.metrics.critique import harmfulness
from ragas import evaluate
import pandas as pd
from datasets import Dataset
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.chat_history import BaseChatMessageHistory
from langchain.storage import LocalFileStore
from langchain.retrievers import ParentDocumentRetriever
from langchain.schema.output_parser import StrOutputParser
from langchain.storage import InMemoryStore
from langchain.storage._lc_store import create_kv_docstore
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain_ollama import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from ragas.llms import LangchainLLMWrapper
from langchain.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
from sklearn.metrics.pairwise import cosine_similarity
from langchain_core.documents import Document
import math
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision
)

LangSmith Monitoring Client

In [41]:
from langsmith import Client

# LangSmith client created with no explicit arguments.
client = Client()

Embeddings LLM

In [42]:

# Shared Ollama "llama3" embedding model; the same object is reused for similarity
# scoring, the Chroma vector store and the RAGAS evaluation further down.
embeddings = OllamaEmbeddings(
    model="llama3",
)

Cosine Similarity

In [43]:
def cosine_sim(s1, s2):
    """Return the cosine similarity between the embeddings of two strings.

    Args:
        s1: First text; embedded with the module-level ``embeddings`` model.
        s2: Second text; embedded the same way.

    Returns:
        float: dot(u, v) / (|u| * |v|). When either embedding has zero norm
        the similarity is undefined, so 0.0 is returned instead of dividing
        by zero.
    """
    u = np.asarray(embeddings.embed_query(s1), dtype=float)
    v = np.asarray(embeddings.embed_query(s2), dtype=float)

    # Vectorised norm/dot instead of element-by-element Python loops.
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    if denom == 0:
        return 0.0
    return float(np.dot(u, v) / denom)

Extract PDFs

In [44]:
def extract_pdfs(pdfs, title="The Sexual Harassment of Women at Workplace.pdf", skip_pages=2):
    """Concatenate the extracted text of every PDF whose file name equals ``title``.

    Args:
        pdfs: Iterable of PDF file paths.
        title: File name (not the full path) to keep; all other paths are ignored.
        skip_pages: Number of leading pages dropped from each matching PDF.

    Returns:
        str: Text of all retained pages joined together, or "" when nothing matched.
    """
    parts = []
    for pdf in pdfs:
        # Compare on the base name rather than a fixed-width slice, so the match
        # does not depend on the directory prefix being exactly seven characters.
        name = os.path.basename(pdf)
        if name != title:
            continue
        print(name)
        reader = PdfReader(pdf)

        for i, page in enumerate(reader.pages):
            if i < skip_pages:
                continue
            # Guard against a page that yields no text at all.
            parts.append(page.extract_text() or "")
    return "".join(parts)

Importing the generated dataset

In [45]:

def load_data(file_name):
    """Load the records stored under "data" in a generated dataset JSON file.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The value stored under the "data" key, or None (after printing a
        message) when the file does not exist.

    Raises:
        KeyError: If the JSON object has no "data" entry.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    try:
        with open(file_name, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"The file {file_name} is not found")
        return None

    # Only "data" is consumed; the "config" entry is no longer read, so files
    # that lack it still load instead of raising KeyError.
    return data['data']

Creating the RAGAS Dataset

In [46]:
def _select_best_answer(rag_chains, inputs):
    """Return (answer_text, context_text) from the chain whose answer best matches one of its contexts."""
    best_score = 0
    best_answer = None
    best_context = None
    for chain in rag_chains:
        result = chain.invoke(inputs)
        answer_text = result["response"].content

        # Best-matching retrieved context for this particular chain.
        chain_score = 0
        chain_context = None
        for context in result["context"]:
            sim_score = cosine_sim(context.page_content, answer_text)
            if sim_score > chain_score:
                chain_score = sim_score
                chain_context = context.page_content

        # Compare the chain's best score (not the score of whichever context was
        # scored last) and remember it, so later chains must actually beat it.
        if chain_score > best_score:
            best_score = chain_score
            best_answer = answer_text
            best_context = chain_context
    return best_answer, best_context


def create_ragas_dataset(rag_chains, dataset, max_samples=11):
    """Run every RAG chain on the dataset questions and build a RAGAS evaluation dataset.

    For each record every chain is invoked; a chain is scored by the highest cosine
    similarity between its answer and any of its retrieved contexts, and the
    answer/context pair of the best-scoring chain is kept.

    Args:
        rag_chains: Chains invoked with {"question", "title"} that return a mapping
            with "response" (exposing ``.content``) and "context" (items exposing
            ``.page_content``).
        dataset: Iterable of records with "question", "title" and "ground_truth".
        max_samples: Upper bound on the number of records processed. The default
            of 11 is what the previous hard-coded counter let through; pass None
            to process every record.

    Returns:
        A ``Dataset`` with the columns question, ground_truth, contexts and answer.
    """
    rag_rows = []
    for idx, record in enumerate(tqdm(dataset)):
        if max_samples is not None and idx >= max_samples:
            break
        ans, con = _select_best_answer(
            rag_chains,
            {"question": record["question"], "title": record["title"]},
        )
        print(ans)
        rag_rows.append({
            "question": record["question"],
            "ground_truth": record["ground_truth"],
            "contexts": [con],
            "answer": ans,
        })
    rag_df = pd.DataFrame(rag_rows)
    rag_eval_dataset = Dataset.from_pandas(rag_df)

    return rag_eval_dataset

Evaluating the dataset

In [47]:
from ragas.run_config import RunConfig
def evaluate_ragas_datset(ragas_dataset):
    """Score ``ragas_dataset`` on faithfulness, answer relevancy, context precision and context recall.

    A new ChatOllama "llama3" model acts as the judge LLM and the module-level
    ``embeddings`` object supplies the embeddings. The value returned by
    ``evaluate`` is passed back unchanged.
    """
    judge_llm = ChatOllama(model="llama3")
    metric_suite = [
        faithfulness,
        answer_relevancy,
        context_precision,
        context_recall,
    ]
    return evaluate(
        dataset=ragas_dataset,
        llm=judge_llm,
        embeddings=embeddings,
        metrics=metric_suite,
    )

In [48]:
# Collect every file below ./Acts, keeping the directory each one was found in.
pdfs = []
for root, _, files in os.walk('./Acts', topdown=True):
    for file in files:
        # Join with the walked directory: a fixed './Acts/' prefix would give
        # files in sub-directories a path that does not exist.
        pdfs.append(os.path.join(root, file))

# Raw text of the selected act, wrapped as a single Document for the retriever.
docs = extract_pdfs(pdfs)

documents_rec = Document(
        page_content=docs,
        metadata={"source":"The Sexual Harassment of Women at Workplace"}
    )

The Sexual Harassment of Women at Workplace.pdf


Parent Document Retrieval

In [56]:
# Separators tried in order by both splitters; all of them are treated as regular
# expressions because is_separator_regex=True.
# NOTE(review): in the first pattern `[\bCHAPTER \s\b]` is a character class (it matches
# one character), not the word "CHAPTER" — confirm the intended heading pattern.
# NOTE(review): data already persisted in ./db and ./store_location was presumably chunked
# with the previous separators; re-add the documents for this fix to take effect.
SPLIT_SEPARATORS = [
    r'[0-9]{1}?[0-9]{1}[.][\s][\D][\bCHAPTER \s\b]',
    r'\.',  # literal full stop; a bare '.' is a regex wildcard that matches every character
    '\n',
]

# Large chunks returned to the LLM as context.
parent_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
        separators=list(SPLIT_SEPARATORS),
        is_separator_regex=True
)
# Small chunks that are embedded and searched.
child_splitter = RecursiveCharacterTextSplitter(
        chunk_size=250,
        chunk_overlap=100,
        separators=list(SPLIT_SEPARATORS),
        is_separator_regex=True
)

# File-backed document store for the parent chunks.
fs = LocalFileStore("./store_location")
store = create_kv_docstore(fs)

# Persistent Chroma collection holding the embedded child chunks.
vectorstore = Chroma(
    collection_name="my_collection",
    embedding_function=embeddings,
    persist_directory="./db",
)

retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter
)

#retriever.add_documents([documents_rec])

Verifying Document Retrieval

In [50]:
# Parent chunks are counted by docstore key; child chunks by distinct stored text.
parent_keys = list(retriever.docstore.yield_keys())
num_parent_docs = len(parent_keys)

child_texts = retriever.vectorstore.get()['documents']
num_child_docs = len(set(child_texts))

print (f"You have {num_parent_docs} parent docs and {num_child_docs} child docs")

You have 44 parent docs and 254 child docs


In [51]:
# Display the keys currently held in the docstore that backs the retriever.
list(store.yield_keys())

['ba1fac9e-b449-4022-b6f5-98dc4a3141c1',
 'f3b9ba68-17f9-47aa-9e7d-ea9f671716b6',
 '341d4a03-3a15-4979-92b4-bc5ee7114188',
 'cefe9fcb-5689-4e50-9a72-2b2091716472',
 'e7b81077-10b9-4ab8-9ee0-85dd1e0da249',
 '5650afad-af51-4c01-a923-cb19e11deb48',
 '4a789653-2e3c-4845-8a3f-06c51d384b15',
 '9c91ce55-8717-47e2-b9ed-69ef35f48b91',
 'e2bcec02-9cdd-41af-bd51-a79501537e2b',
 '48d45cb4-0da4-4c79-8965-e94b70e1334f',
 'c9c02000-eb3f-42b7-ac96-fcac56fe9c95',
 '5e5e7173-466a-416a-b2ce-6099dd0c9d0c',
 '60e76fcf-b85f-4158-8c2b-e3e5d14778d4',
 '03e2cd13-113c-4c5b-a6f9-5a04bc291dad',
 '762f0f30-8c66-4655-a1b7-d33a4594372a',
 '59796840-920a-4817-abaa-3a7bbfa52d9b',
 'e1d97783-d9cc-4bc7-b4e4-8d40e7ce1a51',
 '57ac6bd6-be12-4621-b111-45c1f4dbee5e',
 'dfe392c1-2985-4d8d-b319-6fed0b776231',
 'fc573cfa-d064-4ce2-b666-ffbe643213e0',
 '594263f1-f6f2-46dd-bec7-62efe342c705',
 '83b33416-6a6f-4b2f-ae22-afe651bdcd63',
 '68d0db8a-8ba0-42e5-842f-4eccc1de2c95',
 'e02b6d82-ceae-4176-8c21-0ecff1944d5c',
 'd651df5c-6ec5-

In [52]:
# Query the vector store directly: this returns the small child chunks.
child_query = "What is internal complaints committee?"
child_docs = vectorstore.similarity_search(child_query)

print (f"Child docs found: {len(child_docs)}")
print('-----------------------------')
print('Selected doc content:')
top_child = child_docs[0]
print(top_child.page_content)

Child docs found: 4
-----------------------------
Selected doc content:
Local Committee may, for 
the reasons to be recorded in writing, extend the time limit not exceeding three months , if it is satisfied 
that the circumstances were such which prevented the woman from filing  a complaint within the said 
period.  
(2)


In [53]:
# Follow the top child chunk's 'doc_id' metadata back to its parent document.
# Named parent_doc_id so the builtin id() is not shadowed for the rest of the session.
parent_doc_id = child_docs[0].metadata['doc_id']
parent_matches = store.mget([parent_doc_id])
print(parent_matches[0].page_content)

Local Committee may, for 
the reasons to be recorded in writing, extend the time limit not exceeding three months , if it is satisfied 
that the circumstances were such which prevented the woman from filing  a complaint within the said 
period.  
(2) Where the aggrieved woman is unable to make a complaint on account of   her physical or mental 
incapacity or death or otherwise, her legal heir or such other person as may be prescribe d may make a 
complaint under this section.  
10. Conciliation. —(1) The Internal Committee or, as the case may be, the Local Committee, may, 
before initiating an inquiry under section 11 and at the request of the aggrieved woman take steps to settle 
the matter between her and the respondent through conciliation:  
Provided that no monetary settlement shall be made as a basis of conciliation.  
(2) Where settlement has been arrived at under sub -section (1), the Internal Committee or the Local 
Committee, as the  case may be, shall record the settlement s

In [55]:
# Same question through the retriever: this returns the larger parent chunks.
parent_query = "What is internal complaints committee?"
retrieved_docs = retriever.invoke(parent_query)

print(f"Parent retrieved docs found: {len(retrieved_docs)} ")
print('-----------------------------')
print('Selected doc content:')
top_parent = retrieved_docs[0]
print(top_parent.page_content)

Parent retrieved docs found: 4 
-----------------------------
Selected doc content:
Local Committee may, for 
the reasons to be recorded in writing, extend the time limit not exceeding three months , if it is satisfied 
that the circumstances were such which prevented the woman from filing  a complaint within the said 
period.  
(2) Where the aggrieved woman is unable to make a complaint on account of   her physical or mental 
incapacity or death or otherwise, her legal heir or such other person as may be prescribe d may make a 
complaint under this section.  
10. Conciliation. —(1) The Internal Committee or, as the case may be, the Local Committee, may, 
before initiating an inquiry under section 11 and at the request of the aggrieved woman take steps to settle 
the matter between her and the respondent through conciliation:  
Provided that no monetary settlement shall be made as a basis of conciliation.  
(2) Where settlement has been arrived at under sub -section (1), the Internal C

Defining LLMs for Ensemble

In [24]:
# Candidate Ollama chat models; each one is plugged into its own RAG chain below.
llm1 = ChatOllama(model="gemma2:2b")
llm2 = ChatOllama(model="llama3:latest")
llm3 = ChatOllama(model="mistral:latest")
llm4 = ChatOllama(model="nemotron-mini")

Generation Prompt

In [22]:
# System prompt for the single-turn ensemble chains; {context} receives the retrieved documents.
# NOTE(review): the prompt text contains typos ("laywer", "explaination"); it is left
# untouched here because changing it changes what the models are asked.
qa_system_prompt = """You are an experienced and skilled indian corporate laywer who answers the queries and provide explaination
    to the indian citizens queries. For the given question answer the question given to you based only on the following context and title. If you don't know the answer, just say that you don't know.

    <context>
    {context}
    </context>
    """

from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
# Chat prompt: the system instructions plus one human turn carrying the question and the act title.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        ("human", "Question : {question} Title: {title}"),
    ]
)

RAG Chains

In [27]:
def build_rag_chain(llm):
    """Build one retrieval-augmented chain around ``llm``.

    The chain takes {"question", "title"}, retrieves context for the question with
    the module-level ``retriever``, formats the module-level ``prompt`` and returns
    a mapping with the model "response" and the retrieved "context".

    Args:
        llm: Chat model that answers the formatted prompt.

    Returns:
        A runnable producing {"response": ..., "context": ...}.
    """
    return (
        {
            "context": itemgetter("question") | retriever,
            "question": itemgetter("question"),
            "title": itemgetter("title"),
        }
        | RunnablePassthrough.assign(
            context=itemgetter("context")
        )
        | {
            "response": prompt | llm,
            "context": itemgetter("context"),
        }
    )


# One chain per candidate model; they differ only in the LLM they call.
chain1 = build_rag_chain(llm1)
chain2 = build_rag_chain(llm2)
chain3 = build_rag_chain(llm3)
chain4 = build_rag_chain(llm4)

# NOTE(review): chain4 is built but is not part of the ensemble list — confirm this is intended.
rag_chains = [chain1, chain2, chain3]

Input the Question

In [33]:
# Ask every chain the same question and keep the answer whose best-matching
# retrieved context is most similar to it.
question_inputs = {"question" : "What are the duties of the District Officer?", "title" : "The sexual harrasement of women at workplace."}

maxi_p = 0      # best score seen across all chains so far
ans = None
con = None
for chain in rag_chains:
    answer = chain.invoke(question_inputs)
    answer_text = answer["response"].content

    maxi = 0    # best score among this chain's contexts
    con_c = None
    for context in answer["context"]:
        sim_score = cosine_sim(context.page_content, answer_text)
        if sim_score > maxi:
            maxi = sim_score
            con_c = context.page_content

    # Compare the chain's best score (not the last context's score) and record it,
    # so that a later chain only wins when it really scores higher.
    if maxi > maxi_p:
        maxi_p = maxi
        ans = answer_text
        con = con_c
print(ans)

 Based on the provided context, there is no information about the duties of a District Officer in relation to "The Sexual Harassment of Women at Workplace" Act. This Act primarily discusses the duties of employers towards their female employees in an attempt to prevent sexual harassment and provide mechanisms for handling complaints. The text mentions various provisions regarding the composition of Internal Committees, time limits for filing complaints, conciliation procedures, and the power of the Central Government to remove difficulties in giving effect to the Act's provisions. However, it does not mention any specific duties or roles assigned to a District Officer in this context.


## Streaming API

In [None]:
# Chat model used by the conversational (streaming) chain.
# No trailing comma: with it, `llm` would be a one-element tuple rather than a model.
llm = ChatOllama(model="llama3")

In [None]:
# System prompt for the conversational chain; {context} receives the retrieved documents.
qa_system_prompt = """You are an experienced and skilled indian corporate laywer who answers the queries and provide explaination
    to the indian citizens queries. For the given question answer the question given to you based only on the following context and title. If you don't know the answer, just say that you don't know.

    <context>
    {context}
    </context>
    """

# Same instructions as the single-turn prompt, with a slot for prior chat messages
# placed between the system text and the new human turn.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "Question : {question} Title: {title}"),
    ]
)

# Chain that feeds the retrieved documents and the prompt to `llm`.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

In [None]:
# Instruction for rewriting a follow-up question into a standalone one before retrieval.
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it, if needed and otherwise return it as is."""

# Prompt layout: rewrite instruction, prior messages, then the raw user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever wrapper built from the model, the base retriever and the rewrite prompt.
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

In [None]:
# Full conversational pipeline: history-aware retrieval followed by answer generation.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [48]:
# In-memory chat histories keyed by session id. This has its own name so that it does
# not overwrite the parent-document `store` that the retrieval cells still read from.
session_store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating an empty one on first use."""
    if session_id not in session_store:
        session_store[session_id] = ChatMessageHistory()
    return session_store[session_id]


# Wrap the RAG chain so each call loads and stores messages for its session:
# "input" is recorded as the user message and "answer" as the model reply.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [None]:
# Interactive check of the streaming chain: print each streamed chunk that carries
# an 'answer' key. The top-level `async for` relies on the notebook's support for
# top-level await.
question = "What is sexual Harassment?"
async for chunk in conversational_rag_chain.astream(
    {"input": question, "question" : question, "title" : "The Sexual Harassment of Women at Workplace"},
    config={
        "configurable": {"session_id": "abc123"}},
    ):
        # Chunks without an 'answer' key are skipped.
        if('answer' in chunk):
            print(chunk['answer'])
            # content = chunk['answer'].replace("\n", '<br>')
            # yield f"data : {content}\n\n"

In [54]:
from tqdm import tqdm
async def generate_chat(question, session_id="abc123", title="The Sexual Harassment of Women at Workplace"):
    """Stream the conversational RAG answer to ``question`` as event-stream text frames.

    Args:
        question: User question; sent to the chain as both "input" and "question".
        session_id: Key of the chat history to read and extend. The default keeps
            the previous behaviour, where every caller shares one history; pass a
            per-user value to keep conversations separate.
        title: Act title inserted into the prompt.

    Yields:
        str: One "data : <text>" frame, terminated by a blank line, for every
        streamed chunk that contains an 'answer' key.
    """
    print("Process Started")
    print(question)
    payload = {"input": question, "question" : question, "title" : title}
    run_config = {"configurable": {"session_id": session_id}}
    async for chunk in conversational_rag_chain.astream(payload, config=run_config):
        if 'answer' in chunk:
            # A raw newline would terminate the frame early, so it is sent as <br>.
            content = chunk['answer'].replace("\n", "<br>")
            # NOTE(review): the server-sent-events field name is "data" with no space
            # before the colon; a standard EventSource client may ignore "data : "
            # lines — confirm against the page that consumes this stream.
            yield f"data : {content}\n\n"

In [None]:
from fastapi import FastAPI, Response
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware

# FastAPI application that serves the chat page and the streaming endpoint.
app = FastAPI()

# NOTE(review): allow_origins=['*'] accepts cross-origin requests from any site —
# confirm this is intended outside local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*']
)

@app.get("/")
async def root():
    """Serve ./index.html (resolved against the current working directory)."""
    return FileResponse("./index.html")

@app.get("/get-response/{data}")
async def get_stream(data : str, response : Response):
    """Stream the answer to the question passed in the URL path as text/event-stream.

    Args:
        data: The user question, taken from the path segment.
        response: Injected response object; kept for signature compatibility and
            no longer written to.

    Returns:
        StreamingResponse wrapping ``generate_chat``.
    """
    # The content type is set on the response that is actually returned. Headers
    # written to the injected `response` are not applied when a Response object is
    # returned directly, so that dead write has been removed.
    return StreamingResponse(generate_chat(question=data), media_type="text/event-stream")

# Start the server on all interfaces, port 8000, when this code is the entry point.
# NOTE(review): uvicorn.run blocks until the server stops and may refuse to start from
# inside an already-running event loop (such as a notebook kernel) — confirm how this
# is meant to be launched.
if __name__ == '__main__':
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

## Evaluation

Creating the evaluation dataset

In [34]:
# Load the generated question set and run the ensemble over it.
# load_data returns None when the file is missing, in which case the next call fails.
eval_dataset = load_data("final_mean_questions.json")
rag_eval_dataset = create_ragas_dataset(rag_chains, eval_dataset)

  1%|          | 1/168 [01:39<4:36:20, 99.28s/it]

 According to the context provided from 'The Sexual Harassment of Women at Workplace' Act, there is no specific provision that restricts or allows a non-governmental organization (NGO) to nominate multiple members to the Internal Complaints Committee (ICC). However, it is mentioned that at least one member should be a woman and have a background in law or legal knowledge. As such, if an NGO wishes to nominate multiple members to the ICC, they can do so, as long as the required qualifications for the specified positions are met. It's always advisable to consult with a legal expert to ensure compliance with any specific regulations or interpretations of the Act.


  1%|          | 2/168 [03:06<4:15:10, 92.23s/it]

 In the given context from the "The Sexual Harassment of Women at Workplace" act, section 16 does not explicitly appear. However, I can provide information regarding penalties for non-compliance with other provisions of the Act, which may give an idea of the possible consequences for violating similar sections.

If an employer fails to constitute an Internal Committee under sub-section (1) of section 4, take action under sections 13, 14, and 22, or contravenes or attempts to contravene other provisions of this Act or any rules made thereunder, they shall be punishable with a fine which may extend up to fifty thousand rupees. If such an employer has been previously convicted of the same offense and subsequently commits it again, they may face a punishment twice the amount of the first conviction, subject to the maximum punishment provided for the same offense, or as prescribed under any other law in force at that time (Source: The Sexual Harassment of Women at Workplace Act).

It should

  2%|▏         | 3/168 [04:30<4:02:48, 88.30s/it]

 According to the given context, there is no specific information provided about the requirements for the composition of the Local Complaints Committee under the Sexual Harassment of Women at Workplace (Prevention, Prohibition and Redressal) Act regarding the nomination of members from social work, local bodies, and NGOs committed to women's causes. However, it is stated that every employer should constitute an Internal Committee under section 4(1) of the act, and among the members of this committee, one member may be a non-governmental organization representative. But there is no explicit information about the Local Complaints Committee in this regard. Therefore, I would recommend referring to the relevant rules or regulations prescribed under the Act for more detailed information on the composition of the Local Complaints Committee.


  2%|▏         | 4/168 [05:47<3:49:44, 84.05s/it]

 According to the provided context from "The Sexual Harassment of Women at Workplace" Act, there is no explicit requirement for nominating two members, at least one of whom is a woman, for the district's RD (Rural Development) or municipality committee specifically based on their commitment to women's causes or expertise in sexual harassment issues. However, it does mention that every employer should organize workshops and awareness programs at regular intervals for sensitizing employees with the provisions of the Act and orientation programs for members of the Internal Committee. This implies that expertise in sexual harassment issues could be beneficial when nominating members for such committees. Additionally, the context suggests that a background in law or legal studies may also be preferable since the Act deals with legal matters related to sexual harassment at the workplace. It's essential to note that local rules and regulations may have additional requirements for committee me

  3%|▎         | 5/168 [07:02<3:39:27, 80.78s/it]

 In the context given, an authority can call upon an employer or a District Officer to provide written information and authorize an officer to conduct an inspection and submit a report under the following circumstances:

1. When there is a requirement to investigate cases of sexual harassment. The authority may do so by issuing an order specifying the period within which the report must be submitted.

2. The employer or District Officer is required to produce all information, records, and other documents in their custody that have a bearing on the subject matter of inspection related to sexual harassment, upon demand from the officer making the inspection.


  4%|▎         | 6/168 [08:36<3:49:35, 85.03s/it]

 According to the provided context, a court can take cognizance of an offence punishable under the Sexual Harassment of Women at Workplace Act or its rules in the following circumstances:

1. If the aggrieved woman files a complaint under section 9(1) or (2) of the act and the Internal Committee or Local Committee arrives at a conclusion that during the inquiry any witness has given false evidence or produced any forged or misleading document, then the court can take cognizance.

2. If the respondent is convicted under section 509 of the Indian Penal Code (45 of 1860), or any other relevant provisions of the said Code where applicable, and the aggrieved woman informs the Internal Committee or Local Committee that any term or condition of the settlement arrived at under section 10(2) has not been complied with by the respondent.

3. In case both the parties are employees, if during the course of the inquiry, either party is given an opportunity to be heard and if either party makes a re

  4%|▍         | 7/168 [10:34<4:17:34, 95.99s/it]

 As per the provided context from "The Sexual Harassment of Women at Workplace" Act, when a settlement is reached, the records should be maintained by the Internal Committee or the Local Committee. The provisions do not explicitly mention how to provide these settlement records, but they should be preserved in an appropriate manner as per any rules prescribed.

If either party informs that the agreed terms of the settlement have not been complied with by the respondent, the Internal Committee or the Local Committee is required to proceed with an inquiry into the complaint or forward it to the police if necessary. In both cases, during the course of the inquiry, both parties will be given an opportunity to be heard. A copy of the findings shall be made available to both parties, enabling them to make representations against the findings before the Committee.

However, there is no explicit mention of further inquiries or procedures following the initial settlement and subsequent non-comp

  5%|▍         | 8/168 [12:24<4:27:46, 100.41s/it]

 According to the provided context, the Sexual Harassment of Women at Workplace Act does not specify any factors that the Internal Committee or Local Committee should consider while determining the sums to be paid to the aggrieved woman under clause (ii) of sub-section (3) of section 13. The Act only mentions that the amount shall be reasonable and compensatory in nature, taking into account any financial loss, mental trauma, and physical distress suffered by the victim due to sexual harassment. However, the determination of the exact amount is left to the discretion of the Internal Committee or Local Committee. It is advisable to consult with a legal expert for a detailed understanding as this might vary case-by-case.


  5%|▌         | 9/168 [14:53<5:06:43, 115.75s/it]

 In the given context, there is no specific provision mentioned for a scenario where a woman has experienced sexual harassment at her workplace by an individual who is not an employee. However, we can derive some potential steps based on the general provisions mentioned in the act.

According to the document, the District Officer (DO) can authorize any officer to make inspections of the records and workplace related to sexual harassment (Section 25). Since the individual causing harassment is not an employee, the employer may not be directly involved, but the DO could still order an inspection of the premises where the alleged incident took place.

If evidence of sexual harassment is found during the inspection, the DO can consider it as misconduct under any applicable law for the time being in force (Section 26 - if no service rules exist). The DO can then take appropriate action against the offender.

Furthermore, under Chapter VI: Duties of Employer, every employer is required to pr

  6%|▌         | 10/168 [16:25<4:45:10, 108.30s/it]

 According to the provided context from "The Sexual Harassment of Women at Workplace" document, if there is no senior-level woman employee available in the workplace, the Presiding Officer for investigating cases of sexual harassment can be nominated as follows:

1. In the absence of a woman employee, the employer can nominate any woman from outside the workplace to serve as the Presiding Officer. This is inferred from the fact that section 4(1) requires the employer to constitute an Internal Committee which must have at least one woman member preferably with experience in dealing with sexual harassment cases and the other member who could be a senior-level employee or a person who has knowledge of the subject matter.

2. If no suitable woman from outside the workplace can be found, the employer may constitute an Internal Committee consisting of only one member who is a senior-level employee or someone with knowledge of the subject matter. However, in such cases, it would be advisable 

  7%|▋         | 11/168 [18:05<4:18:14, 98.69s/it] 

 Based on the provided context from "The Sexual Harassment of Women at Workplace" Act, it is clear that an employer is required to constitute an Internal Committee under section 4(1) and take action under sections 13, 14, and 22 in case of sexual harassment complaints. However, the Act does not specifically mention that an employer has to prepare a report on cases filed and dispose of them.

In this context, if an employer has no information about disposal of cases filed under the Sexual Harassment Act, they are only required to disclose the following in their annual report:

1. That an Internal Committee has been constituted as per the provisions of section 4(1) of the Act.
2. The measures taken by the employer for providing a safe working environment at the workplace and safety from the persons coming into contact at the workplace, as mentioned under section 19(a).
3. Displaying the penal consequences of sexual harassments and the order constituting the Internal Committee under sub-s




Saving/Loading the evaluation dataset

In [57]:
# s.to_csv('rag_eval_dataset.csv')
import ast


def _parse_contexts(cell):
    """Turn the CSV text of a contexts list back into a list of strings.

    The column is written as the text form of a Python list, so it is parsed as a
    literal; this restores the strings without the surrounding quotes and escape
    sequences that plain bracket-stripping leaves behind.
    """
    try:
        parsed = ast.literal_eval(cell)
    except (ValueError, SyntaxError):
        # Not a valid Python literal: fall back to removing only the outer brackets.
        return [cell[1:len(cell) - 1]]
    if isinstance(parsed, (list, tuple)):
        return [str(item) for item in parsed]
    return [str(parsed)]


s = pd.read_csv('rag_eval_dataset.csv')
s['contexts'] = s['contexts'].apply(_parse_contexts)
# The saved index column is dropped when present; files written without it load as well.
s = s.drop(columns=['Unnamed: 0'], errors='ignore')
rag_eval_dataset = Dataset.from_pandas(s)


In [58]:
# One single-row Dataset per sample, so every sample can be evaluated on its own.
dat = [
    Dataset.from_pandas(pd.DataFrame([rag_eval_dataset[row_idx]]))
    for row_idx in range(0, len(rag_eval_dataset))
]
len(dat)

212

Evaluating the dataset

In [None]:
# Evaluate each single-row dataset separately and collect the per-sample results.
res = []
for doc in tqdm(dat):
    # NOTE(review): a 10-microsecond pause has no practical effect — confirm whether it is needed.
    time.sleep(0.00001)
    qa_result = evaluate_ragas_datset(doc)
    print(qa_result)
    res.append(qa_result)
res

In [None]:
# Alternative to the per-sample loop: evaluate the whole dataset in one call.
# This rebinds qa_result, which the per-sample loop also assigns.
qa_result = evaluate_ragas_datset(rag_eval_dataset)

Calculating Average of all metrics

In [51]:
# Metrics in the order they are printed.
METRIC_ORDER = ("answer_relevancy", "faithfulness", "context_precision", "context_recall")


def mean_metric_percentages(results, metric_names=METRIC_ORDER):
    """Average each metric, as a percentage, over the fully scored results.

    A result is used only when none of its metrics is NaN, so all averages are
    taken over the same set of samples.

    Args:
        results: Sequence of mappings from metric name to a float score.
        metric_names: Metrics to average.

    Returns:
        dict: metric name -> mean score * 100, or NaN for every metric when no
        result is fully scored (instead of dividing by zero).
    """
    scored = [
        result for result in results
        if not any(math.isnan(result[name]) for name in metric_names)
    ]
    if not scored:
        return {name: float("nan") for name in metric_names}
    return {
        name: 100 * sum(result[name] for result in scored) / len(scored)
        for name in metric_names
    }


avg_metrics = mean_metric_percentages(res)
print(*(avg_metrics[name] for name in METRIC_ORDER))

74.35944033539235 41.35136807636807 81.99999999179987 76.68210678210681


Saving the results

In [52]:
# Persist the per-sample evaluation results as JSON.
# NOTE(review): assumes every entry of `res` is JSON-serialisable — confirm for the ragas version in use.
with open("ensemble_cs.json", "w") as outfile:
    json.dump(res, outfile)

In [26]:
# Tabular view of the most recent evaluation result bound to qa_result.
df = qa_result.to_pandas()
df

Unnamed: 0,question,contexts,answer,ground_truth,answer_relevancy,context_precision,faithfulness,context_recall
0,Which section of the Indian Penal Code (45 of ...,['nder the Indian Penal Code (45 of 1860) or a...,According to Section 29 of the Sexual Harassme...,The Indian Penal Code (45 of 1860) or any othe...,0.843563,1.0,0.666667,1.0
