# Model Evaluation

Comparing James' (HealthServe Bot) results based on different parameters tested. <br>

MMR / Similarity Threshold = 0.5 / 0.8 <br>
K (number of source documents) = 5 / 10

In [1]:
# Import libraries
import numpy as np
import pandas as pd

In [2]:
# Read the UAT data into a DataFrame named uat_df
uat_df = pd.read_excel('UAT.xlsx')
uat_df.sample(5)


Unnamed: 0,Question,Bloom's Taxonomy,Question.1,Expected answer,source doc,source chunck,MMR,Similarity threshold,k value,Jame's ans,cosine-similarity matrix,Best way?,Best k
24,5,Evaluate,A migrant worker has sustained an injury in th...,He should claim his insurance through work inj...,2020 SJM Reach Paper “Are migrant workers in S...,"1-5. Act (WICA), which mandates employers to p...",1.0,,5,The migrant worker should claim his insurance ...,,,
2,1,Remember,Who is required to be on the PCP?,PCP is mandatory for Work Permit (excluding mi...,Primary Care Plan (by MOM).pdf,1-5. What is the Primary Care Plan (PCP) You m...,,0.8,5,The Personal Care Assistant (PCA) program is d...,,,
14,3,Apply,What are some cases in which WICA can be clai...,Employee met with a traffic accident while tak...,Work Injury Compensation Act (by MOM) - Comple...,1-5. WICA 2019 AT A GLANCE Who is covered? All...,1.0,,5,Under the Work Injury Compensation Act (WICA) ...,,,
13,3,Apply,What are some cases in which WICA can be clai...,Employee met with a traffic accident while tak...,Work Injury Compensation Act (by MOM) - Comple...,1-10. WICA 2019 AT A GLANCE Who is covered? Al...,,0.8,10,"Under WICA, employees can claim for various ca...",,,
16,4,Analyze,What is the difference between WICA and PCP,PCP is a plan that provides eligible workers w...,Primary Care Plan (by MOM).pdf,1-10. What is the Primary Care Plan (PCP) You ...,,0.5,10,The difference between WICA (Work Injury Compe...,,,


In [None]:
uat_df.isnull().sum()

In [3]:
# change NaN to 0 in mmr column
uat_df['MMR'] = uat_df['MMR'].fillna(0)

# drop last 3 columns
uat_df = uat_df.drop(uat_df.columns[[10, 11, 12]], axis=1)

uat_df.head()

Unnamed: 0,Question,Bloom's Taxonomy,Question.1,Expected answer,source doc,source chunck,MMR,Similarity threshold,k value,Jame's ans
0,1,Remember,Who is required to be on the PCP?,PCP is mandatory for Work Permit (excluding mi...,Primary Care Plan (by MOM).pdf,1-3 . What is the Primary Care Plan (PCP) You ...,0.0,0.5,5,The Primary Care Plan (PCP) is mandatory for W...
1,1,Remember,Who is required to be on the PCP?,PCP is mandatory for Work Permit (excluding mi...,Primary Care Plan (by MOM).pdf,1-7. What is the Primary Care Plan (PCP) You m...,0.0,0.5,10,The Primary Care Plan (PCP) is mandatory for W...
2,1,Remember,Who is required to be on the PCP?,PCP is mandatory for Work Permit (excluding mi...,Primary Care Plan (by MOM).pdf,1-5. What is the Primary Care Plan (PCP) You m...,0.0,0.8,5,The Personal Care Assistant (PCA) program is d...
3,1,Remember,Who is required to be on the PCP?,PCP is mandatory for Work Permit (excluding mi...,Primary Care Plan (by MOM).pdf,1-10. What is the Primary Care Plan (PCP) You ...,0.0,0.8,10,The Personal Care Assistant (PCA) program is d...
4,1,Remember,Who is required to be on the PCP?,PCP is mandatory for Work Permit (excluding mi...,Primary Care Plan (by MOM).pdf,1-5. What is the Primary Care Plan (PCP) You m...,1.0,,5,The Primary Care Plan (PCP) is mandatory for W...


In [4]:
# for every data row, calculate cosine similarity between expected answer & james answer

# Import libraries
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Define a function that takes in two strings and calculates their cosine similarity
def calculate_cosine_similarity(string1, string2):
    # Create a CountVectorizer object
    count_vectorizer = CountVectorizer()
    # Transform the input strings using count_vectorizer
    count_vectorizer.fit([string1, string2])
    string1_vector = count_vectorizer.transform([string1])
    string2_vector = count_vectorizer.transform([string2])
    # Calculate the cosine similarity between the two vectors
    cosine = cosine_similarity(string1_vector, string2_vector)[0][0]
    return cosine


In [5]:
# Define a function that takes in a row and calculates the cosine similarity between the expected answer and James's answer
# add the cosine value to the row under a column named Cosine Similarity
for index, row in uat_df.iterrows():
    uat_df.loc[index, 'Cosine Similarity'] = calculate_cosine_similarity(row['Expected answer'], row["Jame's ans"])

In [6]:
# Read output data into a excel
uat_df.to_excel('output.xlsx', index=False)

In [7]:
# Calculate average mean of cosine similarity between MMR and Similarity Search
uat_df.groupby('MMR')['Cosine Similarity'].mean()

MMR
0.0    0.518847
1.0    0.564454
Name: Cosine Similarity, dtype: float64

In [8]:
# From MMR, group by k and calculate average mean of cosine similarity

uat_df.groupby(['MMR', 'k value'])['Cosine Similarity'].mean()

MMR  k value
0.0  5          0.517977
     10         0.519716
1.0  5          0.564454
Name: Cosine Similarity, dtype: float64

### Evaluation Part 2

We will use cosine similarity matrix to evaluate who is best: James/ ChatGPT/ Perplexity

In [9]:
# Read the UAT2 data into a DataFrame named uat_df
uat2_df = pd.read_excel('UAT2.xlsx')
uat2_df.sample(5)

Unnamed: 0,Index,Bloom's Taxonomy Category,Question,Expected Answer,Parameters,James,ChatGPT,Perplexity,Which is better (qualitative)?,Which is better (quantittative)?
0,1.0,Understand,Why am I not covered by WICA insurance,WICA covers workers engaged under a “contract ...,"mmr, k=5",You may not be covered by WICA insurance if yo...,There could be several reasons why you may not...,According to the Work Injury Compensation Act ...,,
4,5.0,Understand,What restrictions does PCP have?,There is a list of standard exceptions that wo...,"mmr, k=5",The PCP has the following restrictions:\n- It ...,"The term ""PCP"" can have various meanings in di...","PCP, or primary care provider, is a term used ...",,
35,35.0,Analyze,"compare the dental coverage under WICA, EFMA, ...",WICA covers medical expenses for work-related ...,"mmr, k=5",Dental coverage is included under both WICA an...,As of my last knowledge update in January 2022...,It is important to note that the search result...,,
30,30.0,Apply,"Given that the budget is $50, suggest affordab...",HealthServe operates a community and dental cl...,"mmr, k=5","Based on the information provided, HealthServe...","Considering the budget constraint of $50, here...",If you have a budget of $50 and are looking fo...,,
9,10.0,Remember,Recall the basic rights of migrant workers rel...,There are three main laws regulating medical o...,"mmr, k=5",Migrant workers in Singapore have basic rights...,Migrant workers in Singapore are entitled to c...,Migrant workers in Singapore have certain basi...,,


In [10]:
# Drop last 2 columns
uat2_df = uat2_df.drop(uat2_df.columns[[8,9]], axis=1)

In [11]:
uat2_df.isnull().sum()

Index                        1
Bloom's Taxonomy Category    0
Question                     0
Expected Answer              0
Parameters                   0
James                        0
ChatGPT                      0
Perplexity                   1
dtype: int64

In [12]:
# Compare cosine similarity between James and Expected Answer, repeat for Perplexity & ChatGPT
for index, row in uat2_df.iterrows():
    uat2_df.loc[index, 'James Similarity'] = calculate_cosine_similarity(row['Expected Answer'], row["James"])
    uat2_df.loc[index, 'Perplexity Similarity'] = calculate_cosine_similarity(row['Expected Answer'], row['Perplexity'])
    uat2_df.loc[index, 'ChatGPT Similarity'] = calculate_cosine_similarity(row['Expected Answer'], row['ChatGPT'])


# Bot with highest cosine similarity is the best quantitatively, add column for that and iterrate through every row
for index, row in uat2_df.iterrows():
    if row['James Similarity'] > row['Perplexity Similarity'] and row['James Similarity'] > row['ChatGPT Similarity']:
        uat2_df.loc[index, 'Best Bot (quantitatively)'] = 'James'
    elif row['Perplexity Similarity'] > row['James Similarity'] and row['Perplexity Similarity'] > row['ChatGPT Similarity']:
        uat2_df.loc[index, 'Best Bot (quantitatively)'] = 'Perplexity'
    elif row['ChatGPT Similarity'] > row['James Similarity'] and row['ChatGPT Similarity'] > row['Perplexity Similarity']:
        uat2_df.loc[index, 'Best Bot (quantitatively)'] = 'ChatGPT'
    else:
        uat2_df.loc[index, 'Best Bot (quantitatively)'] = 'Tie'

# Read output data into a excel
uat2_df.to_excel('output2.xlsx', index=False)

ValueError: np.nan is an invalid document, expected byte or unicode string.

In [None]:
# Calculate average cosine similarity for James, Perplexity and ChatGPT
uat2_df.groupby('Best Bot (quantitatively)')['James Similarity', 'Perplexity Similarity', 'ChatGPT Similarity'].mean()