# METRICS CALCULATION FILE

In [23]:

import os
import tempfile

import chromadb
import nltk
import pandas as pd
import streamlit as st

from bert_score import BERTScorer
from langchain_chroma import Chroma
from langchain_community.chat_models import ChatOllama
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from nltk.translate.meteor_score import meteor_score, single_meteor_score
from pypdf import PdfReader
from sentence_transformers import CrossEncoder
from streamlit.runtime.uploaded_file_manager import UploadedFile


## Preparing the input text generation file

In [2]:
# Load the combined question/answer table from disk.
source_csv = 'combined_df.csv'
combined_df = pd.read_csv(source_csv)

In [None]:
# Use the question ID as the row index.
# Reassigning (instead of mutating with inplace=True) and checking the current
# index name keeps this cell safe to re-run: a second in-place call would raise
# KeyError because 'ID' is no longer a column by then.
if combined_df.index.name != 'ID':
    combined_df = combined_df.set_index('ID')

In [7]:
# Preview the loaded table (the notebook display truncates the middle rows).
combined_df

Unnamed: 0_level_0,QUESTION,ANSWER,Model1_GA,Model2_GA,Model1a_GA,Model2a_GA
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Define 'asexual reproduction',Asexual reproduction is a type of reproductive...,Asexual reproduction is a type of reproduction...,Asexual reproduction is a type of reproduction...,Asexual reproduction is a type of reproduction...,Asexual reproduction is a type of reproduction...
2,Identify the sort of cell division that is inv...,The cell division in asexual reproduction is m...,Mitosis is the type of cell division involved ...,The type of cell division involved in asexual ...,"Asexual reproduction involves mitosis, which r...","The process involved in asexual reproduction, ..."
3,Outline the ideal environmental conditions for...,Stable and uniform environments with a good su...,The ideal environmental conditions for asexual...,Asexual reproduction is a process where an org...,Asexual reproduction occurs optimally in water...,Asexual reproduction can occur via several met...
4,List the key events in binary fission for bact...,The key events that happen in binary fission f...,Binary fission involves the following key even...,Binary fission is a process by which bacterial...,The key events in binary fission for bacteria ...,Binary fission is a process where bacteria rep...
5,What is the difference between a somatic cell ...,Somatic cells are all the diploid cells in the...,A somatic cell is any cell in the body that is...,"A somatic cell, also known as a body cell or n...",A somatic cell is a diploid cell that contains...,A somatic cell is a non-reproductive body cell...
...,...,...,...,...,...,...
158,Explain how the eye detects different colours.,"There are three types of cone cells, each with...",The human eye contains specialized cells calle...,The eye detects different colors through a pro...,The eye detects different colors through the c...,The eye detects different colours by a combina...
159,Discuss the value of having binocular vision.,Binocular vision requires two eyes quite close...,Having binocular vision allows for depth perce...,Having binocular vision refers to the ability ...,"Binocular vision, which involves both eyes wor...","Having binocular vision, which involves using ..."
160,Arrange the following structures of the excret...,"The largest is kidney, then nephron, glomerulu...","The correct arrangement is: kidney, nephron, g...",The correct arrangement from largest to smalle...,Here is the arrangement of the structures from...,Here are the structures arranged in order from...
161,Name the two hormones responsible for regulati...,ADH (antidiuretic hormone)—responds to water l...,The two hormones responsible for regulating sa...,The two primary hormones involved in regulatin...,The two hormones responsible for regulating sa...,The two hormones responsible for regulating sa...


In [8]:
# Pull the reference questions and answers out as plain Python lists for iteration.
questions = combined_df['QUESTION'].tolist()
reference_answers = combined_df['ANSWER'].tolist()

In [9]:
# Pull each model's generated answers out as plain Python lists for iteration.
model1_answers, model2_answers, model1a_answers, model2a_answers = (
    combined_df[column].tolist()
    for column in ('Model1_GA', 'Model2_GA', 'Model1a_GA', 'Model2a_GA')
)

## Compute the BERTScore

In [10]:
# One shared BERTScore scorer, reused for every model below.
bert_model_type = 'bert-base-uncased'
scorer = BERTScorer(model_type=bert_model_type)

Bertscore for Model 1

In [11]:
# Score every Model 1 answer against its reference answer, one pair per call.
m1_triples = [
    scorer.score([candidate], [reference])
    for candidate, reference in zip(model1_answers, reference_answers)
]

# Each triple is (precision, recall, F1); extract the scalar and round to 3 dp.
m1_precision = [round(p.item(), 3) for p, _, _ in m1_triples]
m1_recall = [round(r.item(), 3) for _, r, _ in m1_triples]
m1_f1 = [round(f.item(), 3) for _, _, f in m1_triples]

Bertscore for Model 2

In [12]:
# Score every Model 2 answer against its reference answer, one pair per call.
m2_triples = [
    scorer.score([candidate], [reference])
    for candidate, reference in zip(model2_answers, reference_answers)
]

# Each triple is (precision, recall, F1); extract the scalar and round to 3 dp.
m2_precision = [round(p.item(), 3) for p, _, _ in m2_triples]
m2_recall = [round(r.item(), 3) for _, r, _ in m2_triples]
m2_f1 = [round(f.item(), 3) for _, _, f in m2_triples]

Bertscore for Model 1a

In [13]:
# Score every Model 1a answer against its reference answer, one pair per call.
m1a_triples = [
    scorer.score([candidate], [reference])
    for candidate, reference in zip(model1a_answers, reference_answers)
]

# Each triple is (precision, recall, F1); extract the scalar and round to 3 dp.
m1a_precision = [round(p.item(), 3) for p, _, _ in m1a_triples]
m1a_recall = [round(r.item(), 3) for _, r, _ in m1a_triples]
m1a_f1 = [round(f.item(), 3) for _, _, f in m1a_triples]


Bertscore for Model 2a

In [14]:
# Score every Model 2a answer against its reference answer, one pair per call.
m2a_triples = [
    scorer.score([candidate], [reference])
    for candidate, reference in zip(model2a_answers, reference_answers)
]

# Each triple is (precision, recall, F1); extract the scalar and round to 3 dp.
m2a_precision = [round(p.item(), 3) for p, _, _ in m2a_triples]
m2a_recall = [round(r.item(), 3) for _, r, _ in m2a_triples]
m2a_f1 = [round(f.item(), 3) for _, _, f in m2a_triples]


### Put into CSV

In [15]:
# Build the 1-based ID list, one entry per question.
index_list = list(range(1, len(questions) + 1))

In [16]:
# Column label -> per-question BERTScore list, in the order the columns appear.
bert_columns = {
    'BS M1 Precision': m1_precision,
    'BS M1 Recall': m1_recall,
    'BS M1 F1': m1_f1,
    'BS M2 Precision': m2_precision,
    'BS M2 Recall': m2_recall,
    'BS M2 F1': m2_f1,
    'BS M1a Precision': m1a_precision,
    'BS M1a Recall': m1a_recall,
    'BS M1a F1': m1a_f1,
    'BS M2a Precision': m2a_precision,
    'BS M2a Recall': m2a_recall,
    'BS M2a F1': m2a_f1,
}

# Assemble the table and key it by question ID.
bert_results_df = pd.DataFrame({'ID': index_list, **bert_columns}).set_index('ID')

In [17]:
# Preview the BERTScore results table.
bert_results_df

Unnamed: 0_level_0,BS M1 Precision,BS M1 Recall,BS M1 F1,BS M2 Precision,BS M2 Recall,BS M2 F1,BS M1a Precision,BS M1a Recall,BS M1a F1,BS M2a Precision,BS M2a Recall,BS M2a F1
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,0.610,0.739,0.668,0.586,0.678,0.629,0.668,0.741,0.702,0.652,0.725,0.687
2,0.571,0.723,0.638,0.597,0.758,0.668,0.559,0.723,0.630,0.597,0.767,0.671
3,0.594,0.656,0.624,0.544,0.644,0.590,0.638,0.631,0.634,0.561,0.660,0.606
4,0.662,0.679,0.670,0.571,0.666,0.615,0.732,0.618,0.670,0.599,0.541,0.569
5,0.660,0.707,0.683,0.585,0.662,0.621,0.683,0.652,0.667,0.659,0.693,0.675
...,...,...,...,...,...,...,...,...,...,...,...,...
158,0.655,0.678,0.666,0.602,0.655,0.627,0.626,0.678,0.651,0.632,0.673,0.652
159,0.665,0.576,0.617,0.604,0.566,0.584,0.656,0.574,0.612,0.622,0.564,0.591
160,0.555,0.740,0.634,0.524,0.686,0.594,0.673,0.814,0.737,0.584,0.712,0.642
161,0.532,0.736,0.617,0.474,0.690,0.562,0.617,0.740,0.673,0.499,0.716,0.588


In [18]:
# Write the BERTScore table out to CSV.
bert_output_csv = 'bert_results_df.csv'
bert_results_df.to_csv(bert_output_csv)

## Compute the METEOR score

METEOR for Model 1

In [21]:
# Sanity check: inspect the Python type of one reference answer.
type(reference_answers[1])

str

In [29]:
# METEOR looks words up in the WordNet corpus; fetch it if it is not installed
# (this is the fix for "LookupError: Resource wordnet not found").
nltk.download('wordnet', quiet=True)

# Smoke-test METEOR on a single question (index 1) before scoring everything.
# meteor_score expects pre-tokenized input: a list of reference token lists
# plus one hypothesis token list, so both strings are split on whitespace.
reference = reference_answers[1].split()
hypothesis = model1_answers[1].split()

score = meteor_score([reference], hypothesis)
print(score)


In [27]:
# METEOR looks words up in the WordNet corpus; fetch it if it is not installed.
nltk.download('wordnet', quiet=True)

# Raw (unrounded) METEOR score for every Model 1 answer.
# single_meteor_score takes ONE tokenized reference and ONE tokenized
# hypothesis, so the reference token list is passed as-is rather than wrapped
# in another list.
m1_m_score = []
for x in range(len(model1_answers)):
    reference = reference_answers[x].split()
    candidate = model1_answers[x].split()
    m1_m_score.append(single_meteor_score(reference, candidate))

In [20]:
# METEOR looks words up in the WordNet corpus; fetch it if it is not installed.
nltk.download('wordnet', quiet=True)

# meteor_score returns a single float per answer (not a precision/recall/F1
# triple) and requires tokenized input: a list of reference token lists plus
# one hypothesis token list. The scores get their own list so the BERTScore
# lists (m1_precision, m1_recall, m1_f1) are not overwritten.
m1_meteor = [
    round(meteor_score([reference.split()], candidate.split()), 3)
    for reference, candidate in zip(reference_answers, model1_answers)
]

METEOR for Model 2

In [None]:
# METEOR looks words up in the WordNet corpus; fetch it if it is not installed.
nltk.download('wordnet', quiet=True)

# meteor_score returns a single float per answer (not a precision/recall/F1
# triple) and requires tokenized input: a list of reference token lists plus
# one hypothesis token list. The scores get their own list so the BERTScore
# lists (m2_precision, m2_recall, m2_f1) are not overwritten.
m2_meteor = [
    round(meteor_score([reference.split()], candidate.split()), 3)
    for reference, candidate in zip(reference_answers, model2_answers)
]

METEOR for Model 1a

In [None]:
# METEOR looks words up in the WordNet corpus; fetch it if it is not installed.
nltk.download('wordnet', quiet=True)

# meteor_score returns a single float per answer (not a precision/recall/F1
# triple) and requires tokenized input: a list of reference token lists plus
# one hypothesis token list. The scores get their own list so the BERTScore
# lists (m1a_precision, m1a_recall, m1a_f1) are not overwritten.
m1a_meteor = [
    round(meteor_score([reference.split()], candidate.split()), 3)
    for reference, candidate in zip(reference_answers, model1a_answers)
]


METEOR for Model 2a

In [None]:
# METEOR looks words up in the WordNet corpus; fetch it if it is not installed.
nltk.download('wordnet', quiet=True)

# meteor_score returns a single float per answer (not a precision/recall/F1
# triple) and requires tokenized input: a list of reference token lists plus
# one hypothesis token list. The scores get their own list so the BERTScore
# lists (m2a_precision, m2a_recall, m2a_f1) are not overwritten.
m2a_meteor = [
    round(meteor_score([reference.split()], candidate.split()), 3)
    for reference, candidate in zip(reference_answers, model2a_answers)
]


### Put into CSV

In [None]:
#create index
# Build the 1-based ID list, one entry per question.
index_list = list(range(1, len(questions) + 1))

In [None]:
# METEOR looks words up in the WordNet corpus; fetch it if it is not installed.
nltk.download('wordnet', quiet=True)


def _meteor_column(candidates, references):
    """Return the METEOR score, rounded to 3 decimals, for each answer pair.

    Args:
        candidates: generated answer strings, one per question.
        references: reference answer strings, aligned with ``candidates``.

    Returns:
        A list of floats, one per candidate/reference pair.
    """
    # meteor_score requires pre-tokenized input, so each string is split on
    # whitespace; the single reference is wrapped in a list of references.
    return [
        round(meteor_score([reference.split()], candidate.split()), 3)
        for candidate, reference in zip(candidates, references)
    ]


# METEOR yields one score per answer, so the table has one column per model
# rather than separate precision/recall/F1 columns. The scores are computed
# here straight from the answer lists, so this cell does not depend on the
# per-model METEOR cells above having run successfully.
meteor_results_df = pd.DataFrame({
    'ID': index_list,
    'MS M1': _meteor_column(model1_answers, reference_answers),
    'MS M2': _meteor_column(model2_answers, reference_answers),
    'MS M1a': _meteor_column(model1a_answers, reference_answers),
    'MS M2a': _meteor_column(model2a_answers, reference_answers),
}).set_index('ID')

In [None]:
# Preview the METEOR results table.
meteor_results_df

In [None]:
# Write the METEOR table out to CSV.
meteor_output_csv = 'meteor_results_df.csv'
meteor_results_df.to_csv(meteor_output_csv)