# ROUGE Metrics
### This notebook computes ROUGE metrics for LLM outputs on a specific dataset

## Importing necessary libraries

Ensure that the Python environment you run this notebook in has all the libraries listed in [requirements.txt](requirements.txt) installed.


In [None]:
import re
import pandas as pd
import json
from rouge import Rouge
import re
from nltk.translate.bleu_score import corpus_bleu
# Shared scorer instance; get_rouge_score() calls rouge.get_scores() on it.
rouge = Rouge()

## Importing the necessary files

In [None]:
# Locations of the input files. The values below look like placeholders —
# replace them with real paths before running the load cells.
llm_output_path = 'path_to_llm_output'  # Excel file read into `llm_output`
json_file_path = 'path_to_json_file'  # JSON file read into `data`
llm_output_path_gpt4 = 'path_to_llm_output_gpt4'  # Excel file read into `gpt4_turbo`
vicuna_output_path = 'path_to_vicuna_output'  # Excel file read into `vicuna_output`

In [None]:
# Read each Excel file into its own DataFrame.
# evaluate() later reads the 'f_output' column of the frame it is given.
llm_output = pd.read_excel(llm_output_path)
gpt4_turbo = pd.read_excel(llm_output_path_gpt4)
# This file's column is named 'outputs'; it is renamed to 'f_output' in the data-manipulation section.
vicuna_output = pd.read_excel(vicuna_output_path)

In [None]:

# Load the JSON dataset; evaluate() indexes it as data[i]['solutions'].
# The encoding is pinned to UTF-8 (the standard JSON encoding) so the result
# does not depend on the platform's default locale encoding.
with open(json_file_path, 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

### Data Manipulation

In [None]:
# Rename 'outputs' to 'f_output', the column name evaluate() reads.
# Re-binding the name (rather than inplace=True) leaves the originally loaded
# frame object untouched and keeps the cell a plain expression of its input.
vicuna_output = vicuna_output.rename(columns={'outputs': 'f_output'})

In [None]:
# Keep the first 19 rows, discard the rows labelled 10 and 16, then relabel the
# 17 remaining rows 0..16 so evaluate() can address them as ['f_output'][i].
# NOTE(review): why rows 10 and 16 are excluded is not visible here — confirm.
# This cell overwrites gpt4_turbo, so it must run exactly once after the load:
# a second run would drop two more rows and then fail on the 17-label index.
gpt4_turbo = gpt4_turbo[:19].drop(index=[10, 16])
gpt4_turbo.index = list(range(17))

In [None]:
# First nine entries of `data`.
# NOTE(review): data_vic is not used by any other cell shown in this notebook —
# presumably intended for scoring vicuna_output; confirm or remove.
data_vic = data[:9]

## Implementation
### Functions required for calculating the ROUGE scores

In [None]:
# Function to calculate the rouge score

def get_rouge_score(hyps, refs):
    """Score one pair of JSON-encoded texts with the shared ``rouge`` scorer.

    Each argument is decoded with ``json.loads`` and the decoded object is
    converted back to text with ``str``; the two resulting strings are what
    gets compared.

    Parameters
    ----------
    hyps : str
        JSON text passed to the scorer as its first (hypothesis) argument.
    refs : str
        JSON text passed to the scorer as its second (reference) argument.

    Returns
    -------
    Whatever ``rouge.get_scores(..., avg=True)`` returns; ``evaluate`` indexes
    it as ``result['rouge-1']['r']`` and similar.

    Raises
    ------
    json.JSONDecodeError
        If either argument is not valid JSON.
    """
    # Decode-then-stringify both inputs, hypothesis first, before scoring.
    hyp_text, ref_text = (str(json.loads(raw)) for raw in (hyps, refs))
    return rouge.get_scores(hyp_text, ref_text, avg=True)



In [None]:
# Function to modify the string to our required format

def modify(mystring):
    """Rewrite a Python-style literal string so ``json.loads`` can parse it.

    Three substitutions are applied in order: every single quote becomes a
    double quote, every ``True`` becomes ``"True"`` and every ``False`` becomes
    ``"False"``.

    The substitutions are plain text matches: they also hit apostrophes inside
    values and ``True``/``False`` that are part of a longer word or already
    quoted.

    Parameters
    ----------
    mystring : str
        Text to rewrite.

    Returns
    -------
    str
        The rewritten text.
    """
    substitutions = (
        ("\'", '\"'),
        ("True", "\"True\""),
        ("False", "\"False\""),
    )
    for pattern, replacement in substitutions:
        mystring = re.sub(pattern, replacement, mystring)
    return mystring



In [None]:
# Function to calculate the rouge score and return it as a dataframe

def evaluate(data, llm_output):
    """Compute ROUGE scores for every entry of ``data`` and tabulate them.

    For each index ``i`` in ``range(len(data))`` the texts
    ``str(data[i]['solutions'])`` and ``str(llm_output['f_output'][i])`` are
    normalised with ``modify`` and scored with ``get_rouge_score``.

    Parameters
    ----------
    data : sequence
        Indexable by position; each entry must provide a ``'solutions'`` item.
    llm_output : pandas.DataFrame or similar
        Must support ``llm_output['f_output'][i]`` for every ``i`` used above,
        i.e. the frame needs integer labels ``0 .. len(data) - 1``.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``data`` with the columns
        ``'<metric> r'``, ``'<metric> p'``, ``'<metric> f'`` for each of
        ``rouge-1``, ``rouge-2`` and ``rouge-l``, each filled from the
        matching ``r`` / ``p`` / ``f`` entry of the score result.
    """
    metrics = ('rouge-1', 'rouge-2', 'rouge-l')
    parts = ('r', 'p', 'f')
    columns = ['{} {}'.format(metric, part) for metric in metrics for part in parts]

    rows = []
    for i in range(len(data)):
        # NOTE(review): the dataset's 'solutions' text is passed as the
        # hypothesis and the model output as the reference. If 'solutions' is
        # the ground truth, recall and precision are swapped — confirm.
        hyps = str(data[i]['solutions'])
        refs = str(llm_output['f_output'][i])

        hyps = modify(hyps)
        refs = modify(refs)
        result = get_rouge_score(hyps, refs)

        # Each column gets its own statistic; previously every column was
        # filled with the recall value ('r').
        rows.append([result[metric][part] for metric in metrics for part in parts])

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=columns)



## Execution

In [None]:
# ROUGE table for the outputs in llm_output: one row per entry of `data`.
answer = evaluate(data, llm_output)

In [None]:
# Same evaluation for the outputs in gpt4_turbo.
# NOTE(review): evaluate() iterates over every index of `data`, while gpt4_turbo
# was trimmed to 17 relabelled rows — confirm that `data` has at most 17 entries
# and that entry i of `data` corresponds to row i of the trimmed frame.
answer_gpt4 = evaluate(data, gpt4_turbo)

### Exporting the results to Excel files

In [None]:
# Write the llm_output scores to an Excel workbook (relative path, so it lands
# in the current working directory).
answer.to_excel('gpt3.5_rouge_eval.xlsx')

In [None]:
# Write the gpt4_turbo scores to an Excel workbook (relative path, so it lands
# in the current working directory).
answer_gpt4.to_excel('gpt4-turbo-eval-rouge.xlsx')