In [2]:
from rouge import Rouge 
import pandas as pd

def _has_rouge_content(text):
    """Return True if ``text`` is a string with at least one non-period character.

    Strings that are empty or made only of '.' leave no sentence once split on
    periods, and the ``rouge`` package rejects such input with a ValueError.
    """
    return isinstance(text, str) and text.strip('.') != ''


def calculate_rouge_scores(results_df, questions=None):
    """Compute ROUGE F-scores between generated and true free-text answers.

    For every row of ``results_df`` and every question name ``q``, the values
    in the ``"{q} Generated"`` and ``"{q} True"`` columns are passed to
    ``Rouge.get_scores`` (generated text first, true text second).

    Parameters
    ----------
    results_df : pandas.DataFrame
        Must contain the columns ``'Sample Index'`` and ``'Condition'`` plus,
        for each question, ``"<question> Generated"`` and ``"<question> True"``.
    questions : iterable of str, optional
        Question column prefixes to score. When omitted, the four text
        questions ``Text_SubjectiveLit``, ``Text_Anxiety``, ``Text_Numeracy``
        and ``Text_TrustPhys`` are used.

    Returns
    -------
    pandas.DataFrame
        One row per scored (row, question) pair with the columns
        ``Sample Index, Condition, Question, ROUGE-1, ROUGE-2, ROUGE-L,
        Generated Response, True Label``. These columns are present even when
        no pair could be scored, so callers can always drop or select them.

    Notes
    -----
    A pair is skipped when either value is not a ``str`` (for example a
    missing value) or contains nothing but '.' characters, because such text
    cannot be scored and would otherwise abort the whole run.
    """
    if questions is None:
        questions = ('Text_SubjectiveLit', 'Text_Anxiety', 'Text_Numeracy', 'Text_TrustPhys')
    else:
        # Materialise once: the sequence is iterated again for every row.
        questions = list(questions)

    columns = [
        'Sample Index', 'Condition', 'Question',
        'ROUGE-1', 'ROUGE-2', 'ROUGE-L',
        'Generated Response', 'True Label',
    ]

    rouge = Rouge()
    scores = []

    for _, row in results_df.iterrows():
        for question in questions:
            generated_response = row[f"{question} Generated"]
            true_label = row[f"{question} True"]

            if not (_has_rouge_content(generated_response) and _has_rouge_content(true_label)):
                continue

            # get_scores returns a list with one dict per (hypothesis, reference) pair.
            score = rouge.get_scores(generated_response, true_label)[0]

            scores.append({
                'Sample Index': row['Sample Index'],
                'Condition': row['Condition'],
                'Question': question,
                'ROUGE-1': score['rouge-1']['f'],
                'ROUGE-2': score['rouge-2']['f'],
                'ROUGE-L': score['rouge-l']['f'],
                'Generated Response': generated_response,
                'True Label': true_label
            })

    # Passing `columns` keeps the schema stable when `scores` is empty.
    return pd.DataFrame(scores, columns=columns)


In [3]:
# Load the saved results for the "4o" experiment and score every text answer.
experimentName = "4o"

results_df = pd.read_csv(
    f"results/experiment_results_{experimentName}_all.csv"
)
rouge_scores_df = results_df.pipe(calculate_rouge_scores)


In [4]:
# Preview the first five scored rows.
rouge_scores_df.head(5)

Unnamed: 0,Sample Index,Condition,Question,ROUGE-1,ROUGE-2,ROUGE-L,Generated Response,True Label
0,0,all_4.jsonl,Text_SubjectiveLit,0.088235,0.0,0.058824,I feel fairly confident in my ability to obtai...,Life is great and we all must pick what we wan...
1,0,all_4.jsonl,Text_Anxiety,0.115942,0.047059,0.115942,"When I visit the doctor's office, I sometimes ...",How doctors decide to take care of uses and pr...
2,0,all_4.jsonl,Text_Numeracy,0.16,0.015873,0.16,"Sure, there was a time when a close friend was...",The girl I'm engaged to has health problems at...
3,0,all_4.jsonl,Text_TrustPhys,0.101266,0.0,0.075949,I generally trust my primary care physician be...,Doctors and nurses ppl must have full trust in...
4,0,conditioning_on_all.jsonl,Text_SubjectiveLit,0.131579,0.0,0.131579,I feel quite confident in my ability to obtain...,Life is great and we all must pick what we wan...


In [6]:
# Write the scores without the two full-text columns.
(
    rouge_scores_df
    .drop(columns=['Generated Response', 'True Label'])
    .to_csv(f"results/rouge_scores_{experimentName}.csv", index=False)
)

In [7]:
# Same load -> score -> save steps for the "llama" experiment.
# Note: this rebinds experimentName, results_df and rouge_scores_df.
experimentName = "llama"

results_df = pd.read_csv(
    f"results/experiment_results_{experimentName}_all.csv"
)
rouge_scores_df = results_df.pipe(calculate_rouge_scores)
(
    rouge_scores_df
    .drop(columns=['Generated Response', 'True Label'])
    .to_csv(f"results/rouge_scores_{experimentName}.csv", index=False)
)