# Imports

In [1]:
import os
from tqdm import tqdm
import torch
import pandas as pd
from transformers import logging
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

2025-05-05 15:48:47.489447: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746460127.680401      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746460127.734018      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# Functions

In [2]:
#sentiment analysis - determine if sentence is positive, negative, or neutral
def analyze_sentiment(text):
    """Classify one text with the module-level ``sentiment_analyzer``.

    Args:
        text: Input handed unchanged to ``sentiment_analyzer``.

    Returns:
        A ``(label, score)`` tuple read from the first prediction returned.
    """
    predictions = sentiment_analyzer(text)
    top_prediction = predictions[0] #only the first prediction is used
    label = top_prediction["label"]
    score = top_prediction["score"]
    return label, score

In [3]:
#compute the cosine similarity between the SBERT embeddings of two sentences (here, a thought and its generated text)
def calculate_cosine_similarity(thought, generated):
    """Return the cosine similarity between the embeddings of two sentences.

    Args:
        thought: First sentence to embed.
        generated: Second sentence to embed.

    Returns:
        The single similarity value produced by ``cosine_similarity`` for the pair.
    """
    #a single encode call yields both embeddings, in input order
    sentence_vectors = sbert_model.encode([thought, generated])
    thought_vector = sentence_vectors[0]
    generated_vector = sentence_vectors[1]

    #each vector is wrapped in a list, and the one entry of the result is read back with [0][0]
    pairwise_scores = cosine_similarity([thought_vector], [generated_vector])
    return pairwise_scores[0][0]

In [4]:
#run sentiment analysis and cosine similarity on every row of one model-output dataframe
def process_dataframe(df):
    """Score every row of a model-output dataframe.

    Runs the module-level ``sentiment_analyzer`` over the ``Original_Text``,
    ``True_Text`` and ``Generated_Text`` columns, and uses ``sbert_model``
    embeddings to compare each generated text with its original and true text.

    Args:
        df: DataFrame with ``Original_Text``, ``True_Text`` and
            ``Generated_Text`` columns.

    Returns:
        A new DataFrame with one row per input row holding the three texts,
        their sentiment label and confidence, and the cosine similarity of the
        generated text to the original and to the true text. An empty ``df``
        returns an empty frame with the same columns, without calling the models.

    Raises:
        KeyError: If one of the three text columns is missing from ``df``.
    """
    #fixed output schema, so callers can rely on these columns even with zero rows
    result_columns = [
        "original", "original_sentiment", "original_sentiment_confidence",
        "true", "true_sentiment", "true_sentiment_confidence",
        "generated", "generated_sentiment", "generated_sentiment_confidence",
        "cosine_similarity_original", "cosine_similarity_true",
    ]

    #prepare lists of texts so each model is called once per column
    original_texts = df["Original_Text"].tolist()
    true_texts = df["True_Text"].tolist()
    generated_texts = df["Generated_Text"].tolist()

    #nothing to score: skip the models but keep the schema, so later column lookups still work
    row_count = len(original_texts)
    if row_count == 0:
        return pd.DataFrame(columns=result_columns)

    #sentiment analysis, one call per column
    original_sentiments = sentiment_analyzer(original_texts)
    true_sentiments = sentiment_analyzer(true_texts)
    generated_sentiments = sentiment_analyzer(generated_texts)

    #encode all texts in one call, then slice the result back into the three columns
    embeddings = sbert_model.encode(original_texts + true_texts + generated_texts)
    original_embeddings = embeddings[:row_count]
    true_embeddings = embeddings[row_count:2 * row_count]
    generated_embeddings = embeddings[2 * row_count:]

    results = []
    for i in tqdm(range(row_count), desc="Processing rows", miniters=10):
        generated_vector = [generated_embeddings[i]]

        #each call compares one pair of vectors; [0][0] reads the single value out of the result
        cos_sim_original = cosine_similarity([original_embeddings[i]], generated_vector)[0][0]
        cos_sim_true = cosine_similarity([true_embeddings[i]], generated_vector)[0][0]

        results.append({
            "original": original_texts[i],
            "original_sentiment": original_sentiments[i]["label"],
            "original_sentiment_confidence": original_sentiments[i]["score"],
            "true": true_texts[i],
            "true_sentiment": true_sentiments[i]["label"],
            "true_sentiment_confidence": true_sentiments[i]["score"],
            "generated": generated_texts[i],
            "generated_sentiment": generated_sentiments[i]["label"],
            "generated_sentiment_confidence": generated_sentiments[i]["score"],
            "cosine_similarity_original": cos_sim_original,
            "cosine_similarity_true": cos_sim_true
        })

    return pd.DataFrame(results, columns=result_columns)

In [5]:
#get finalized results from all model output results dataframes
def finalize_results(all_results, sentiments=("POSITIVE", "NEGATIVE")):
    """Summarize each per-model results dataframe into one row of a summary table.

    Args:
        all_results: Iterable of DataFrames carrying the
            ``*_sentiment``, ``*_sentiment_confidence`` and
            ``cosine_similarity_*`` columns read below.
        sentiments: Sentiment labels to count. Every label gets a count column
            (0 when absent); labels not listed here are not counted.

    Returns:
        A DataFrame with one row per entry of ``all_results``, in the same
        order, holding per-label sentiment counts for the original, true and
        generated texts, then the mean confidence of each, then the mean of
        both cosine similarities. An empty ``all_results`` gives an empty
        frame with those columns.
    """
    sentiments = list(sentiments)
    sources = ("original", "true", "generated")
    confidence_columns = [f"{source}_sentiment_confidence" for source in sources]
    similarity_columns = ["cosine_similarity_original", "cosine_similarity_true"]
    count_columns = [
        f"{source}_sentiment_{s.lower()}" for source in sources for s in sentiments
    ]

    #collect one record per dataframe and build the table once, instead of concatenating inside the loop
    summaries = []
    for result in all_results:
        summary = {}

        #count sentiment values (reindex guarantees every requested label has an entry)
        for source in sources:
            counts = result[f"{source}_sentiment"].value_counts().reindex(sentiments, fill_value=0)
            for s in sentiments:
                summary[f"{source}_sentiment_{s.lower()}"] = counts[s]

        #each confidence / similarity column is averaged on its own
        summary.update(result[confidence_columns].mean().to_dict())
        summary.update(result[similarity_columns].mean().to_dict())

        summaries.append(summary)

    return pd.DataFrame(summaries, columns=count_columns + confidence_columns + similarity_columns)

# Implementation

In [6]:
#directories: model outputs are read from outputs_dir, evaluation files are written to results_dir
outputs_dir, results_dir = "./model_outputs/", "./results/"
os.makedirs(results_dir, exist_ok=True) #no error when the directory already exists

#use the GPU when torch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [7]:
#so less shows up in cell output
logging.set_verbosity_error()

#load sentiment analysis pipeline on the device picked earlier (GPU when available, else CPU)
#so the notebook also runs on machines without CUDA instead of failing on a hard-coded GPU index
#NOTE(review): no model name is passed, so the library's default sentiment model is used - consider pinning it for reproducibility
sentiment_analyzer = pipeline("sentiment-analysis", device=device)

#load SBERT model - cosine similarity (same device as the sentiment pipeline)
sbert_model = SentenceTransformer("all-MiniLM-L6-v2", device=str(device))

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [8]:
#load the test set
test_data = pd.read_csv("./data/test_data.csv")

#model output files to evaluate (hard-coded; later cells rely on this order)
names = [
    "generated_output_gru.csv",
    "generated_output_gru_non_deterministic.csv",
    "generated_output_seq2seq.csv",
    "generated_output_seq2seq_non_deterministic.csv",
    "generated_output_t5.csv",
    "generated_output_t5_non_deterministic.csv",
    "generated_output_t5_all.csv",
    "generated_output_t5_all_non_deterministic.csv",
]

#read every output file into a dataframe, in the same order as names
dataframes = [pd.read_csv(outputs_dir + file_name) for file_name in names]

In [9]:
#store results for all df
all_results = []

#evaluate every model output; work on a copy so the loaded dataframes stay untouched
#and this cell can be re-run without "column already exists" errors from insert
for name, raw_df in zip(names, dataframes):
    prepared = raw_df.copy()

    #add the original text to outputs that are missing it (the t5 files)
    #NOTE(review): insert aligns on the index, so this assumes the output rows are in test_data order - confirm
    if "Original_Text" not in prepared.columns:
        prepared.insert(1, "Original_Text", test_data["negative_thought"])

    #make each text col a string so the models only ever receive str
    #NOTE(review): missing values become the literal string "nan" here and get scored like any other text - confirm that is intended
    for column in ("Original_Text", "True_Text", "Generated_Text"):
        prepared[column] = prepared[column].astype(str)

    all_results.append(process_dataframe(prepared))

#get finalized results, labelled with the model name ("generated_output_" and ".csv" stripped from the file name)
finalized_results = finalize_results(all_results)
finalized_results.insert(
    0,
    "model",
    [name.removeprefix("generated_output_").removesuffix(".csv") for name in names],
)

Batches:   0%|          | 0/246 [00:00<?, ?it/s]

Processing rows: 100%|██████████| 2623/2623 [00:01<00:00, 2549.78it/s]


Batches:   0%|          | 0/246 [00:00<?, ?it/s]

Processing rows: 100%|██████████| 2623/2623 [00:00<00:00, 2855.57it/s]


Batches:   0%|          | 0/246 [00:00<?, ?it/s]

Processing rows: 100%|██████████| 2623/2623 [00:00<00:00, 2829.68it/s]


Batches:   0%|          | 0/246 [00:00<?, ?it/s]

Processing rows: 100%|██████████| 2623/2623 [00:00<00:00, 2743.95it/s]


Batches:   0%|          | 0/246 [00:00<?, ?it/s]

Processing rows: 100%|██████████| 2623/2623 [00:00<00:00, 2917.51it/s]


Batches:   0%|          | 0/246 [00:00<?, ?it/s]

Processing rows: 100%|██████████| 2623/2623 [00:00<00:00, 2894.81it/s]


Batches:   0%|          | 0/246 [00:00<?, ?it/s]

Processing rows: 100%|██████████| 2623/2623 [00:00<00:00, 2645.20it/s]


Batches:   0%|          | 0/246 [00:00<?, ?it/s]

Processing rows: 100%|██████████| 2623/2623 [00:00<00:00, 2775.54it/s]


In [10]:
#write one "<output name>_evaluated.csv" per model output
for idx in range(len(all_results)):
    evaluated_name = names[idx].replace(".csv", "_evaluated.csv")
    evaluated_path = results_dir + evaluated_name
    all_results[idx].to_csv(evaluated_path, index=False)

#write the cross-model summary table next to them
summary_path = results_dir + "finalized_model_output_results.csv"
finalized_results.to_csv(summary_path, index=False)

print("All results saved!")

All results saved!
