# MLE - G9: Confiabilidad de Modelos de Lenguaje

## 1. Imports necesarios

In [None]:
from dotenv import load_dotenv
import nltk
from functions.metamorphic import calculate_AFR, calculate_AFR_without_Bing
from functions.perturbations import delete_characters, add_characters, add_random_words, remplace_named_entities, replace_characters, replace_words_with_antonyms, replace_words_with_synonyms, delete_sentences
from functions.models import request_to_bing
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Fetch the NLTK 'punkt' resource.
# NOTE(review): presumably required by the tokenizers used in functions.perturbations — confirm.
nltk.download('punkt')
# Load environment variables via python-dotenv.
# NOTE(review): presumably this is where BING_U_COOKIE (read in section 2) comes from — confirm.
load_dotenv()

## 2. Comprobación de funcionamiento de Bing Chat

In [None]:
import os

# Report only whether the cookie is configured. Printing the value itself
# would persist the credential in the saved notebook output.
print("BING_U_COOKIE configured:", bool(os.getenv("BING_U_COOKIE")))

In [None]:
# Smoke test for the Bing Chat connection: send one question and print the reply.
# Top-level `await` only works inside the notebook (IPython) event loop.
print(await request_to_bing("What is the capital of Spain?"))

## 3. Modelos a evaluar 

In [None]:
# Models under evaluation, as (task, Hugging Face model id[, extra]) tuples.
# The whole tuple is handed to the helpers imported from functions.metamorphic.
# NOTE(review): the meaning of the optional third element ('inputs', '<mask>')
# is defined in that module — presumably a payload key / mask token; confirm.
models = [
    ('summarize', 'facebook/bart-large-cnn'),
    ('summarize', 'google/pegasus-large'),
    ('toxic', 's-nlp/roberta_toxicity_classifier'),
    ('toxic', 'citizenlab/distilbert-base-multilingual-cased-toxicity', 'inputs'),
    ('spam', 'h-e-l-l-o/email-spam-classification-merged'),
    ('spam', 'dima806/email-spam-detection-roberta'),
    ('translate', 't5-base'),
    ('translate', 'allenai/wmt16-en-de-12-1'),
    ('fillmask', 'vinai/bertweet-base', '<mask>'),
    ('fillmask', 'roberta-base', '<mask>'),
]

# Group by the task tag rather than by list position, so adding, removing or
# reordering entries in `models` cannot silently put a model in the wrong group.
summarize_models = [entry for entry in models if entry[0] == 'summarize']
toxic_models = [entry for entry in models if entry[0] == 'toxic']
spam_models = [entry for entry in models if entry[0] == 'spam']
translate_models = [entry for entry in models if entry[0] == 'translate']
fillmask_models = [entry for entry in models if entry[0] == 'fillmask']

## 4. Evaluación de modelos sin hacer uso de Bing Chat

In [None]:
# Perturbations used in the Bing-free evaluation, as
# (perturbation function, boolean flag, display name) triples.
# NOTE(review): the flag is forwarded unchanged to calculate_AFR_without_Bing;
# its meaning is defined there — confirm.
perturbations_without_bing = [
    (delete_characters, False, 'delete_characters'),
    (replace_characters, False, 'replace_characters'),
    (add_characters, False, 'add_characters'),
]

# Attributes evaluated in the Bing-free run.
attributes_without_bing = ["Robustness"]

In [None]:
async def AFR_without_Bing(models, perturbations, attributes, iterations=1):
    """Evaluate every (model, perturbation, attribute) combination without Bing.

    Args:
        models: Iterable of model tuples. Items 0 and 1 are used for the row
            label; the whole tuple is passed to ``calculate_AFR_without_Bing``.
        perturbations: Iterable of ``(function, flag, name)`` sequences.
        attributes: Iterable of attribute names.
        iterations: Forwarded to ``calculate_AFR_without_Bing``.

    Returns:
        A DataFrame with columns ``Value`` and ``Time`` holding the two values
        returned by ``calculate_AFR_without_Bing``, indexed by the label
        ``"<model[0]> - <model[1]> - <perturbation name> - <attribute>"``.
    """
    table = pd.DataFrame(columns=["Value", "Time"])

    # Flatten the three nested loops; iteration order is unchanged
    # (models outermost, attributes innermost).
    combinations = (
        (model, perturbation, attribute)
        for model in models
        for perturbation in perturbations
        for attribute in attributes
    )

    for model, perturbation, attribute in combinations:
        label_parts = (model[0], model[1], perturbation[2], attribute)
        print(*label_parts)

        value, elapsed = await calculate_AFR_without_Bing(
            model,
            perturbation[0],
            attribute,
            perturbation[1],
            iterations=iterations,
        )
        print(value, elapsed)

        # A repeated label overwrites the earlier row instead of adding a new one.
        table.loc[" - ".join(label_parts)] = [value, elapsed]

    return table

In [None]:
# Load Hugging Face models

await AFR_without_Bing(summarize_models, perturbations_without_bing, attributes_without_bing, iterations=1)

# Execute tests

summarize_results = await AFR_without_Bing(summarize_models, perturbations_without_bing, attributes_without_bing, iterations=10)
summarize_results

In [None]:
# Load Hugging Face models

await AFR_without_Bing(toxic_models, perturbations_without_bing, attributes_without_bing, iterations=1)

# Execute tests

toxic_results = await AFR_without_Bing(toxic_models, perturbations_without_bing, attributes_without_bing, iterations=10)
toxic_results

In [None]:
# Load Hugging Face models

await AFR_without_Bing(spam_models, perturbations_without_bing, attributes_without_bing, iterations=1)

# Execute tests

spam_results = await AFR_without_Bing(spam_models, perturbations_without_bing, attributes_without_bing, iterations=10)
spam_results

In [None]:
# Load Hugging Face models

await AFR_without_Bing(translate_models, perturbations_without_bing, attributes_without_bing, iterations=1)

# Execute tests

translate_results = await AFR_without_Bing(translate_models, perturbations_without_bing, attributes_without_bing, iterations=10)
translate_results

In [None]:
# Load Hugging Face models

await AFR_without_Bing(fillmask_models, perturbations_without_bing, attributes_without_bing, iterations=1)

# Execute tests

fill_masks_results = await AFR_without_Bing(fillmask_models, perturbations_without_bing, attributes_without_bing, iterations=10)
fill_masks_results

Exportación de resultados (las filas se añaden al final de `./results/results.csv`, sin cabecera):

In [None]:
# Append each per-task table to the shared results file, without a header row
# (the visualisation section reads the file back with header=None).
# NOTE(review): because of mode='a', re-running this cell appends the same rows again.
for task_results in (
    summarize_results,
    toxic_results,
    spam_results,
    translate_results,
    fill_masks_results,
):
    task_results.to_csv('./results/results.csv', mode='a', header=False)

## 5. Visualizaciones

Lectura de los datos exportados en `./results/results.csv`

In [None]:
# The file has no header row: the first field is the combined row label,
# followed by the score and the time.
results_raw = pd.read_csv(
    './results/results.csv',
    header=None,
    names=['Columna1', 'Score', 'Time'],
)

# Split "<task> - <model> - <perturbation> - <attribute>" into one column each.
label_parts = results_raw['Columna1'].str.split(' - ', expand=True)
label_parts.columns = ['Task', 'Model', 'Perturbation', 'Attribute']

# Drop the combined label, coerce the metrics to numeric and attach the parts.
df = (
    results_raw
    .drop(columns=['Columna1'])
    .assign(
        Score=lambda frame: pd.to_numeric(frame['Score']),
        Time=lambda frame: pd.to_numeric(frame['Time']),
    )
    .join(label_parts)
)

df.head()

Score promedio por tarea

In [None]:
# Bar chart of the mean score for each task.
fig, ax = plt.subplots(figsize=(12, 6))
mean_score_by_task = df.groupby('Task')['Score'].mean()
mean_score_by_task.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title('Score promedio por tarea')
ax.set_xlabel('Tarea')
ax.set_ylabel('Score promedio')
plt.show()

Tiempo vs. Score

In [None]:
# Scatter plot of time against score, one point per result row.
fig, ax = plt.subplots(figsize=(12, 6))
# No `c` array is supplied, so there is nothing for a colormap to map:
# matplotlib ignores `cmap` in that case and emits a warning. The argument is
# therefore omitted; the rendered points are unchanged.
ax.scatter(df['Time'], df['Score'], alpha=0.7)
ax.set_title('Tiempo vs. Score')
ax.set_xlabel('Tiempo (segundos)')
ax.set_ylabel('Score')
plt.show()

Boxplot para Comparar la Distribución de los Scores por Tarea

In [None]:
# Box plot of the score distribution for each task.
# NOTE(review): seaborn >= 0.13 is understood to deprecate `palette` without
# `hue` — confirm against the seaborn version this project pins.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x='Task', y='Score', data=df, palette='viridis', ax=ax)
ax.set_title('Distribución de Scores por Tarea')
ax.set_xlabel('Tarea')
ax.set_ylabel('Score')
ax.tick_params(axis='x', labelrotation=45)
plt.show()


Distribución de Tiempos por Tarea

In [None]:
# Histograms of the time values, one per task, all drawn on the same axes.
fig, ax = plt.subplots(figsize=(12, 6))
# The axes are passed explicitly; the figure size is already set above.
df.groupby('Task')['Time'].hist(ax=ax, alpha=0.7, bins=20, stacked=True, legend=True)
ax.set_title('Distribución de Tiempos por Tarea')
ax.set_xlabel('Tiempo (segundos)')
ax.set_ylabel('Frecuencia')
# Place the legend outside the plotting area, to the right.
ax.legend(title='Tarea', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()



Gráfico de Violín para Distribución de Tiempo por Modelo

In [None]:
# Violin plot of the time distribution for each model.
# NOTE(review): seaborn >= 0.13 is understood to deprecate `palette` without
# `hue` — confirm against the seaborn version this project pins.
fig, ax = plt.subplots(figsize=(12, 8))
sns.violinplot(x='Model', y='Time', data=df, palette='viridis', ax=ax)
ax.set_title('Distribución de Tiempo por Modelo')
ax.set_xlabel('Modelo')
ax.set_ylabel('Tiempo (segundos)')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

Distribución de Tiempo por Perturbación

In [None]:
# Violin plot of the time distribution for each perturbation.
# NOTE(review): seaborn >= 0.13 is understood to deprecate `palette` without
# `hue` — confirm against the seaborn version this project pins.
fig, ax = plt.subplots(figsize=(12, 8))
sns.violinplot(x='Perturbation', y='Time', data=df, palette='viridis', ax=ax)
ax.set_title('Distribución de Tiempo por Perturbación')
ax.set_xlabel('Perturbación')
ax.set_ylabel('Tiempo (segundos)')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

## 6. Evaluación de modelos haciendo uso de Bing Chat

In [None]:
# Perturbations used in the Bing-assisted evaluation, as
# (perturbation function, boolean flag, display name) triples.
# NOTE(review): the flag is forwarded unchanged to calculate_AFR; only the
# antonym replacement sets it to True — its meaning is defined there, confirm.
perturbations = [
    (delete_characters, False, 'delete_characters'),
    (replace_characters, False, 'replace_characters'),
    (add_characters, False, 'add_characters'),
    (replace_words_with_synonyms, False, 'replace_word_synonyms'),
    (replace_words_with_antonyms, True, 'replace_word_antonyms'),
    (add_random_words, False, 'add_random_words'),
    (remplace_named_entities, False, 'remplace_named_entities'),
]

# Attributes evaluated in the Bing-assisted run. Order matters: AFR() drops the
# last entry for 'summarize' models.
attributes = ["Robustness", "Non-determinism", "Fairness"]

In [None]:
async def AFR(models, perturbations, attributes, iterations=1):
    """Evaluate every (model, perturbation, attribute) combination via ``calculate_AFR``.

    Args:
        models: Iterable of model tuples. Items 0 (task) and 1 (model name) are
            used for the row label; the whole tuple is passed to
            ``calculate_AFR``.
        perturbations: Iterable of ``(function, flag, name)`` sequences.
        attributes: Sequence of attribute names. For models whose task is
            ``"summarize"`` the LAST attribute is skipped.
            NOTE(review): presumably meant to skip "Fairness", which is last in
            this notebook's attribute list — confirm before reordering it.
        iterations: Forwarded to ``calculate_AFR``. Defaults to 1, the value
            that was previously hard-coded, so existing calls are unaffected.

    Returns:
        A DataFrame with columns ``Value`` and ``Time`` holding the two values
        returned by ``calculate_AFR``, indexed by the label
        ``"<task> - <model> - <perturbation name> - <attribute>"``.
    """
    results = pd.DataFrame(columns=["Value", "Time"])
    all_attributes = list(attributes)

    for model in models:
        task, model_name = model[0], model[1]
        # The attribute selection depends only on the model's task, so compute
        # it once per model rather than once per perturbation.
        model_attributes = all_attributes[:-1] if task == "summarize" else all_attributes

        for perturbation in perturbations:
            for attribute in model_attributes:
                print(task, model_name, perturbation[2], attribute)
                # Locals are deliberately not called `AFR`, to avoid shadowing
                # this function's own name.
                score, elapsed = await calculate_AFR(
                    model,
                    perturbation[0],
                    attribute,
                    perturbation[2],
                    perturbation[1],
                    iterations=iterations,
                )
                print(score, elapsed)
                row_label = " - ".join((task, model_name, perturbation[2], attribute))
                results.loc[row_label] = [score, elapsed]

    return results

In [None]:
summarize_results = await AFR(summarize_models, perturbations, attributes)
summarize_results

In [None]:
toxic_results = await AFR(toxic_models, perturbations, attributes)
toxic_results

In [None]:
spam_results = await AFR(spam_models, perturbations, attributes)
spam_results

In [None]:
translate_results = await AFR(translate_models, perturbations, attributes)
translate_results

In [None]:
fill_masks_results = await AFR(fillmask_models, perturbations, attributes)
fill_masks_results