In [1]:
from dotenv import load_dotenv
import nltk
from functions.metamorphic import calculate_M_ASR, calculate_M_ASR_without_Bing
from functions.perturbations import delete_characters, add_characters, add_random_words, remplace_named_entities, replace_characters, replace_words_with_antonyms, replace_words_with_synonyms, delete_sentences
from functions.models import request_to_bing
import pandas as pd

# Fetch the NLTK 'punkt' data package (a no-op when it is already up to date).
nltk.download('punkt')
# Load variables from a local .env file into the process environment.
# As the last expression of the cell, its return value is displayed.
load_dotenv()

[nltk_data] Downloading package punkt to /Users/miguel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
import os

# Never echo the cookie itself: cell outputs are saved with the notebook, so
# printing the value leaks the credential to anyone who can read the file.
# Report only whether the variable is configured.
print("BING_U_COOKIE is set:", bool(os.getenv("BING_U_COOKIE")))

[REDACTED: credential removed from the saved output. Rotate this cookie, since it was previously stored in the notebook.]


In [3]:
# Smoke test of the Bing client: send a single question and print the reply.
# NOTE(review): the saved output shows this call failing with a
# CreateConversationException (status 404); confirm the cookie/endpoint are
# still valid before running the Bing-based evaluation below.
print(await request_to_bing("What is the capital of Spain?"))

CreateConversationException: Failed to create conversation, received status: 404

## Evaluación de modelos con BING

In [3]:
# Catalogue of evaluated models. Each entry is a tuple whose first item is the
# task type and whose second item is the model identifier; some entries carry
# a third, task-specific item (e.g. the mask token for fill-mask models).
models = [
    ('summarize', 'facebook/bart-large-cnn'),
    ('summarize', 'google/pegasus-large'),
    ('summarize', 'marianna13/flan-t5-base-summarization'),
    ('toxic', 's-nlp/roberta_toxicity_classifier'),
    ('toxic', 'citizenlab/distilbert-base-multilingual-cased-toxicity', 'inputs'),
    ('toxic', 'martin-ha/toxic-comment-model'),
    ('spam', 'rafacost/bert_base_pt_en_cased_email_spam'),  # Deleted by their owner
    ('spam', 'h-e-l-l-o/email-spam-classification-merged'),
    ('spam', 'dima806/email-spam-detection-roberta'),
    ('translate', 't5-base'),
    ('translate', 'allenai/wmt16-en-de-12-1'),
    ('translate', 'facebook/wmt19-en-de'),
    ('fillmask', 'bert-base-uncased', '[MASK]'),
    ('fillmask', 'vinai/bertweet-base', '<mask>'),
    ('fillmask', 'roberta-base', '<mask>'),
]

# Two models per task are evaluated.
models_summarize = [models[0], models[1]]
models_toxic = [models[3], models[4]]
# models[6] was deleted by its owner, so use the two remaining spam models
# (the same selection as the evaluation without Bing further down).
models_spam = [models[7], models[8]]
models_translate = [models[9], models[10]]
models_fillmask = [models[13], models[14]]
# Each entry is (perturbation function, boolean flag, label). M_ASR forwards
# the function, the label and the flag to calculate_M_ASR and uses the label
# in the progress output and in the result row names.
# NOTE(review): only the antonym perturbation sets the flag to True; its
# exact meaning is defined by calculate_M_ASR. Confirm there.
perturbations = [
    (delete_characters, False, 'delete_characters'),
    (replace_characters, False, 'replace_characters'),
    (add_characters, False, 'add_characters'),
    (replace_words_with_synonyms, False, 'replace_word_synonyms'),
    (replace_words_with_antonyms, True, 'replace_word_antonyms'),
    (add_random_words, False, 'add_random_words'),
    (remplace_named_entities, False, 'remplace_named_entities'),
]

# The spelling of this variable name is kept as-is because other cells
# reference it.
atribbutes = [
    "Robustness",
    "Non-determinism",
    "Fairness",
]

In [8]:
async def M_ASR(models, perturbations, attributes):
    """Run ``calculate_M_ASR`` for every model/perturbation/attribute combination.

    Args:
        models: Iterable of tuples whose first two items are the task type
            (e.g. ``'summarize'``) and the model identifier. The whole tuple
            is forwarded to ``calculate_M_ASR``.
        perturbations: Iterable of ``(function, flag, label)`` tuples.
        attributes: Iterable of attribute names to evaluate. For models of
            type ``'summarize'`` the ``"Fairness"`` attribute is skipped.

    Returns:
        pandas.DataFrame with columns ``"Value"`` and ``"Time"`` and one row
        per evaluated combination, indexed by
        ``"<type> - <model> - <label> - <attribute>"``.
    """
    RESULTS = pd.DataFrame(columns=["Value", "Time"])
    for model in models:
        m_type = model[0]
        # Use the `attributes` argument here. The previous code read the
        # notebook-level global list for summarization models, silently
        # ignoring what the caller passed, and dropped "the last item" by
        # position. In the notebook's list that last item is "Fairness", so
        # exclude it by name instead.
        if m_type == "summarize":
            model_attributes = [name for name in attributes if name != "Fairness"]
        else:
            model_attributes = list(attributes)

        for perturbation in perturbations:
            for attribute in model_attributes:
                print(model[0], model[1], perturbation[2], attribute)
                # The returned pair is stored under the "Value" and "Time"
                # columns. NOTE(review): presumably the metric and the elapsed
                # seconds; confirm against calculate_M_ASR.
                value, elapsed = await calculate_M_ASR(
                    model,
                    perturbation[0],
                    attribute,
                    perturbation[2],
                    perturbation[1],
                    iterations=1,
                )
                print(value, elapsed)
                row_key = model[0] + " - " + model[1] + " - " + perturbation[2] + " - " + attribute
                RESULTS.loc[row_key] = [value, elapsed]

    return RESULTS

In [None]:
summarize_results = await M_ASR(models_summarize, perturbations, atribbutes)

In [7]:
# Display the summarization results table.
summarize_results

Unnamed: 0,Value,Time
facebook/bart-large-cnn - delete_characters - Robustness,1.0,8.264758
facebook/bart-large-cnn - delete_characters - Non-determinism,1.0,17.77017
facebook/bart-large-cnn - replace_characters - Robustness,0.0,7.813684
facebook/bart-large-cnn - replace_characters - Non-determinism,1.0,21.727678
facebook/bart-large-cnn - add_characters - Robustness,1.0,10.058627
facebook/bart-large-cnn - add_characters - Non-determinism,1.0,31.354026
facebook/bart-large-cnn - replace_words_with_synonyms - Robustness,1.0,14.693539
facebook/bart-large-cnn - replace_words_with_synonyms - Non-determinism,0.0,30.646007
facebook/bart-large-cnn - replace_words_with_antonyms - Robustness,0.0,14.946164
facebook/bart-large-cnn - replace_words_with_antonyms - Non-determinism,0.0,27.091527


In [None]:
toxic_results = await M_ASR(models_toxic, perturbations, atribbutes)

In [None]:
# Display the toxicity results table.
toxic_results

In [None]:
spam_results = await M_ASR(models_spam, perturbations, atribbutes)

In [None]:
# Display the spam results table.
spam_results

In [None]:
translate_results = await M_ASR(models_translate, perturbations, atribbutes)

In [None]:
# Display the translation results table.
translate_results

In [None]:
fill_masks_results = await M_ASR(models_fillmask, perturbations, atribbutes)

In [None]:
# Display the fill-mask results table.
fill_masks_results

## Evaluación de modelos sin BING

In [3]:
# Catalogue of candidate models: (task type, model identifier[, extra item]).
models = [
    # Summarization
    ('summarize', 'facebook/bart-large-cnn'),
    ('summarize', 'google/pegasus-large'),
    ('summarize', 'marianna13/flan-t5-base-summarization'),
    # Toxicity classification
    ('toxic', 's-nlp/roberta_toxicity_classifier'),
    ('toxic', 'citizenlab/distilbert-base-multilingual-cased-toxicity', 'inputs'),
    ('toxic', 'martin-ha/toxic-comment-model'),
    # Spam classification
    ('spam', 'rafacost/bert_base_pt_en_cased_email_spam'),  # Deleted by their owner
    ('spam', 'h-e-l-l-o/email-spam-classification-merged'),
    ('spam', 'dima806/email-spam-detection-roberta'),
    # Translation
    ('translate', 't5-base'),
    ('translate', 'allenai/wmt16-en-de-12-1'),
    ('translate', 'facebook/wmt19-en-de'),
    # Fill-mask (third item is the mask token)
    ('fillmask', 'bert-base-uncased', '[MASK]'),
    ('fillmask', 'vinai/bertweet-base', '<mask>'),
    ('fillmask', 'roberta-base', '<mask>'),
]

# Two consecutive catalogue entries are selected per task.
models_summarize = models[0:2]
models_toxic = models[3:5]
models_spam = models[7:9]  # skips index 6, which was deleted by its owner
models_translate = models[9:11]
models_fillmask = models[13:15]
# Perturbations used for the evaluation without Bing, as
# (perturbation function, boolean flag, label) tuples.
perturbations_without_bing = [
    (delete_characters, False, 'delete_characters'),
    (replace_characters, False, 'replace_characters'),
    (add_characters, False, 'add_characters'),
]

# Only this attribute is evaluated in the section without Bing.
attributes_without_bing = [
    "Robustness",
]

In [4]:
async def M_ASR_without_Bing(models, perturbations, attributes):
    """Run ``calculate_M_ASR_without_Bing`` over every combination of inputs.

    Args:
        models: Iterable of tuples whose first two items are the task type
            and the model identifier; the whole tuple is forwarded.
        perturbations: Iterable of ``(function, flag, label)`` tuples.
        attributes: Iterable of attribute names.

    Returns:
        pandas.DataFrame with columns ``"Value"`` and ``"Time"``, one row per
        combination, indexed by ``"<type> - <model> - <label> - <attribute>"``.
    """
    table = pd.DataFrame(columns=["Value", "Time"])
    for model in models:
        for perturbation in perturbations:
            for attribute in attributes:
                label = perturbation[2]
                print(model[0], model[1], label, attribute)
                outcome = await calculate_M_ASR_without_Bing(
                    model,
                    perturbation[0],
                    attribute,
                    perturbation[1],
                    iterations=1,
                )
                # The helper's two results go into the "Value" and "Time" columns.
                first, second = outcome
                print(first, second)
                row_key = " - ".join((model[0], model[1], label, attribute))
                table.loc[row_key] = [first, second]

    return table

In [5]:
summarize_results = await M_ASR_without_Bing(models_summarize, perturbations_without_bing, attributes_without_bing)
summarize_results

summarize facebook/bart-large-cnn delete_characters Robustness
A naive hustler travels from Texas to New York City to seek personal fortune. He finds a new friend in the process, and a new way to make money. The pair meet up for the first time in New York's SoHo neighborhood. The two become friends, and the pair travel together for the rest of the trip.
CNN.com will feature iReporter photos in a weekly Travel Snapshots gallery. Visit CNN.com/Travel each week for a new gallery of snapshots from around the world. Visit www.dailymail.co.uk/travel for a latest gallery of photos.
["A naive hustler travels from Texas to New York City to seek personal fortune. He finds a new friend in the process, and a new way to make money. The pair meet up for the first time in New York's SoHo neighborhood. The two become friends, and the pair travel together for the rest of the trip.", 'CNN.com will feature iReporter photos in a weekly Travel Snapshots gallery. Visit CNN.com/Travel each week for a new gal

In [8]:
toxic_results = await M_ASR_without_Bing(models_toxic, perturbations_without_bing, attributes_without_bing)
toxic_results

toxic s-nlp/roberta_toxicity_classifier delete_characters Robustness
1.0 23.209882736206055
toxic s-nlp/roberta_toxicity_classifier replace_characters Robustness
1.0 1.7736492156982422
toxic s-nlp/roberta_toxicity_classifier add_characters Robustness
1.0 2.0148849487304688
toxic s-nlp/roberta_toxicity_classifier delete_sentences Robustness
1.0 1.8481709957122803
toxic citizenlab/distilbert-base-multilingual-cased-toxicity delete_characters Robustness
1.0 23.87169098854065
toxic citizenlab/distilbert-base-multilingual-cased-toxicity replace_characters Robustness
1.0 1.7100698947906494
toxic citizenlab/distilbert-base-multilingual-cased-toxicity add_characters Robustness
0.0 2.025097370147705
toxic citizenlab/distilbert-base-multilingual-cased-toxicity delete_sentences Robustness
1.0 1.7129030227661133


Unnamed: 0,Value,Time
0,1.0,23.209883
1,1.0,1.773649
2,1.0,2.014885
3,1.0,1.848171
4,1.0,23.871691
5,1.0,1.71007
6,0.0,2.025097
7,1.0,1.712903


In [12]:
spam_results = await M_ASR_without_Bing(models_spam, perturbations_without_bing, attributes_without_bing)
spam_results

spam h-e-l-l-o/email-spam-classification-merged delete_characters Robustness
1.0 1.2653450965881348
spam h-e-l-l-o/email-spam-classification-merged replace_characters Robustness
1.0 1.2148048877716064
spam h-e-l-l-o/email-spam-classification-merged add_characters Robustness
1.0 1.506814956665039
spam h-e-l-l-o/email-spam-classification-merged delete_sentences Robustness
1.0 1.4900460243225098
spam dima806/email-spam-detection-roberta delete_characters Robustness
1.0 22.97094202041626
spam dima806/email-spam-detection-roberta replace_characters Robustness
1.0 1.158202886581421
spam dima806/email-spam-detection-roberta add_characters Robustness
1.0 1.3632292747497559
spam dima806/email-spam-detection-roberta delete_sentences Robustness
1.0 1.1460521221160889


Unnamed: 0,Value,Time
spam - h-e-l-l-o/email-spam-classification-merged - delete_characters - Robustness,1.0,1.265345
spam - h-e-l-l-o/email-spam-classification-merged - replace_characters - Robustness,1.0,1.214805
spam - h-e-l-l-o/email-spam-classification-merged - add_characters - Robustness,1.0,1.506815
spam - h-e-l-l-o/email-spam-classification-merged - delete_sentences - Robustness,1.0,1.490046
spam - dima806/email-spam-detection-roberta - delete_characters - Robustness,1.0,22.970942
spam - dima806/email-spam-detection-roberta - replace_characters - Robustness,1.0,1.158203
spam - dima806/email-spam-detection-roberta - add_characters - Robustness,1.0,1.363229
spam - dima806/email-spam-detection-roberta - delete_sentences - Robustness,1.0,1.146052


In [6]:
translate_results = await M_ASR_without_Bing(models_translate, perturbations_without_bing, attributes_without_bing)
translate_results

translate t5-base delete_characters Robustness
Die Besatzung des Schiffes Serenity versucht, einem telepathischen Attentäter zu entkommen.
Die Besatzung der shi Serenit versucht, einem Mörder zu entgehen, der elepathisch ist.
['Die Besatzung des Schiffes Serenity versucht, einem telepathischen Attentäter zu entkommen.', 'Die Besatzung der shi Serenit versucht, einem Mörder zu entgehen, der elepathisch ist.']
1.0 5.5460169315338135
translate t5-base replace_characters Robustness
Ein Polizeidetektiv, ein Bankräuber und ein mächtiger Makler treten nach dem glänzenden Raub des Kriminellen in eine Geiselsituation.
Ein uolice detectivL, ein Bankrobher, Xnd ein hoch-pmwer brokea in hoch-stakas negotiaTions afUer die brilliknt heist spVrals des Kriminellen in eine hostEge siTuation.
['Ein Polizeidetektiv, ein Bankräuber und ein mächtiger Makler treten nach dem glänzenden Raub des Kriminellen in eine Geiselsituation.', 'Ein uolice detectivL, ein Bankrobher, Xnd ein hoch-pmwer brokea in hoch-sta

Unnamed: 0,Value,Time
translate - t5-base - delete_characters - Robustness,1.0,5.546017
translate - t5-base - replace_characters - Robustness,0.0,16.150821
translate - t5-base - add_characters - Robustness,1.0,13.781719
translate - t5-base - delete_sentences - Robustness,1.0,4.94616
translate - allenai/wmt16-en-de-12-1 - delete_characters - Robustness,0.0,13.022063
translate - allenai/wmt16-en-de-12-1 - replace_characters - Robustness,0.0,2.453343
translate - allenai/wmt16-en-de-12-1 - add_characters - Robustness,0.0,2.320187
translate - allenai/wmt16-en-de-12-1 - delete_sentences - Robustness,1.0,1.471549


In [5]:
fill_masks_results = await M_ASR_without_Bing(models_fillmask, perturbations_without_bing, attributes_without_bing)
fill_masks_results

fillmask vinai/bertweet-base delete_characters Robustness
indexes_to_delete [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 30, 31, 32, 33]
mask_index 29
Antes [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 30, 31, 32, 33]
Despues [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 31, 32, 33]
Antes [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 31, 32, 33]
Despues [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 32, 33]
Antes [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 32, 33]
Despues [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 33]
Antes [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 33]
Despues [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27]
indexes_to_delete2

Unnamed: 0,Value,Time
fillmask - vinai/bertweet-base - delete_characters - Robustness,0.0,1.183037
fillmask - vinai/bertweet-base - replace_characters - Robustness,0.0,1.157433
fillmask - vinai/bertweet-base - add_characters - Robustness,0.0,1.160296
fillmask - roberta-base - delete_characters - Robustness,0.0,1.931882
fillmask - roberta-base - replace_characters - Robustness,0.0,2.581094
fillmask - roberta-base - add_characters - Robustness,0.0,2.073338
