Note:
- MFT (Minimum Functionality Test): checks whether a model has a given basic capability, using simple examples with a known expected label.
- DIR (Directional Expectation test): checks whether a model's predictions are consistent with a prior expectation or hypothesis, e.g. that a change to the input moves the prediction in an expected direction.
- INV (Invariance test): checks whether a model's predictions are invariant to certain transformations or changes in the input data.



ref:
- https://www.godeltech.com/how-to-automate-the-testing-process-for-machine-learning-systems/

Whether a test case passes or fails depends on the `labels` value provided in a line like this:
```test = MFT(**t, labels=0, name=name, capability = 'Vocabulary',description=desc)```
P.S. This can change depending on the test type (MFT, DIR, or INV) that is selected.

Sentiment labels are mapped to integers as follows:
- 'negative': 0
- 'positive': 2
- 'neutral': 1

In [None]:
%load_ext autoreload
%autoreload 2

import checklist
import spacy
import itertools

import checklist.editor
import checklist.text_generation
from checklist.test_types import MFT, INV, DIR
from checklist.expect import Expect
import numpy as np
import spacy
from checklist.test_suite import TestSuite
from checklist.perturb import Perturb
from transformers import pipeline

In [2]:
editor = checklist.editor.Editor() # create the Editor instance used below for lexicons, templates and suggestions
editor.tg # the editor's text generator; the cell output shows it is a checklist.text_generation.TextGenerator

<checklist.text_generation.TextGenerator at 0x22846604850>

In [3]:
import csv

# Column-wise storage for the fields read from Tweets.csv (airline dataset).
labels = []
confs = []
airlines = []
tdata = []
reasons = []

# Read the CSV inside a `with` block so the file handle is always closed.
# newline='' is what the csv module asks for, so that newlines embedded in
# quoted fields are parsed correctly.
# NOTE(review): assumes the file is UTF-8 encoded — confirm for your copy.
with open('Tweets.csv', newline='', encoding='utf-8') as tweets_file:
    r = csv.DictReader(tweets_file)
    for row in r:
        sentiment, conf, airline, text = row['airline_sentiment'], row['airline_sentiment_confidence'], row['airline'], row['text']
        labels.append(sentiment)  # one of `positive`, `negative`, `neutral` (see `mapping` below)
        confs.append(conf)  # confidence value, kept as the raw CSV string
        airlines.append(airline)  # airline name
        tdata.append(text)  # tweet text
        reasons.append(row['negativereason'])

# Map the sentiment strings to integer class ids.
mapping = {'negative': 0, 'positive': 2, 'neutral': 1}
labels = np.array([mapping[x] for x in labels]).astype(int)

In [4]:
# Load spaCy's 'en_core_web_sm' pipeline; it is used below to parse the tweets into `parsed_data`.
nlp = spacy.load('en_core_web_sm')

In [5]:
# Run every tweet text through the spaCy pipeline and store the results as a list in `parsed_data`.
sentences = tdata
parsed_data = list(nlp.pipe(sentences))

In [6]:
# The TestSuite is the container that every test below is registered in via `suite.add(...)`.
suite = TestSuite()

## Capability: Vocabulary

### MFTs focus on basic function

In [7]:
# Airline-related nouns used to fill the {air_noun} placeholder in the templates below.
air_noun = ['flight', 'seat', 'pilot', 'staff', 'service', 'customer service', 'aircraft', 'plane', 'food', 'cabin crew', 'company', 'airline', 'crew']

# Register the list as the 'air_noun' lexicon so templates can reference it as {air_noun}.
editor.add_lexicon('air_noun', air_noun)

In [8]:
# Ask the editor for words that fit the {a:mask} slot in this context and print the first 40.
# In the output below the suggestions are adjectives, which seed the adjective lexicons.
print(', '.join(editor.suggest('It was {a:mask} {air_noun}.')[:40]))

  to_pred = torch.tensor(to_pred, device=self.device).to(torch.int64)


great, good, excellent, amazing, incredible, extraordinary, terrible, expensive, bad, fantastic, wonderful, interesting, new, nice, unusual, important, exceptional, beautiful, American, ordinary, different, efficient, poor, enormous, special, average, real, awful, easy, professional, awesome, horrible, fine, huge, perfect, remarkable, international, simple, terrific, solid


In [9]:
# Hand-picked positive, negative and neutral adjectives.
pos_adj = ['good', 'great', 'excellent', 'amazing', 'extraordinary', 'beautiful', 'fantastic', 'nice', 'incredible', 'exceptional', 'awesome', 'perfect', 'fun', 'happy', 'adorable', 'brilliant', 'exciting', 'sweet', 'wonderful']
neg_adj = ['awful', 'bad', 'horrible', 'weird', 'rough', 'lousy', 'unhappy', 'average', 'difficult', 'poor', 'sad', 'frustrating', 'hard', 'lame', 'nasty', 'annoying', 'boring', 'creepy', 'dreadful', 'ridiculous', 'terrible', 'ugly', 'unpleasant']
neutral_adj = ['American', 'international',  'commercial', 'British', 'private', 'Italian', 'Indian', 'Australian', 'Israeli', ]

# Register each list as a lexicon ({pos_adj}, {neg_adj}, {neutral_adj}); overwrite=True is passed so the cell can be re-run.
editor.add_lexicon('pos_adj', pos_adj, overwrite=True)
editor.add_lexicon('neg_adj', neg_adj, overwrite=True )
editor.add_lexicon('neutral_adj', neutral_adj, overwrite=True)

In [10]:
# Ask the editor for words that fit the {mask} slot in this context and print up to 200.
# In the output below the suggestions are verbs, which seed the verb lexicons.
print(', '.join(editor.suggest('I really {mask} the {air_noun}.')[:200]))

liked, enjoyed, like, appreciated, appreciate, loved, enjoy, love, miss, missed, respect, got, prefer, wanted, needed, admired, hate, recommend, preferred, value, disliked, respected, dislike, hated, need, used, felt, valued, want, took, admire, feel, trust, dig, understood, noticed, underestimated, welcomed, tried, found, dug, understand, adore, use, get, praised, did, likes, left, mean, considered, think, saw, favor, welcome, liking, trusted, had, regret, remember, cherish, helped, treasure, supported, cherished, picked, have, enjoying, followed, consider, bought, support, compliment, thank, met, chose, credit, commend, know, applaud, thought, believe, rate, see, lost, watched, LOVE, embraced, minded, impressed, experienced, envy, blame, packed, improved, follow, fancy, take, recommended, underestimate, choose, was, saved, expected, thanked, beat, owe, mind, forgive, joined, learned, resent, do, knew, hit, read, leave, regretted, earned, deserved, made, finished, anticipated, rocked,

In [11]:
# Hand-picked positive, negative and neutral verbs in the present tense.
pos_verb_present = ['like', 'enjoy', 'appreciate', 'love',  'recommend', 'admire', 'value', 'welcome']
neg_verb_present = ['hate', 'dislike', 'regret',  'abhor', 'dread', 'despise' ]
neutral_verb_present = ['see', 'find']

# Past-tense forms of the verbs above (note: 'recommend' has no past-tense entry).
pos_verb_past = ['liked', 'enjoyed', 'appreciated', 'loved', 'admired', 'valued', 'welcomed']
neg_verb_past = ['hated', 'disliked', 'regretted',  'abhorred', 'dreaded', 'despised']
neutral_verb_past = ['saw', 'found']

# Register each list as its own lexicon.
editor.add_lexicon('pos_verb_present', pos_verb_present, overwrite=True)
editor.add_lexicon('neg_verb_present', neg_verb_present, overwrite=True)
editor.add_lexicon('neutral_verb_present', neutral_verb_present, overwrite=True)
editor.add_lexicon('pos_verb_past', pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb_past', neg_verb_past, overwrite=True)
editor.add_lexicon('neutral_verb_past', neutral_verb_past, overwrite=True)

# Also register combined lexicons (present + past forms) as `pos_verb`, `neg_verb` and `neutral_verb`.
editor.add_lexicon('pos_verb', pos_verb_present+ pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb', neg_verb_present + neg_verb_past, overwrite=True)
editor.add_lexicon('neutral_verb', neutral_verb_present + neutral_verb_past, overwrite=True)

Individual words

In [12]:
# Single-word MFT: every positive adjective and verb (present and past) on its own, expected label 2 (positive).
test = MFT(pos_adj + pos_verb_present + pos_verb_past, labels=2)
suite.add(test, 'single positive words', 'Vocabulary', '')

In [13]:
# Single-word MFT: every negative adjective and verb (present and past) on its own, expected label 0 (negative).
test = MFT(neg_adj + neg_verb_present + neg_verb_past, labels=0)
suite.add(test, 'single negative words', 'Vocabulary', '')

In [14]:
# Single-word MFT: every neutral adjective and verb (present and past) on its own, expected label 1 (neutral).
# NOTE(review): the description passed to suite.add is still the placeholder 'TODO_DESCRIPTION'.
test = MFT(neutral_adj + neutral_verb_present + neutral_verb_past, labels=1)
suite.add(test, 'single neutral words', 'Vocabulary', 'TODO_DESCRIPTION')

Words in context

In [15]:
# Build sentiment-laden sentences from templates. The first three templates carry labels=2 (positive),
# the last three labels=0 (negative).
t = editor.template('{it} {air_noun} {be} {pos_adj}.', it=['The', 'This', 'That'], be=['is', 'was'], labels=2, save=True) # the first call creates `t`
t += editor.template('{it} {be} {a:pos_adj} {air_noun}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=2, save=True) # later calls are appended to `t` with +=
t += editor.template('{i} {pos_verb} {the} {air_noun}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=2, save=True)
t += editor.template('{it} {air_noun} {be} {neg_adj}.', it=['That', 'This', 'The'], be=['is', 'was'], labels=0, save=True)
t += editor.template('{it} {be} {a:neg_adj} {air_noun}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=0, save=True)
t += editor.template('{i} {neg_verb} {the} {air_noun}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=0, save=True)

# `**t` unpacks the template result into keyword arguments for MFT.
test = MFT(**t)
suite.add(test, 'Sentiment-laden words in context', 'Vocabulary', 'Use positive and negative verbs and adjectives with airline nouns such as seats, pilot, flight, etc. E.g. "This was a bad flight"')


In [16]:
# Inspect the contents of the 'neutral_verb' lexicon (nothing is added here).
editor.lexicons['neutral_verb']

['see', 'find', 'saw', 'found']

In [17]:
# Build neutral sentences from templates. No labels are attached to the templates here;
# the expected label 1 (neutral) is passed to MFT directly.
t = editor.template('{it} {air_noun} {be} {neutral_adj}.', it=['That', 'This', 'The'], be=['is', 'was'], save=True) # the first call creates `t`
t += editor.template('{it} {be} {a:neutral_adj} {air_noun}.', it=['It', 'This', 'That'], be=['is', 'was'], save=True) # later calls are appended to `t` with +=
t += editor.template('{i} {neutral_verb} {the} {air_noun}.', i=['I', 'We'], the=['this', 'that', 'the'], save=True)
test = MFT(t.data, labels=1, templates=t.templates)
suite.add(test, 'neutral words in context', 'Vocabulary', 'Use neutral verbs and adjectives with airline nouns such as seats, pilot, flight, etc. E.g. "The pilot is American"')

### Intensifiers and reducers

In [18]:
# Ask the editor for words that fit the {a:mask} slot in front of a positive adjective and print the first 50.
# In the output below these are mostly adverbs, i.e. candidate intensifiers for adjectives.
print(' , '.join(editor.suggest('{it} {be} {a:mask} {pos_adj} {air_noun}.', it=['It', 'This', 'That'], be=['is', 'was'])[:50]))

very , really , truly , absolutely , pretty , quite , incredibly , extremely , most , just , amazingly , utterly , unbelievably , totally , simply , rather , almost , extraordinarily , genuinely , especially , exceptionally , equally , absolute , entirely , certainly , obviously , altogether , exceedingly , fairly , enormously , unexpectedly , insanely , undeniably , particularly , seriously , immensely , unusually , extraordinary , overwhelmingly , otherwise , overall , amazing , surprisingly , admittedly , incredible , actually , completely , appropriately , fucking , all


In [19]:
# Intensifying adverbs placed in front of adjectives (used as `intens` in the adjective templates below).
intens_adj = ['very', 'really', 'absolutely', 'truly', 'extremely', 'quite', 'incredibly', 'amazingly', 'especially', 'exceptionally', 'unbelievably', 'utterly', 'exceedingly', 'rather', 'totally', 'particularly']

In [20]:
# Ask the editor for words that fit the {mask} slot in front of a positive verb and print the first 100.
# These are candidate intensifiers for verbs.
print(', '.join(editor.suggest('{i} {mask} {pos_verb} {the} {air_noun}.', i=['I', 'We'], the=['this', 'that', 'the'])[:100]))

really, always, just, also, definitely, still, actually, certainly, truly, absolutely, never, greatly, especially, quite, so, particularly, personally, highly, thoroughly, totally, do, only, very, rather, sure, sincerely, honestly, did, completely, genuinely, obviously, even, strongly, much, already, most, immediately, clearly, simply, deeply, fully, mostly, both, have, seriously, almost, generally, too, feel, would, often, again, kinda, usually, finally, all, hugely, specifically, surely, should, dearly, basically, could, had, REALLY, felt, will, now, fucking, ..., can, tremendously, desperately, probably, must, cannot, …, immensely, might, extremely, ultimately, literally, ever, may, incredibly, frankly, secretly, I, more, think, was, were, instantly, gotta, vastly, forever, normally, sorely, quickly, long


In [24]:
# Intensifying adverbs placed in front of verbs (used as `intens` in the verb templates below).
intens_verb = [ 'really', 'absolutely', 'truly', 'extremely',  'especially',  'utterly',  'totally', 'particularly', 'highly', 'definitely', 'certainly', 'genuinely', 'honestly', 'strongly', 'sure', 'sincerely']

In [25]:
# Expectation for the intensifier test: confidence should not go down (increasing=True), with tolerance=0.1.
monotonic_label = Expect.monotonic(increasing=True, tolerance=0.1)
# Slicing predicate: True when the prediction is not 1, i.e. not neutral under the label mapping used in this notebook.
non_neutral_pred = lambda pred, *args, **kwargs: pred != 1
# Combine the two so that the monotonic expectation is only applied to cases selected by non_neutral_pred.
monotonic_label = Expect.slice_pairwise(monotonic_label, non_neutral_pred)

In [26]:
# Preview cell: build (plain sentence, sentence with intensifier) pairs and show the first five.
# Each template call takes a list of two templates, so every sample is a pair sharing the same fill-ins.
t = editor.template(['{it} {be} {a:pos_adj} {air_noun}.', '{it} {be} {a:intens} {pos_adj} {air_noun}.'] , intens=intens_adj, it=['It', 'This', 'That'], be=['is', 'was'], nsamples=500, save=True)
t += editor.template(['{i} {pos_verb} {the} {air_noun}.', '{i} {intens} {pos_verb} {the} {air_noun}.'], intens=intens_verb, i=['I', 'We'], the=['this', 'that', 'the'], nsamples=500, save=True)
t += editor.template(['{it} {be} {a:neg_adj} {air_noun}.', '{it} {be} {a:intens} {neg_adj} {air_noun}.'] , intens=intens_adj, it=['It', 'This', 'That'], be=['is', 'was'], nsamples=500, save=True)
t += editor.template(['{i} {neg_verb} {the} {air_noun}.', '{i} {intens} {neg_verb} {the} {air_noun}.'], intens=intens_verb, i=['I', 'We'], the=['this', 'that', 'the'], nsamples=500, save=True)
t.data[:5] # show the first 5 pairs


[['It was a fantastic service.', 'It was a quite fantastic service.'],
 ['That was a great cabin crew.', 'That was an utterly great cabin crew.'],
 ['It is an exciting staff.', 'It is an exceedingly exciting staff.'],
 ['It is a good cabin crew.', 'It is an especially good cabin crew.'],
 ['It was an awesome seat.', 'It was a particularly awesome seat.']]

In [27]:
# Intensifier DIR test: rebuild the (plain, intensified) sentence pairs and check them against `monotonic_label`.
# The first call creates `t`; the following calls are appended with +=.
t = editor.template(['{it} {be} {a:pos_adj} {air_noun}.', '{it} {be} {a:intens} {pos_adj} {air_noun}.'] , intens=intens_adj, it=['It', 'This', 'That'], be=['is', 'was'], nsamples=500, save=True)
t += editor.template(['{i} {pos_verb} {the} {air_noun}.', '{i} {intens} {pos_verb} {the} {air_noun}.'], intens=intens_verb, i=['I', 'We'], the=['this', 'that', 'the'], nsamples=500, save=True)
t += editor.template(['{it} {be} {a:neg_adj} {air_noun}.', '{it} {be} {a:intens} {neg_adj} {air_noun}.'] , intens=intens_adj, it=['It', 'This', 'That'], be=['is', 'was'], nsamples=500, save=True)
t += editor.template(['{i} {neg_verb} {the} {air_noun}.', '{i} {intens} {neg_verb} {the} {air_noun}.'], intens=intens_verb, i=['I', 'We'], the=['this', 'that', 'the'], nsamples=500, save=True)
test = DIR(t.data, monotonic_label, templates=t.templates) # expectation: monotonic_label, defined above
description = '''Test is composed of pairs of sentences (x1, x2), where we add an intensifier
such as "really",or "very" to x2 and expect the confidence to NOT go down (with tolerance=0.1). e.g.:
x1 = "That was a good flight"
x2 = "That was a very good flight"
We disregard cases where the prediction of x1 is neutral.
'''
# Register the test in the suite under the 'Vocabulary' capability.
suite.add(test, 'intensifiers', 'Vocabulary', description)


In [28]:
# Reducers: words and phrases placed in front of an adjective to weaken it (used as `red` in the templates below).
reducer_adj = ['somewhat', 'kinda', 'mostly', 'probably', 'generally', 'reasonably', 'a little', 'a bit', 'slightly']

In [29]:
# Expectation for the reducer test: confidence should not go up (increasing=False), with tolerance=0.1.
monotonic_label_down = Expect.monotonic(increasing=False, tolerance=0.1)
# Only apply it to cases selected by non_neutral_pred (prediction != 1, i.e. not neutral).
monotonic_label_down = Expect.slice_pairwise(monotonic_label_down, non_neutral_pred)

In [30]:
# Preview cell: build (plain sentence, sentence with reducer) pairs and show the first 50.
# The first call creates `t`; the second is appended with +=.
t = editor.template(['{it} {air_noun} {be} {pos_adj}.', '{it} {air_noun} {be} {red} {pos_adj}.'] , red=reducer_adj, it=['The', 'This', 'That'], be=['is', 'was'], nsamples=1000, save=True)
t += editor.template(['{it} {air_noun} {be} {neg_adj}.', '{it} {air_noun} {be} {red} {neg_adj}.'] , red=reducer_adj, it=['The', 'This', 'That'], be=['is', 'was'], nsamples=1000, save=True)
t.data[:50] # show the first 50 pairs

[['The seat is extraordinary.', 'The seat is reasonably extraordinary.'],
 ['This food was perfect.', 'This food was slightly perfect.'],
 ['That company is nice.', 'That company is mostly nice.'],
 ['This customer service is great.',
  'This customer service is a little great.'],
 ['The seat was brilliant.', 'The seat was a little brilliant.'],
 ['That crew was adorable.', 'That crew was a bit adorable.'],
 ['That company is amazing.', 'That company is reasonably amazing.'],
 ['The crew is fantastic.', 'The crew is slightly fantastic.'],
 ['The cabin crew was good.', 'The cabin crew was mostly good.'],
 ['That customer service is brilliant.',
  'That customer service is generally brilliant.'],
 ['This staff was exciting.', 'This staff was kinda exciting.'],
 ['That flight is happy.', 'That flight is generally happy.'],
 ['The flight was great.', 'The flight was somewhat great.'],
 ['That customer service was brilliant.',
  'That customer service was a bit brilliant.'],
 ['That custome

In [31]:
# Reducer DIR test: rebuild the (plain, reduced) sentence pairs and check them against `monotonic_label_down`.
# The first call creates `t`; the second is appended with +=.
t = editor.template(['{it} {air_noun} {be} {pos_adj}.', '{it} {air_noun} {be} {red} {pos_adj}.'] , red=reducer_adj, it=['The', 'This', 'That'], be=['is', 'was'], nsamples=1000, save=True)
t += editor.template(['{it} {air_noun} {be} {neg_adj}.', '{it} {air_noun} {be} {red} {neg_adj}.'] , red=reducer_adj, it=['The', 'This', 'That'], be=['is', 'was'], nsamples=1000, save=True)
test = DIR(t.data, monotonic_label_down, templates=t.templates)
description = '''Test is composed of pairs of sentences (x1, x2), where we add a reducer
such as "somewhat", or "kinda" to x2 and expect the confidence to NOT go up (with tolerance=0.1). e.g.:
x1 = "The cabin crew was good."
x2 = "The cabin crew was somewhat good."
We disregard cases where the prediction of x1 is neutral.
'''

# Register the test in the suite. Adding a reducer is expected to make the sentence less strongly polar.
suite.add(test, 'reducers', 'Vocabulary', description)


### INVariance: change neutral words

In [32]:
# Words treated as neutral: change_neutral only tries to replace words found in this set.
neutral_words = {
    '.', 'the', 'The', ',', 'a', 'A', 'and', 'of', 'to', 'it', 'that', 'in',
    'this', 'for', 'you', 'there', 'or', 'an', 'by', 'about', 'flight', 'my',
    'have', 'with', 'was', 'at', 'get', 'from', 'Flight', 'plane',
}

# Replacements that are never accepted: negation/contrast words plus every
# sentiment-bearing adjective and verb defined earlier in the notebook.
forbidden = {
    'No', 'no', 'Not', 'not', 'Nothing', 'nothing', 'without', 'but',
    *pos_adj,
    *neg_adj,
    *pos_verb_present,
    *pos_verb_past,
    *neg_verb_present,
    *neg_verb_past,
}

# Perturbation function: replace one neutral word of a sentence with another context-appropriate word.
def change_neutral(d):
    """Return up to 10 variants of sentence `d` with a neutral word replaced.

    For every whitespace-separated token of `d.capitalize()` that is in
    `neutral_words`, `editor.suggest_replace` is asked for replacements;
    suggestions whose replacement is in `forbidden` are discarded.

    Args:
        d: the sentence to perturb, as a string.

    Returns:
        A list of at most 10 perturbed sentences sampled without replacement,
        or None when `d` contains no neutral word or no suggestion survives
        the `forbidden` filter.
    """
    # NOTE(review): capitalize() lower-cases everything after the first
    # character, so the tokens matched here may differ in case from the words
    # actually present in `d` — confirm this is intended.
    words_in = [x for x in d.capitalize().split() if x in neutral_words]
    if not words_in:
        return None
    examples = []
    for w in words_in:
        # Each suggestion is indexed as x[0] = replacement, x[1] = new sentence.
        suggestions = [x for x in editor.suggest_replace(d, w, beam_size=5, words_and_sentences=True) if x[0] not in forbidden]
        examples.extend(x[1] for x in suggestions)
    if not examples:
        return None
    idxs = np.random.choice(len(examples), min(len(examples), 10), replace=False)
    return [examples[i] for i in idxs]


In [33]:
# Apply change_neutral to the raw tweet texts (nsamples=500) and require the prediction to stay the same.
t = Perturb.perturb(sentences, change_neutral, nsamples=500)
test = INV(t.data) # INV = invariance test

# The replacement words come from editor.suggest_replace, called inside change_neutral.
description = 'Change a set of neutral words with other context-appropriate neutral words (using BERT).' 
suite.add(test, 'change neutral words with BERT', 'Vocabulary', description)


### Add negative and positive phrases

In [34]:
# Build the list of clearly positive phrases that will be appended to tweets.
positive = editor.template('I {pos_verb_present} you.').data
positive += editor.template('You are {pos_adj}.').data
positive += ['I would fly with you again.']
positive.remove('You are happy.') # drop this one: it expresses a feeling

# Build the list of clearly negative phrases that will be appended to tweets.
negative = editor.template('I {neg_verb_present} you.').data
negative += editor.template('You are {neg_adj}.').data
negative += ['Never flying with you again.']

# Factory for perturbation functions that append a phrase to a parsed sentence.
def add_phrase_function(phrases):
    """Build a perturbation that appends phrases from `phrases` to a sentence.

    Args:
        phrases: list of strings to append.

    Returns:
        A function `pert(d)`. `d` is indexed, sliced and read via `.pos_` and
        `.text` (parsed spaCy objects at the call sites in this notebook).
        `pert` strips trailing punctuation tokens and returns up to 10
        strings of the form '<sentence>. <phrase>', sampled without
        replacement. It returns None when nothing is left of the sentence
        after stripping or when `phrases` is empty, mirroring
        change_neutral's "nothing to perturb" return value.
    """
    def pert(d):
        # The length check stops an all-punctuation (or empty) input from
        # being indexed once every token has been stripped.
        while len(d) > 0 and d[-1].pos_ == 'PUNCT':
            d = d[:-1]
        if len(d) == 0:
            return None
        d = d.text
        ret = [d + '. ' + x for x in phrases]
        if not ret:
            return None
        # Cap the sample size at the number of candidates so that fewer than
        # 10 phrases no longer raises (same approach as change_neutral).
        idx = np.random.choice(len(ret), min(len(ret), 10), replace=False)
        return [ret[i] for i in idx]
    return pert


In [35]:
# Measure how much the prediction moved towards "positive" after a perturbation.
def positive_change(orig_conf, conf):
    """Return the shift towards positive sentiment between two predictions.

    The result is the drop in the index-0 (negative) probability plus the
    rise in the index-2 (positive) probability:
    (orig_conf[0] - conf[0]) + (conf[2] - orig_conf[2]).

    Args:
        orig_conf: numpy array of 3 probabilities for the original input.
        conf: probabilities for the perturbed input, indexed the same way.

    Returns:
        A float; positive when the prediction moved towards positive.

    Raises:
        ValueError: if `orig_conf` is not a numpy array of length 3.
    """
    # isinstance also accepts ndarray subclasses; the previous check compared
    # the type against `np.array`, which is a function and can never match.
    if not isinstance(orig_conf, np.ndarray) or orig_conf.shape[0] != 3:
        raise ValueError('Need prediction function to be softmax with 3 labels (negative, neutral, positive)')
    return orig_conf[0] - conf[0] + conf[2] - orig_conf[2]

# Pairwise expectation used when a positive phrase is added: the shift towards
# positive may drop by at most 0.1.
def diff_up(orig_pred, pred, orig_conf, conf, labels=None, meta=None):
    """Check that positive sentiment did not go down by more than 0.1.

    Returns True when positive_change(orig_conf, conf) + 0.1 >= 0; otherwise
    returns that (negative) sum. `orig_pred`, `pred`, `labels` and `meta`
    are accepted but not used.
    """
    margin = positive_change(orig_conf, conf) + 0.1
    return True if margin >= 0 else margin
    
# Pairwise expectation used when a negative phrase is added: the shift towards
# positive may rise by at most 0.1.
def diff_down(orig_pred, pred, orig_conf, conf, labels=None, meta=None):
    """Check that positive sentiment did not go up by more than 0.1.

    Returns True when positive_change(orig_conf, conf) - 0.1 <= 0; otherwise
    returns the negated excess, i.e. a negative number. `orig_pred`, `pred`,
    `labels` and `meta` are accepted but not used.
    """
    excess = positive_change(orig_conf, conf) - 0.1
    return True if excess <= 0 else -excess

goes_up = Expect.pairwise(diff_up) # expectation used by the 'add positive phrases' test
goes_down = Expect.pairwise(diff_down) # expectation used by the 'add negative phrases' test
    

In [36]:
# DIR test: append a very positive phrase to each sampled tweet; the probability of positive should not go down.
# `parsed_data` (the spaCy-parsed tweets) is used because the perturbation reads token `.pos_` tags.
t = Perturb.perturb(parsed_data, add_phrase_function(positive), nsamples=500)
test = DIR(t.data, goes_up) 
description = 'Add very positive phrases (e.g. I love you) to the end of sentences, expect probability of positive to NOT go down (tolerance=0.1)'
suite.add(test, 'add positive phrases', 'Vocabulary', description)


In [37]:
# DIR test: append a very negative phrase to each sampled tweet; the probability of positive should not go up.
# `parsed_data` (the spaCy-parsed tweets) is used because the perturbation reads token `.pos_` tags.
t = Perturb.perturb(parsed_data, add_phrase_function(negative), nsamples=500)
test = DIR(t.data, goes_down)
description = 'Add very negative phrases (e.g. I hate you) to the end of sentences, expect probability of positive to NOT go up (tolerance=0.1)'
suite.add(test, 'add negative phrases', 'Vocabulary', description)


## Capability: robustness
### INVariance: adding irrelevant stuff before and after.


In [38]:
import string

# Random alphanumeric string helper.
def random_string(n):
    """Return a string of `n` characters drawn (with repetition) from ASCII letters and digits."""
    alphabet = list(string.ascii_letters + string.digits)
    drawn = np.random.choice(alphabet, n)
    return ''.join(drawn)

# Random short-link helper.
def random_url(n=6):
    """Return 'https://t.co/' followed by a random string of length `n`."""
    suffix = random_string(n)
    return 'https://t.co/' + suffix

# Random Twitter-handle helper.
def random_handle(n=6):
    """Return '@' followed by a random string of length `n`."""
    name = random_string(n)
    return '@' + name

# Perturbation function: surround a sentence with irrelevant urls and handles.
def add_irrelevant(sentence):
    """Return variants of `sentence` with an irrelevant token added before or after it.

    Five random urls and five random handles are generated. Each of them,
    plus the fixed '@airline ' token, is put in front of the sentence, and
    each of the ten random tokens is also put behind it, giving 21 variants.
    """
    urls = [random_url(n=6) for _ in range(5)]
    handles = [random_handle() for _ in range(5)]
    noise = urls + handles
    # '@airline ' carries its own trailing space, so that variant has two spaces before the sentence.
    prefixed = ['%s %s' % (token, sentence) for token in ['@airline '] + noise]
    suffixed = ['%s %s' % (sentence, token) for token in noise]
    return prefixed + suffixed


In [39]:
# INV test: adding a random url or handle at the start or end of a tweet should not change the prediction.
t = Perturb.perturb(sentences, add_irrelevant, nsamples=500)
test = INV(t.data)
suite.add(test, 'add random urls and handles', 'Robustness', 'add randomly generated urls and handles to the start or end of sentence')


### punctuation, contractions, typos

In [40]:
# INV test: stripping punctuation and / or adding "." should not change the prediction.
# Uses `parsed_data` (spaCy-parsed tweets) rather than the raw strings.
t = Perturb.perturb(parsed_data, Perturb.punctuation, nsamples=500)
test = INV(t.data)
suite.add(test, 'punctuation', 'Robustness', 'strip punctuation and / or add "."')


In [41]:
# INV test: one typo (two adjacent characters swapped) should not change the prediction.
t = Perturb.perturb(sentences, Perturb.add_typos, nsamples=500, typos=1)
test = INV(t.data)
suite.add(test, 'typos', 'Robustness', 'Add one typo to input by swapping two adjacent characters')


In [42]:
# INV test: two typos (adjacent characters swapped twice) should not change the prediction.
t = Perturb.perturb(sentences, Perturb.add_typos, nsamples=500, typos=2)
test = INV(t.data)
suite.add(test, '2 typos', 'Robustness', 'Add two typos to input by swapping two adjacent characters twice')


In [43]:
# INV test: contracting or expanding contractions (e.g. What is -> What's) should not change the prediction.
t = Perturb.perturb(sentences, Perturb.contractions, nsamples=1000)
test = INV(t.data)
suite.add(test, 'contractions', 'Robustness', 'Contract or expand contractions, e.g. What is -> What\'s')

## Capability: NER

In [44]:
# INV test: replacing person names with other common names should not change the prediction.
# Uses `parsed_data` (spaCy-parsed tweets) rather than the raw strings.
t = Perturb.perturb(parsed_data, Perturb.change_names, nsamples=1000)
test = INV(t.data)
suite.add(test, 'change names', 'NER', 'Replace names with other common names')

In [45]:
# INV test: replacing city or country names with other cities or countries should not change the prediction.
# Uses `parsed_data` (spaCy-parsed tweets) rather than the raw strings.
t = Perturb.perturb(parsed_data, Perturb.change_location, nsamples=1000)
test = INV(t.data)
suite.add(test, 'change locations', 'NER', 'Replace city or country names with other cities or countries')

## Capability: temporal awareness

In [None]:
# Check what the `neg_verb_present` lexicon contains by expanding it through a one-placeholder template.
editor.template('{neg_verb_present}').data

['hate', 'dislike', 'regret', 'abhor', 'dread', 'despise']

In [47]:
# Connectives placed between the two clauses; the empty string means no connective.
change = ['but', 'even though', 'although', '']

# Each call passes a list of templates with unroll=True and nsamples=500.
# The first call creates `t`; the following calls are appended with +=.

# Past opinion negative, present opinion positive (adjectives): expected label 2.
t = editor.template(
    [
        'I used to think this airline was {neg_adj}, {change} now I think it is {pos_adj}.',
        'I think this airline is {pos_adj}, {change} I used to think it was {neg_adj}.',
        'In the past I thought this airline was {neg_adj}, {change} now I think it is {pos_adj}.',
        'I think this airline is {pos_adj}, {change} in the past I thought it was {neg_adj}.',
    ],
    change=change, unroll=True, nsamples=500, save=True, labels=2)

# Past opinion negative, present opinion positive (verbs): expected label 2.
# The third template uses {pos_verb_present} like its siblings: the slot
# follows "now I", so the combined {pos_verb} lexicon (which also holds
# past-tense forms) would produce sentences such as "now I liked it".
t += editor.template(
    [
        'I used to {neg_verb_present} this airline, {change} now I {pos_verb_present} it.',
        'I {pos_verb_present} this airline, {change} I used to {neg_verb_present} it.',
        'In the past I would {neg_verb_present} this airline, {change} now I {pos_verb_present} it.',
        'I {pos_verb_present} this airline, {change} in the past I would {neg_verb_present} it.',
    ],
    change=change, unroll=True, nsamples=500, save=True, labels=2)

# Past opinion positive, present opinion negative (adjectives): expected label 0.
t += editor.template(
    [
        'I used to think this airline was {pos_adj}, {change} now I think it is {neg_adj}.',
        'I think this airline is {neg_adj}, {change} I used to think it was {pos_adj}.',
        'In the past I thought this airline was {pos_adj}, {change} now I think it is {neg_adj}.',
        'I think this airline is {neg_adj}, {change} in the past I thought it was {pos_adj}.',
    ],
    change=change, unroll=True, nsamples=500, save=True, labels=0)

# Past opinion positive, present opinion negative (verbs): expected label 0.
t += editor.template(
    [
        'I used to {pos_verb_present} this airline, {change} now I {neg_verb_present} it.',
        'I {neg_verb_present} this airline, {change} I used to {pos_verb_present} it.',
        'In the past I would {pos_verb_present} this airline, {change} now I {neg_verb_present} it.',
        'I {neg_verb_present} this airline, {change} in the past I would {pos_verb_present} it.',
    ],
    change=change, unroll=True, nsamples=500, save=True, labels=0)

# `**t` unpacks the template result into keyword arguments for MFT.
test = MFT(**t)
description = '''Have two conflicing statements, one about the past and one about the present.
Expect the present to carry the sentiment. Examples:
I used to love this airline, now I hate it -> should be negative
I love this airline, although I used to hate it -> should be positive
'''
suite.add(test, 'used to, but now', 'Temporal', description)



"used to" should reduce confidence

In [48]:
# Preview cell: build (statement, "used to" version of the statement) pairs and show the first five.
# The "used to" version is expected to express the sentiment less strongly.
t = editor.template(['{it} {be} {a:adj} {air_noun}.', 'I used to think {it} {be} {a:adj} {air_noun}.'], it=['it', 'this', 'that'], be=['is', 'was'], adj=editor.lexicons['pos_adj'] + editor.lexicons['neg_adj'], save=True)
t += editor.template(['{i} {verb} {the} {air_noun}.', '{i} used to {verb} {the} {air_noun}.'], i=['I', 'We'], the=['this', 'that', 'the'], verb=editor.lexicons['pos_verb_present'] + editor.lexicons['neg_verb_present'], save=True)
t.data[:5]


[['it is a good flight.', 'I used to think it is a good flight.'],
 ['it is a good seat.', 'I used to think it is a good seat.'],
 ['it is a good pilot.', 'I used to think it is a good pilot.'],
 ['it is a good staff.', 'I used to think it is a good staff.'],
 ['it is a good service.', 'I used to think it is a good service.']]

In [49]:
# DIR test: adding "used to" should not increase confidence; pairs are checked against `monotonic_label_down`.
t = editor.template(['{it} {be} {a:adj} {air_noun}.', 'I used to think {it} {be} {a:adj} {air_noun}.'], it=['it', 'this', 'that'], be=['is', 'was'], adj=editor.lexicons['pos_adj'] + editor.lexicons['neg_adj'], save=True)
t += editor.template(['{i} {verb} {the} {air_noun}.', '{i} used to {verb} {the} {air_noun}.'], i=['I', 'We'], the=['this', 'that', 'the'], verb=editor.lexicons['pos_verb_present'] + editor.lexicons['neg_verb_present'], save=True)
test = DIR(t.data, monotonic_label_down, templates=t.templates)
suite.add(test, '"used to" should reduce', 'Temporal', 'A model should not be more confident on "I used to think X" when compared to "X", e.g. "I used to love this airline" should have less confidence than "I love this airline"')


## Capability: Fairness

In [50]:
editor.lexicons.keys() # list the names of all available lexicons (built-in ones plus those added above)

dict_keys(['male', 'female', 'first_name', 'first_pronoun', 'last_name', 'country', 'nationality', 'city', 'religion', 'religion_adj', 'sexual_adj', 'sentiment', 'country_city', 'male_from', 'female_from', 'last_from', 'air_noun', 'pos_adj', 'neg_adj', 'neutral_adj', 'pos_verb_present', 'neg_verb_present', 'neutral_verb_present', 'pos_verb_past', 'neg_verb_past', 'neutral_verb_past', 'pos_verb', 'neg_verb', 'neutral_verb'])

In [51]:
# Show the 'religion_adj' lexicon entries, each prefixed with the matching article ("a" / "an").
editor.template('{a:religion_adj}').data

['a Christian',
 'a Protestant',
 'a Roman Catholic',
 'an Eastern Orthodox',
 'an Anglican',
 'a Jew',
 'an Orthodox Jew',
 'a Muslim',
 'a Sunni',
 "a Shi'a",
 'an Ahmadiyya',
 'a Buddhist',
 'a Zoroastrian',
 'a Hindu',
 'a Sikh',
 'a Shinto',
 "a Baha'i",
 'a Taoist',
 'a Confucian',
 'a Jain',
 'an Atheist',
 'an Agnostic']

In [52]:
# Protected attributes for the fairness tests, keyed by category. Each value is a list of
# "a/an <adjective>" strings; 'nationality' is limited to the first 20 entries.
protected = {
    'race': ['a black','a hispanic', 'a white', 'an asian'],
    'sexual': editor.template('{a:sexual_adj}').data,
    'religion': editor.template('{a:religion_adj}').data,
    'nationality': editor.template('{a:nationality}').data[:20],
}

In [53]:
# t.data[:10]

In [54]:
# Fairness: within one protected class, changing only the adjective should
# leave the prediction unchanged (invariance test, one test per class).
for p, vals in protected.items():
    print(p)
    # One template per protected term; "%s" is filled here, while {male},
    # {female} and {mask} are left for the editor to fill.
    male_templates = ["{male} is %s {mask}." % term for term in vals]
    female_templates = ["{female} is %s {mask}." % term for term in vals]
    t = editor.template(male_templates, return_maps=False, nsamples=300, save=True)
    t += editor.template(female_templates, return_maps=False, nsamples=300, save=True)
    test = INV(t.data, threshold=0.1, templates=t.templates)
    suite.add(
        test,
        "protected: %s" % p,
        "Fairness",
        "Prediction should be the same for various adjectives within a protected class",
    )

race
sexual
religion
nationality



## Capability: Negation

Simple templates:

In [55]:
# Simple negations of positive statements: every sample is expected to be
# labelled negative (0).
t = editor.template(
    "{it} {air_noun} {nt} {pos_adj}.",
    it=["This", "That", "The"],
    nt=["is not", "isn't"],
    save=True,
)
t += editor.template(
    "{it} {benot} {a:pos_adj} {air_noun}.",
    it=["It", "This", "That"],
    benot=["is not", "isn't", "was not", "wasn't"],
    save=True,
)
neg = ["I can't say I", "I don't", "I would never say I", "I don't think I", "I didn't"]
t += editor.template(
    "{neg} {pos_verb_present} {the} {air_noun}.",
    neg=neg,
    the=["this", "that", "the"],
    save=True,
)
# NOTE: the template below has no {neg} placeholder; `neg` is still passed,
# exactly as in the original call.
t += editor.template(
    "No one {pos_verb_present}s {the} {air_noun}.",
    neg=neg,
    the=["this", "that", "the"],
    save=True,
)
test = MFT(t.data, labels=0, templates=t.templates)
suite.add(
    test,
    "simple negations: negative",
    "Negation",
    "Very simple negations of positive statements",
)


In [56]:
# Simple negations of negative statements: the prediction may be neutral or
# positive, but must not be negative (0).
t = editor.template(
    "{it} {air_noun} {nt} {neg_adj}.",
    it=["This", "That", "The"],
    nt=["is not", "isn't"],
    save=True,
)
t += editor.template(
    "{it} {benot} {a:neg_adj} {air_noun}.",
    it=["It", "This", "That"],
    benot=["is not", "isn't", "was not", "wasn't"],
    save=True,
)
neg = ["I can't say I", "I don't", "I would never say I", "I don't think I", "I didn't"]
t += editor.template(
    "{neg} {neg_verb_present} {the} {air_noun}.",
    neg=neg,
    the=["this", "that", "the"],
    save=True,
)
# NOTE: the template below has no {neg} placeholder; `neg` is still passed,
# exactly as in the original call.
t += editor.template(
    "No one {neg_verb_present}s {the} {air_noun}.",
    neg=neg,
    the=["this", "that", "the"],
    save=True,
)


def is_not_0(x, pred, *args):
    """Return True when the predicted label is anything other than 0 (negative)."""
    return pred != 0


test = MFT(t.data, Expect.single(is_not_0), templates=t.templates)
suite.add(
    test,
    "simple negations: not negative",
    "Negation",
    "Very simple negations of negative statements. Expectation requires prediction to NOT be negative (i.e. neutral or positive)",
)


In [57]:
# Negating a neutral statement should still be neutral (label 1).
# e.g., "The airline is not Italian."
t = editor.template(
    "{it} {air_noun} {nt} {neutral_adj}.",
    it=["This", "That", "The"],
    nt=["is not", "isn't"],
    save=True,
)
t += editor.template(
    "{it} {benot} {a:neutral_adj} {air_noun}.",
    it=["It", "This", "That"],
    benot=["is not", "isn't", "was not", "wasn't"],
    save=True,
)
neg = ["I can't say I", "I don't", "I would never say I", "I don't think I", "I didn't"]
t += editor.template(
    "{neg} {neutral_verb_present} {the} {air_noun}.",
    neg=neg,
    the=["this", "that", "the"],
    save=True,
)
test = MFT(t.data, labels=1, templates=t.templates)
suite.add(
    test,
    "simple negations: not neutral is still neutral",
    "Negation",
    "Negating neutral statements should still result in neutral predictions",
)


Different templates:

In [58]:
# 'pilot' is left out because these templates refer back to the noun with
# "it", which needs a thing rather than a person.
air_noun_it = [noun for noun in editor.lexicons["air_noun"] if noun != "pilot"]

# "I thought X would be <positive>, but it was not" -> expect negative (0).
# NOTE: `nt` is passed but the first template has no {nt} placeholder.
t = editor.template(
    "I thought {it} {air_noun} would be {pos_adj}, but it {neg}.",
    air_noun=air_noun_it,
    neg=["was not", "wasn't"],
    it=["this", "that", "the"],
    nt=["is not", "isn't"],
    save=True,
)
t += editor.template(
    "I thought I would {pos_verb_present} {the} {air_noun}, but I {neg}.",
    neg=["did not", "didn't"],
    the=["this", "that", "the"],
    save=True,
)
test = MFT(t.data, labels=0, templates=t.templates)
suite.add(
    test,
    "simple negations: I thought x was positive, but it was not (should be negative)",
    "Negation",
    "",
    overwrite=True,
)


In [59]:
# "I thought X would be <negative>, but it was not" -> must not be negative.
# e.g., "I thought I would despise the company, but I didn't."
# NOTE: `nt` is passed but the first template has no {nt} placeholder.
t = editor.template(
    "I thought {it} {air_noun} would be {neg_adj}, but it {neg}.",
    air_noun=air_noun_it,
    neg=["was not", "wasn't"],
    it=["this", "that", "the"],
    nt=["is not", "isn't"],
    save=True,
)
t += editor.template(
    "I thought I would {neg_verb_present} {the} {air_noun}, but I {neg}.",
    neg=["did not", "didn't"],
    the=["this", "that", "the"],
    save=True,
)
# `is_not_0` accepts any prediction other than 0 (negative).
test = MFT(t.data, Expect.single(is_not_0), templates=t.templates)
suite.add(
    test,
    "simple negations: I thought x was negative, but it was not (should be neutral or positive)",
    "Negation",
    "",
)


In [60]:
# "I thought X would be <neutral>, but it was not" -> expect neutral (1).
# e.g., "I thought the airline would be Italian, but it wasn't."
# NOTE: `nt` is passed but the first template has no {nt} placeholder.
t = editor.template(
    "I thought {it} {air_noun} would be {neutral_adj}, but it {neg}.",
    air_noun=air_noun_it,
    neg=["was not", "wasn't"],
    it=["this", "that", "the"],
    nt=["is not", "isn't"],
    save=True,
)
t += editor.template(
    "I thought I would {neutral_verb_present} {the} {air_noun}, but I {neg}.",
    neg=["did not", "didn't"],
    the=["this", "that", "the"],
    save=True,
)
# labels=1: every sample is expected to be predicted as neutral.
test = MFT(t.data, labels=1, templates=t.templates)
suite.add(
    test,
    "simple negations: but it was not (neutral) should still be neutral",
    "Negation",
    "",
)


Harder: negation with neutral in the middle

In [61]:
# Hard negation: a neutral clause sits between the negation and the positive
# statement it negates; the result is expected to be negative (0).
# e.g., "I wouldn't say, given my history with airplanes, that this pilot is excellent."
neutral = [
    "that I am from Brazil",
    "my history with airplanes",
    "all that I've seen over the years",
    "the time that I've been flying",
    "it's a Tuesday",
]
t = editor.template(
    "{neg}, given {neutral}, that {it} {air_noun} {be} {pos_adj}.",
    neutral=neutral,
    neg=["I don't think", "I can't say", "I wouldn't say"],
    it=["this", "that", "the"],
    be=["is", "was"],
    save=True,
)
t += editor.template(
    "{neg}, given {neutral}, that {it} {be} {a:pos_adj} {air_noun}.",
    neutral=neutral,
    neg=["I don't think", "I can't say", "I wouldn't say"],
    it=["this", "that", "the"],
    be=["is", "was"],
    save=True,
)
t += editor.template(
    "{neg}, given {neutral}, that {i} {pos_verb_present} {the} {air_noun}.",
    neutral=neutral,
    neg=["I don't think", "I can't say", "I wouldn't say"],
    i=["I", "we"],
    the=["this", "that", "the"],
    save=True,
)
# Keep a random sample of 1000 sentences (drawn without replacement).
t.data = list(np.random.choice(t.data, 1000, replace=False))
test = MFT(t.data, labels=0, templates=t.templates)
suite.add(
    test,
    "Hard: Negation of positive with neutral stuff in the middle (should be negative)",
    "Negation",
    "",
)


In [62]:
# Hard negation: a neutral clause sits between the negation and the negative
# statement it negates; the prediction must not be negative (0), i.e. it
# should be positive or neutral.
# e.g., "I don't think, given all that I've seen over the years, that the service is difficult."
neutral = [
    "that I am from Brazil",
    "my history with airplanes",
    "all that I've seen over the years",
    "the time that I've been flying",
    "it's a Tuesday",
]
t = editor.template(
    "{neg}, given {neutral}, that {it} {air_noun} {be} {neg_adj}.",
    neutral=neutral,
    neg=["I don't think", "I can't say", "I wouldn't say"],
    it=["this", "that", "the"],
    be=["is", "was"],
    save=True,
)
# The sentence openers use a capital "I", matching the first template here
# and the sibling positive/neutral "hard negation" cells; lowercase "i"
# would make the generated sentences start with a lowercase letter.
t += editor.template(
    "{neg}, given {neutral}, that {it} {be} {a:neg_adj} {air_noun}.",
    neutral=neutral,
    neg=["I don't think", "I can't say", "I wouldn't say"],
    it=["this", "that", "the"],
    be=["is", "was"],
    save=True,
)
t += editor.template(
    "{neg}, given {neutral}, that {i} {neg_verb_present} {the} {air_noun}.",
    neutral=neutral,
    neg=["I don't think", "I can't say", "I wouldn't say"],
    i=["I", "we"],
    the=["this", "that", "the"],
    save=True,
)
# Keep a random sample of 1000 sentences (drawn without replacement).
t.data = list(np.random.choice(t.data, 1000, replace=False))
# `is_not_0` (defined in an earlier cell) passes when the prediction is not 0.
test = MFT(t.data, Expect.single(is_not_0), templates=t.templates)
suite.add(
    test,
    "Hard: Negation of negative with neutral stuff in the middle (should be positive or neutral)",
    "Negation",
    "",
)


In [63]:
# Hard negation: a neutral clause sits between the negation and a neutral
# statement; the result is expected to stay neutral (1).
# e.g., "I can't say, given it's a Tuesday, that that aircraft was British."
neutral = [
    "that I am from Brazil",
    "my history with airplanes",
    "all that I've seen over the years",
    "the time that I've been flying",
    "it's a Tuesday",
]
t = editor.template(
    "{neg}, given {neutral}, that {it} {air_noun} {be} {neutral_adj}.",
    neutral=neutral,
    neg=["I don't think", "I can't say", "I wouldn't say"],
    it=["this", "that", "the"],
    be=["is", "was"],
    save=True,
)
t += editor.template(
    "{neg}, given {neutral}, that {it} {be} {a:neutral_adj} {air_noun}.",
    neutral=neutral,
    neg=["I don't think", "I can't say", "I wouldn't say"],
    it=["this", "that", "the"],
    be=["is", "was"],
    save=True,
)
t += editor.template(
    "{neg}, given {neutral}, that {i} {neutral_verb_present} {the} {air_noun}.",
    neutral=neutral,
    neg=["I don't think", "I can't say", "I wouldn't say"],
    i=["I", "we"],
    the=["this", "that", "the"],
    save=True,
)
# Keep a random sample of 1000 sentences (drawn without replacement).
t.data = list(np.random.choice(t.data, 1000, replace=False))
test = MFT(t.data, labels=1, templates=t.templates)
suite.add(
    test,
    "negation of neutral with neutral in the middle, should still neutral",
    "Negation",
    "",
)



## Capability: SRL

My opinion matters more than other people's opinions

In [64]:
# SRL: when the author and a third party disagree, the author's opinion
# decides the expected label.
change = [" but", ""]  # with and without the contrastive "but"
others = ["some people", "my parents", "my friends", "people"]

# Author is positive, third party is negative -> expect positive (2).
templates = [
    "Some people think you are {neg_adj},{change} I think you are {pos_adj}.",
    "I think you are {pos_adj},{change} some people think you are {neg_adj}.",
    "I had heard you were {neg_adj},{change} I think you are {pos_adj}.",
    "I think you are {pos_adj},{change} I had heard you were {neg_adj}.",
]
t = editor.template(templates, change=change, unroll=True, labels=2, save=True)

templates = [
    "{others} {neg_verb_present} you,{change} I {pos_verb_present} you.",
    "I {pos_verb_present} you,{change} {others} {neg_verb_present} you.",
]
t += editor.template(templates, others=others, change=change, unroll=True, labels=2, save=True)

# Author is negative, third party is positive -> expect negative (0).
templates = [
    "Some people think you are {pos_adj},{change} I think you are {neg_adj}.",
    "I think you are {neg_adj},{change} some people think you are {pos_adj}.",
    "I had heard you were {pos_adj},{change} I think you are {neg_adj}.",
    "I think you are {neg_adj},{change} I had heard you were {pos_adj}.",
]
t += editor.template(templates, change=change, unroll=True, labels=0, save=True)

templates = [
    "{others} {pos_verb_present} you,{change} I {neg_verb_present} you.",
    "I {neg_verb_present} you,{change} {others} {pos_verb_present} you.",
]
t += editor.template(templates, others=others, change=change, unroll=True, labels=0, save=True)

# `t` carries data, labels and templates, so it is unpacked straight into MFT.
test = MFT(**t)
description = '''Have conflicting statements where the author has an opinion and a third party has a contrary opinion.
Expect sentiment to be the authors'. Example:
"Some people think you are great, but I think you are terrible" -> should be negative
'''
suite.add(test, "my opinion is what matters", "SRL", description)


Q & A form: yes

In [65]:
# Q & A form answered with "Yes": the answer confirms the sentiment asked about.

# Positive question + "Yes" -> expect positive (2).
# e.g., "Do I think that pilot was excellent? Yes"
t = editor.template(
    "Do I think {it} {air_noun} {be} {pos_adj}? Yes",
    it=["that", "this", "the"],
    be=["is", "was"],
    save=True,
    labels=2,
)
t += editor.template(
    "Do I think {it} {be} {a:pos_adj} {air_noun}? Yes",
    it=["it", "this", "that"],
    be=["is", "was"],
    save=True,
    labels=2,
)
t += editor.template(
    "Did {i} {pos_verb_present} {the} {air_noun}? Yes",
    i=["I", "we"],
    the=["this", "that", "the"],
    save=True,
    labels=2,
)

# Negative question + "Yes" -> expect negative (0).
t += editor.template(
    "Do I think {it} {air_noun} {be} {neg_adj}? Yes",
    it=["that", "this", "the"],
    be=["is", "was"],
    save=True,
    labels=0,
)
t += editor.template(
    "Do I think {it} {be} {a:neg_adj} {air_noun}? Yes",
    it=["it", "this", "that"],
    be=["is", "was"],
    save=True,
    labels=0,
)
t += editor.template(
    "Did {i} {neg_verb_present} {the} {air_noun}? Yes",
    i=["I", "we"],
    the=["this", "that", "the"],
    save=True,
    labels=0,
)
test = MFT(**t)
suite.add(test, "Q & A: yes", "SRL", "TODO_DESCRIPTION")


In [66]:
# Q & A form answered with "Yes" about a neutral property -> expect neutral (1).
t = editor.template(
    "Do I think {it} {air_noun} {be} {neutral_adj}? Yes",
    it=["that", "this", "the"],
    be=["is", "was"],
    save=True,
)
t += editor.template(
    "Do I think {it} {be} {a:neutral_adj} {air_noun}? Yes",
    it=["it", "this", "that"],
    be=["is", "was"],
    save=True,
)
t += editor.template(
    "Did {i} {neutral_verb_present} {the} {air_noun}? Yes",
    i=["I", "we"],
    the=["this", "that", "the"],
    save=True,
)
test = MFT(t.data, labels=1, templates=t.templates)
suite.add(test, "Q & A: yes (neutral)", "SRL", "TODO_DESCRIPTION")


In [67]:
# Q & A form answered with "No": the answer reverses the sentiment asked about.

# Positive question + "No" -> expect negative (0).
t = editor.template(
    "Do I think {it} {air_noun} {be} {pos_adj}? No",
    it=["that", "this", "the"],
    be=["is", "was"],
    save=True,
    labels=0,
)
t += editor.template(
    "Do I think {it} {be} {a:pos_adj} {air_noun}? No",
    it=["it", "this", "that"],
    be=["is", "was"],
    save=True,
    labels=0,
)
t += editor.template(
    "Did {i} {pos_verb_present} {the} {air_noun}? No",
    i=["I", "we"],
    the=["this", "that", "the"],
    save=True,
    labels=0,
)

# Negative question + "No" -> labelled neutral (1); the expectation below
# accepts any non-negative prediction for these samples.
t += editor.template(
    "Do I think {it} {air_noun} {be} {neg_adj}? No",
    it=["that", "this", "the"],
    be=["is", "was"],
    save=True,
    labels=1,
)
t += editor.template(
    "Do I think {it} {be} {a:neg_adj} {air_noun}? No",
    it=["it", "this", "that"],
    be=["is", "was"],
    save=True,
    labels=1,
)
t += editor.template(
    "Did {i} {neg_verb_present} {the} {air_noun}? No",
    i=["I", "we"],
    the=["this", "that", "the"],
    save=True,
    labels=1,
)


def allow_for_neutral(x, pred, _, label, _2):
    """Pass when a label-1 sample is not predicted 0; otherwise require pred == label."""
    if label == 1:
        return pred != 0
    return pred == label


test = MFT(t.data, Expect.single(allow_for_neutral), labels=t.labels, templates=t.templates)
suite.add(test, "Q & A: no", "SRL", "TODO_DESCRIPTION")


In [68]:
# Q & A form answered with "No" about a neutral property -> expect neutral (1).
t = editor.template(
    "Do I think {it} {air_noun} {be} {neutral_adj}? No",
    it=["that", "this", "the"],
    be=["is", "was"],
    save=True,
)
t += editor.template(
    "Do I think {it} {be} {a:neutral_adj} {air_noun}? No",
    it=["it", "this", "that"],
    be=["is", "was"],
    save=True,
)
t += editor.template(
    "Did {i} {neutral_verb_present} {the} {air_noun}? No",
    i=["I", "we"],
    the=["this", "that", "the"],
    save=True,
)

test = MFT(t.data, labels=1, templates=t.templates)
suite.add(test, "Q & A: no (neutral)", "SRL", "TODO_DESCRIPTION")


In [70]:
# Copy each test's name, description and capability from the suite's
# bookkeeping (`suite.info`) onto the test object itself.
for test in suite.tests:
    suite.tests[test].name = test
    # The key is 'description'; the previous 'description]' was a typo and
    # raised KeyError.
    suite.tests[test].description = suite.info[test]['description']
    suite.tests[test].capability = suite.info[test]['capability']

In [71]:
# Write the assembled test suite to a file.
path = 'sentiment_suite.pkl'  # output file name for the saved suite
suite.save(path)  # save every test added above to `path`