
# Sentimental Dataset 2
This notebook is responsible for creating test cases for sentiment analysis. After processing all test suites, it will save the test cases to sentimental_suite_dt2.pkl, and this notebook is used to create and preprocess a second specific sentiment analysis dataset espescialy from climate-related tweets. Test on following capabilities
    - Capability: Vocabulary
        - MFTs
        - Intensifiers and reducers
        - Add positive phrases
    - Capability: Robustness
        - punctuation, contractions, typos
    - Capability: Temporal Awareness
    - Capability: Negation
    - Capability: SRL

Note:
- MFT(Minimum Functionality Test): focuses on evaluating whether a model has the basic functionality 
- DIR(Directional Expectation test). determine whether a model’s predictions are consistent with a prior expectation or hypothesis 
- INV (Invariance testing) is a type of testing in ML that checks whether a model is invariant to certain transformations or changes in the input data. 



ref:
- https://www.godeltech.com/how-to-automate-the-testing-process-for-machine-learning-systems/

To test each test case fail or not, depends on the `Label` that provide in line like this
```test = MFT(**t, labels=0, name=name, capability = 'Vocabulary',description=desc)```
ps. it can be changed depends on the type(MFT, DIR, or INV) that select to test 

For the sentimental scenario 2
- ' Risk': 0
- ' Opportunity': 2
- ' Neutral': 1

#ps. i find the word in the google. For some word array, i didn't use the suggest mask because I think word is not appropriate that much


In [1]:
%load_ext autoreload
%autoreload 2

import checklist
import spacy
import itertools

import checklist.editor
import checklist.text_generation
from checklist.test_types import MFT, INV, DIR
from checklist.expect import Expect
import numpy as np
import spacy
from checklist.test_suite import TestSuite
from checklist.perturb import Perturb
from transformers import pipeline

In [2]:
editor = checklist.editor.Editor() # creates an instance of the Editor class 
editor.tg # generate the new text data of the Checklist library fro the testing model and suggest the word

<checklist.text_generation.TextGenerator at 0x260a00c3350>

In [4]:
import csv
r = csv.DictReader(open('climate.csv')) # read file climate dataset

# This dataset has only 2 column
labels = []
# confs = []
# airlines = []
tdata = []
# reasons = []

# Append data from csv file into the array
for row in r:
    sentiment, text = row['label'], row['text']
    labels.append(sentiment)
    # confs.append(conf)
    # airlines.append(airline)
    tdata.append(text)
    # reasons.append(row['negativereason'])

mapping = {' Risk': 0, ' Opportunity': 2, ' Neutral': 1} # define to map the label data into number
labels = np.array([mapping[x] for x in labels]).astype(int)

In [5]:
# This is the model from Spacy library used for the NLP task below in the parsed_question. to parse data into the pipeline
nlp = spacy.load('en_core_web_sm')

In [6]:
# turn data into pipeline then convert to list, and saved the result in `parsed_data`
sentences = tdata
parsed_data = list(nlp.pipe(sentences))

In [7]:
# Test suite is container for the unit test. used for the test case.
suite = TestSuite()

## Capability: Vocabulary

### MFTs

In [8]:
# define the noun in the array
climate_noun = ['climate zone', 'pollution', 'environment', 'global warming', 'atmosphere', 'temperature', 'conditions', 'weather', 'seasonality', 'precipitation', 
            'humidity', 'ecosystem', 'environment']

# add the noun array in to lexicon which can use like mask. On this line, it will be used in {climate_noun} which can be changed when testing.
editor.add_lexicon('climate_noun', climate_noun)

In [10]:
# define positive, negative, and neutral adjective. 
#ps. i find the word in the google, and i didn't use the suggest mask the word. i think word is not appropriate that much
pos_adj = ['delightful', 'stunning', 'brilliant', 'joyful', 'refreshing', 'invigorating', 'pleasant', 'lovely', 'beautiful', 'serene']
neg_adj = ['dreadful', 'harsh', 'gloomy', 'bleak', 'miserable', 'severe', 'oppressive', 'stormy', 'chilling', 'unpleasant']
neutral_adj = ['moderate', 'mild', 'variable', 'seasonal', 'typical', 'average', 'standard', 'usual', 'regular', 'current']

# add dictionary of word into object 
editor.add_lexicon('pos_adj', pos_adj, overwrite=True)
editor.add_lexicon('neg_adj', neg_adj, overwrite=True )
editor.add_lexicon('neutral_adj', neutral_adj, overwrite=True)

In [12]:
# define positive, negative, and neutral verb
pos_verb_present = ['shine', 'warm', 'refresh', 'calm', 'invigorate']
neg_verb_present = ['bluster', 'chills', 'floods',  'ravages', 'threatens']
neutral_verb_present = ['change', 'fall', 'happen', 'persist', 'continue']

# define past participle verb
pos_verb_past = [ 'shone', 'warmed', 'refreshed', 'calmed', 'Invigorated']
neg_verb_past = ['blustered', 'chilled', 'flooded',  'ravaged', 'threatened']
neutral_verb_past = ['changed', 'fell', 'happened', 'persisted', 'continued']

# add dictionary of word into object 
editor.add_lexicon('pos_verb_present', pos_verb_present, overwrite=True)
editor.add_lexicon('neg_verb_present', neg_verb_present, overwrite=True)
editor.add_lexicon('neutral_verb_present', neutral_verb_present, overwrite=True)
editor.add_lexicon('pos_verb_past', pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb_past', neg_verb_past, overwrite=True)
editor.add_lexicon('neutral_verb_past', neutral_verb_past, overwrite=True)
editor.add_lexicon('pos_verb', pos_verb_present+ pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb', neg_verb_present + neg_verb_past, overwrite=True)
editor.add_lexicon('neutral_verb', neutral_verb_present + neutral_verb_past, overwrite=True)

Individual words

In [13]:
# Add individual word test: positive verb
test = MFT(pos_adj + pos_verb_present + pos_verb_past, labels=2)
suite.add(test, 'single positive words', 'Vocabulary', '')

In [14]:
# Add individual word test: negative verb
test = MFT(neg_adj + neg_verb_present + neg_verb_past, labels=0)
suite.add(test, 'single negative words', 'Vocabulary', '')

In [15]:
# Add individual word test: neutral verb
test = MFT(neutral_adj + neutral_verb_present + neutral_verb_past, labels=1)
suite.add(test, 'single neutral words', 'Vocabulary', 'TODO_DESCRIPTION')

Words in context

In [16]:
# Add the test. On each line provided different context with the mask provided
t = editor.template('{it} {climate_noun} {be} {pos_adj}.', it=['The', 'This', 'That'], be=['is', 'was'], labels=2, save=True)
t += editor.template('{it} {be} {a:pos_adj} {climate_noun}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=2, save=True)
t += editor.template('{i} {pos_verb} {the} {climate_noun}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=2, save=True)
t += editor.template('{it} {climate_noun} {be} {neg_adj}.', it=['That', 'This', 'The'], be=['is', 'was'], labels=0, save=True)
t += editor.template('{it} {be} {a:neg_adj} {climate_noun}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=0, save=True)
t += editor.template('{i} {neg_verb} {the} {climate_noun}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=0, save=True)

# **t means unpacking the dictionary into keyword argument
test = MFT(**t)
suite.add(test, 'Sentiment-laden words in context', 'Vocabulary', 'Use positive and negative verbs and adjectives with climate nouns such as climate zone, pollution, environment, global warming, atmosphere, etc. E.g. "This was a worsr pollution"',overwrite=True)


In [17]:
# add dictionary of word into object 
editor.lexicons['neutral_verb']

['change',
 'fall',
 'happen',
 'persist',
 'continue',
 'changed',
 'fell',
 'happened',
 'persisted',
 'continued']

In [18]:
# Firstly, need to define.
# Next, needed to use += to append test on the list 
t = editor.template('{it} {climate_noun} {be} {neutral_adj}.', it=['That', 'This', 'The'], be=['is', 'was'], save=True)
t += editor.template('{it} {be} {a:neutral_adj} {climate_noun}.', it=['It', 'This', 'That'], be=['is', 'was'], save=True)
t += editor.template('{i} {neutral_verb} {the} {climate_noun}.', i=['I', 'We'], the=['this', 'that', 'the'], save=True)
test = MFT(t.data, labels=1, templates=t.templates)
suite.add(test, 'neutral words in context', 'Vocabulary', 'Use neutral verbs and adjectives with climate nouns such as climate zone, pollution, environment, global warming, atmosphere, etc. E.g. "The atmosphere is stable"')

### Intensifiers and reducers

In [19]:
# suggest the mask from the context. In this context mask provided the adverb
print(' , '.join(editor.suggest('{it} {be} {a:mask} {pos_adj} {climate_noun}.', it=['It', 'This', 'That'], be=['is', 'was'])[:50]))

very , really , absolutely , extremely , rather , surprisingly , quite , incredibly , truly , amazingly , utterly , exceptionally , extraordinarily , most , unusually , especially , overall , entirely , almost , unbelievably , otherwise , absolute , exceedingly , intensely , immensely , generally , amazing , altogether , overwhelmingly , obviously , pretty , unusual , equally , oddly , extraordinary , unexpectedly , simply , a , particularly , actual , appropriately , actually , apparently , ideal , wonderfully , excellent , fairly , enormous , remarkably , increasingly


In [20]:
# define adverb
intens_adj = ['very', 'really', 'absolutely', 'truly', 'extremely', 'quite', 'incredibly', 'amazingly', 'especially', 'exceptionally', 'unbelievably', 'utterly', 'exceedingly', 'rather', 'totally', 'particularly']

In [21]:
# suggest the mask from the context. In this context mask provided the adverb
print(', '.join(editor.suggest('{i} {mask} {pos_verb} {the} {climate_noun}.', i=['I', 'We'], the=['this', 'that', 'the'])[:100]))

have, just, also, really, never, had, actually, always, simply, almost, still, finally, definitely, absolutely, only, certainly, totally, even, literally, completely, already, truly, intentionally, mean, got, successfully, nearly, would, rather, basically, deliberately, probably, think, greatly, merely, immediately, then, want, like, first, need, quickly, could, further, did, will, wanted, feel, can, kinda, often, effectively, must, cannot, hope, should, all, strongly, was, fully, hardly, barely, essentially, get, slowly, actively, now, felt, were, mostly, so, constantly, shall, generally, may, love, obviously, fundamentally, seriously, accidentally, say, might, recently, both, again, do, sure, gently, see, thoroughly, specifically, helped, temporarily, saw, are, personally, said, somehow, purposely, eventually


In [23]:
# define adverb
intens_verb = [ 'really', 'absolutely', 'truly', 'extremely',  'especially',  'utterly',  'totally', 'particularly', 'highly', 'definitely', 'certainly', 'genuinely', 'honestly', 'strongly', 'sure', 'sincerely']

In [24]:
monotonic_label = Expect.monotonic(increasing=True, tolerance=0.1) #label the model: prediction is increasing like pattern and tolerance=0.1. 
non_neutral_pred = lambda pred, *args, **kwargs: pred != 1 #to check that the neutral is not equal 1, used to filter the neutral out
monotonic_label = Expect.slice_pairwise(monotonic_label, non_neutral_pred) #combine label

In [25]:
# add dictionary of word into object 
# need to define first then, use `+=` to add. for protecting duplicating problem
t = editor.template(['{it} {be} {a:pos_adj} {climate_noun}.', '{it} {be} {a:intens} {pos_adj} {climate_noun}.'] , intens=intens_adj, it=['It', 'This', 'That'], be=['is', 'was'], nsamples=500, save=True)
t += editor.template(['{i} {pos_verb} {the} {climate_noun}.', '{i} {intens} {pos_verb} {the} {climate_noun}.'], intens=intens_verb, i=['I', 'We'], the=['this', 'that', 'the'], nsamples=500, save=True)
t += editor.template(['{it} {be} {a:neg_adj} {climate_noun}.', '{it} {be} {a:intens} {neg_adj} {climate_noun}.'] , intens=intens_adj, it=['It', 'This', 'That'], be=['is', 'was'], nsamples=500, save=True)
t += editor.template(['{i} {neg_verb} {the} {climate_noun}.', '{i} {intens} {neg_verb} {the} {climate_noun}.'], intens=intens_verb, i=['I', 'We'], the=['this', 'that', 'the'], nsamples=500, save=True)
t.data[:5]


[['It is a delightful temperature.',
  'It is a particularly delightful temperature.'],
 ['This is a serene precipitation.', 'This is a truly serene precipitation.'],
 ['That was an invigorating seasonality.',
  'That was an especially invigorating seasonality.'],
 ['It is a stunning humidity.', 'It is an exceedingly stunning humidity.'],
 ['It is a stunning ecosystem.', 'It is an incredibly stunning ecosystem.']]

In [26]:
# add dictionary of word into object 
# need to define first then, use `+=` to add. for protecting duplicating problem
t = editor.template(['{it} {be} {a:pos_adj} {climate_noun}.', '{it} {be} {a:intens} {pos_adj} {climate_noun}.'] , intens=intens_adj, it=['It', 'This', 'That'], be=['is', 'was'], nsamples=500, save=True)
t += editor.template(['{i} {pos_verb} {the} {climate_noun}.', '{i} {intens} {pos_verb} {the} {climate_noun}.'], intens=intens_verb, i=['I', 'We'], the=['this', 'that', 'the'], nsamples=500, save=True)
t += editor.template(['{it} {be} {a:neg_adj} {climate_noun}.', '{it} {be} {a:intens} {neg_adj} {climate_noun}.'] , intens=intens_adj, it=['It', 'This', 'That'], be=['is', 'was'], nsamples=500, save=True)
t += editor.template(['{i} {neg_verb} {the} {climate_noun}.', '{i} {intens} {neg_verb} {the} {climate_noun}.'], intens=intens_verb, i=['I', 'We'], the=['this', 'that', 'the'], nsamples=500, save=True)
test = DIR(t.data, monotonic_label, templates=t.templates)
description = '''Test is composed of pairs of sentences (x1, x2), where we add an intensifier
such as "really",or "very" to x2 and expect the confidence to NOT go down (with tolerance=0.1). e.g.:
x1 = "That was a nice weather"
x2 = "That was a very good atmostphere"
We disregard cases where the prediction of x1 is neutral.
'''

#add the test into suite
suite.add(test, 'intensifiers', 'Vocabulary', description)


In [27]:
# define reducer adjective to reduce the meaning.
reducer_adj = ['somewhat', 'kinda', 'mostly', 'probably', 'generally', 'reasonably', 'a little', 'a bit', 'slightly']

In [28]:
monotonic_label_down = Expect.monotonic(increasing=False, tolerance=0.1) #label the model: prediction is increasing like pattern and tolerance=0.1. 
monotonic_label_down = Expect.slice_pairwise(monotonic_label_down, non_neutral_pred) #combine label

In [29]:
# add dictionary of word into object 
# need to define first then, use `+=` to add. for protecting duplicating problem
t = editor.template(['{it} {climate_noun} {be} {pos_adj}.', '{it} {climate_noun} {be} {red} {pos_adj}.'] , red=reducer_adj, it=['The', 'This', 'That'], be=['is', 'was'], nsamples=1000, save=True)
t += editor.template(['{it} {climate_noun} {be} {neg_adj}.', '{it} {climate_noun} {be} {red} {neg_adj}.'] , red=reducer_adj, it=['The', 'This', 'That'], be=['is', 'was'], nsamples=1000, save=True)
t.data[:50]

[['This humidity was serene.', 'This humidity was somewhat serene.'],
 ['That humidity was delightful.', 'That humidity was mostly delightful.'],
 ['The climate zone is invigorating.',
  'The climate zone is slightly invigorating.'],
 ['That conditions is brilliant.', 'That conditions is somewhat brilliant.'],
 ['That atmosphere is serene.', 'That atmosphere is kinda serene.'],
 ['That precipitation was pleasant.',
  'That precipitation was reasonably pleasant.'],
 ['The precipitation was serene.', 'The precipitation was generally serene.'],
 ['The environment is beautiful.', 'The environment is a bit beautiful.'],
 ['The precipitation was invigorating.',
  'The precipitation was kinda invigorating.'],
 ['That atmosphere is invigorating.',
  'That atmosphere is a little invigorating.'],
 ['This pollution is delightful.', 'This pollution is reasonably delightful.'],
 ['The environment is lovely.', 'The environment is generally lovely.'],
 ['This climate zone is refreshing.',
  'This cli

In [30]:
# add dictionary of word into object 
# need to define first then, use `+=` to add. for protecting duplicating problem
t = editor.template(['{it} {climate_noun} {be} {pos_adj}.', '{it} {climate_noun} {be} {red} {pos_adj}.'] , red=reducer_adj, it=['The', 'This', 'That'], be=['is', 'was'], nsamples=1000, save=True)
t += editor.template(['{it} {climate_noun} {be} {neg_adj}.', '{it} {climate_noun} {be} {red} {neg_adj}.'] , red=reducer_adj, it=['The', 'This', 'That'], be=['is', 'was'], nsamples=1000, save=True)
test = DIR(t.data, monotonic_label_down, templates=t.templates)
description = '''Test is composed of pairs of sentences (x1, x2), where we add a reducer
such as "somewhat", or "kinda" to x2 and expect the confidence to NOT go up (with tolerance=0.1). e.g.:
x1 = "The temperature crew was good."
x2 = "The temperature was somewhat good."
We disregard cases where the prediction of x1 is neutral.
'''
suite.add(test, 'reducers', 'Vocabulary', description)


### Add positive phrases

In [31]:
# define and add context into positive variable
positive = editor.template('I {pos_verb_present} weather.').data
positive += editor.template('Weather is {pos_adj}.').data

# define and add context into negative variable
negative = editor.template('I {neg_verb_present} weather.').data
negative += editor.template('Weather are {neg_adj}.').data
negative += ['Never has the atmosphere been so polluted and unsafe for future generations.']

# function to add phrase
def add_phrase_function(phrases):
    def pert(d):
        while d[-1].pos_ == 'PUNCT':
            d = d[:-1]
        d = d.text
        ret = [d + '. ' + x for x in phrases]
        idx = np.random.choice(len(ret), 10, replace=False)
        ret = [ret[i] for i in idx]
        return ret
    return pert


In [32]:
# function to change positive sentiment probability after perturbation
def positive_change(orig_conf, conf):
    softmax = type(orig_conf) in [np.array, np.ndarray]
    if not softmax or orig_conf.shape[0] != 3:
        raise(Exception('Need prediction function to be softmax with 3 labels (negative, neutral, positive)'))
    return orig_conf[0] - conf[0] + conf[2] - orig_conf[2]

# If the change in positive sentiment probability is within 0.1, then the result will return True if the total change is equal to or greater than 0, 
# indicating an increase or no change in positive sentiment. Otherwise, it will return the sum of the calculated change and the tolerance.
def diff_up(orig_pred, pred, orig_conf, conf, labels=None, meta=None):
    tolerance = 0.1
    change = positive_change(orig_conf, conf)
    if change + tolerance >= 0:
        return True
    else:
        return change + tolerance
    
# checks whether the positive sentiment probability has changed by 0.1 or more. If the change is zero or negative, it returns True, 
# meaning the positive sentiment probability has decreased or stayed the same. Otherwise, it returns the negative value of the change.
def diff_down(orig_pred, pred, orig_conf, conf, labels=None, meta=None):
    tolerance = 0.1
    change = positive_change(orig_conf, conf)
    if change - tolerance <= 0:
        return True
    else:
        return -(change - tolerance)
goes_up = Expect.pairwise(diff_up) #for adding positive phrase
goes_down = Expect.pairwise(diff_down) #for adding negative phrase
    

In [33]:
# This test case provided the adding very positive test. When adding the very positive phrase, the probabilily of positive will not go down
t = Perturb.perturb(parsed_data, add_phrase_function(positive), nsamples=500)
test = DIR(t.data, goes_up)
description = 'Add very positive phrases (e.g. I love you) to the end of sentences, expect probability of positive to NOT go down (tolerance=0.1)'
suite.add(test, 'add positive phrases', 'Vocabulary', description)


In [34]:
# have a problem, so i deleted this test case
# t = Perturb.perturb(parsed_data, add_phrase_function(negative), nsamples=500)
# test = DIR(t.data, goes_down)
# description = 'Add very negative phrases (e.g. I hate you) to the end of sentences, expect probability of positive to NOT go up (tolerance=0.1)'
# suite.add(test, 'add negative phrases', 'Vocabulary', description)

## Capability: robustness


### punctuation, contractions, typos

In [35]:
# This test case provided the punctuation 
t = Perturb.perturb(parsed_data, Perturb.punctuation, nsamples=500)
test = INV(t.data)
suite.add(test, 'punctuation', 'Robustness', 'strip punctuation and / or add "."')


In [36]:
# This test case provided the typos test. e.g., `;` `?`
t = Perturb.perturb(sentences, Perturb.add_typos, nsamples=500, typos=1)
test = INV(t.data)
suite.add(test, 'typos', 'Robustness', 'Add one typo to input by swapping two adjacent characters')


In [37]:
# This test case provided the 2 typos in test. e.g., `;` `?`
t = Perturb.perturb(sentences, Perturb.add_typos, nsamples=500, typos=2)
test = INV(t.data)
suite.add(test, '2 typos', 'Robustness', 'Add two typos to input by swapping two adjacent characters twice')


## Capability: temporal awareness

In [38]:
editor.template('{neg_verb_present}').data # check whats include in object `neg_verb_present`

['bluster', 'chills', 'floods', 'ravages', 'threatens']

In [39]:
# add dictionary of word into object 
change = ['but', 'even though', 'although', '']

# need to define first then, use `+=` to add. for protecting duplicating problem
t = editor.template(['I used to think this weather was {neg_adj}, {change} now I think it is {pos_adj}.',
                                 'I think this weather is {pos_adj}, {change} I used to think it was {neg_adj}.',
                                 'In the past I thought this weather was {neg_adj}, {change} now I think it is {pos_adj}.',
                                 'I think this weather is {pos_adj}, {change} in the past I thought it was {neg_adj}.',
                                ] ,
                                 change=change, unroll=True, nsamples=500, save=True, labels=2)
t += editor.template(['I used to {neg_verb_present} this weather, {change} now I {pos_verb_present} it.',
                                 'I {pos_verb_present} this weather, {change} I used to {neg_verb_present} it.',
                                 'In the past I would {neg_verb_present} this weather, {change} now I {pos_verb} it.',
                                 'I {pos_verb_present} this weather, {change} in the past I would {neg_verb_present} it.',
                                ] ,
                                change=change, unroll=True, nsamples=500, save=True, labels=2)

t += editor.template(['I used to think this weather was {pos_adj}, {change} now I think it is {neg_adj}.',
                                 'I think this weather is {neg_adj}, {change} I used to think it was {pos_adj}.',
                                 'In the past I thought this weather was {pos_adj}, {change} now I think it is {neg_adj}.',
                                 'I think this weather is {neg_adj}, {change} in the past I thought it was {pos_adj}.',
                                ] ,
                                 change=change, unroll=True, nsamples=500, save=True, labels=0)
t += editor.template(['I used to {pos_verb_present} this weather, {change} now I {neg_verb_present} it.',
                                 'I {neg_verb_present} this weather, {change} I used to {pos_verb_present} it.',
                                 'In the past I would {pos_verb_present} this weather, {change} now I {neg_verb_present} it.',
                                 'I {neg_verb_present} this weather, {change} in the past I would {pos_verb_present} it.',
                                ] ,
                                change=change, unroll=True, nsamples=500, save=True, labels=0)
test = MFT(**t) # basic function test
description = '''Have two conflicing statements, one about the past and one about the present.
Expect the present to carry the sentiment. Examples:
I used to love this weather, now I hate it -> should be negative
I love this weather, although I used to hate it -> should be positive
'''
suite.add(test, 'used to, but now', 'Temporal', description)



used to should reduce

In [40]:
# Append the data, add more context. To reduce the meaning feeling to become less strong meaning.
t = editor.template(['{it} {be} {a:adj} {climate_noun}.', 'I used to think {it} {be} {a:adj} {climate_noun}.'], it=['it', 'this', 'that'], be=['is', 'was'], adj=editor.lexicons['pos_adj'] + editor.lexicons['neg_adj'], save=True)
t += editor.template(['{i} {verb} {the} {climate_noun}.', '{i} used to {verb} {the} {climate_noun}.'], i=['I', 'We'], the=['this', 'that', 'the'], verb=editor.lexicons['pos_verb_present'] + editor.lexicons['neg_verb_present'], save=True)
t.data[:5]


[['it is a delightful climate zone.',
  'I used to think it is a delightful climate zone.'],
 ['it was a delightful climate zone.',
  'I used to think it was a delightful climate zone.'],
 ['this is a delightful climate zone.',
  'I used to think this is a delightful climate zone.'],
 ['this was a delightful climate zone.',
  'I used to think this was a delightful climate zone.'],
 ['that is a delightful climate zone.',
  'I used to think that is a delightful climate zone.']]

In [41]:
# Add the test case: add used to reduce confidence
t = editor.template(['{it} {be} {a:adj} {climate_noun}.', 'I used to think {it} {be} {a:adj} {climate_noun}.'], it=['it', 'this', 'that'], be=['is', 'was'], adj=editor.lexicons['pos_adj'] + editor.lexicons['neg_adj'], save=True)
t += editor.template(['{i} {verb} {the} {climate_noun}.', '{i} used to {verb} {the} {climate_noun}.'], i=['I', 'We'], the=['this', 'that', 'the'], verb=editor.lexicons['pos_verb_present'] + editor.lexicons['neg_verb_present'], save=True)
test = DIR(t.data, monotonic_label_down, templates=t.templates)
suite.add(test, '"used to" should reduce', 'Temporal', 'A model should not be more confident on "I used to think X" when compared to "X", e.g. "I used to love this airline" should have less confidence than "I love this airline"')



## Capability: Negation

Simple templates:

In [42]:
# add the simple negation test case
t = editor.template('{it} {climate_noun} {nt} {pos_adj}.', it=['This', 'That', 'The'], nt=['is not', 'isn\'t'], save=True)
t += editor.template('{it} {benot} {a:pos_adj} {climate_noun}.', it=['It', 'This', 'That'], benot=['is not',  'isn\'t', 'was not', 'wasn\'t'], save=True)
neg = ['I can\'t say I', 'I don\'t', 'I would never say I', 'I don\'t think I', 'I didn\'t' ]
t += editor.template('{neg} {pos_verb_present} {the} {climate_noun}.', neg=neg, the=['this', 'that', 'the'], save=True)
t += editor.template('No one {pos_verb_present}s {the} {climate_noun}.', neg=neg, the=['this', 'that', 'the'], save=True)
test = MFT(t.data, labels=0, templates=t.templates)
suite.add(test, 'simple negations: negative', 'Negation', 'Very simple negations of positive statements')


In [43]:
# add the simple negation test case.
t = editor.template('{it} {climate_noun} {nt} {neg_adj}.', it=['This', 'That', 'The'], nt=['is not', 'isn\'t'], save=True)
t += editor.template('{it} {benot} {a:neg_adj} {climate_noun}.', it=['It', 'This', 'That'], benot=['is not',  'isn\'t', 'was not', 'wasn\'t'], save=True)
neg = ['I can\'t say I', 'I don\'t', 'I would never say I', 'I don\'t think I', 'I didn\'t' ]
t += editor.template('{neg} {neg_verb_present} {the} {climate_noun}.', neg=neg, the=['this', 'that', 'the'], save=True)
t += editor.template('No one {neg_verb_present}s {the} {climate_noun}.', neg=neg, the=['this', 'that', 'the'], save=True)
# expectation: prediction is not 0 (Risk)
is_not_0 = lambda x, pred, *args: pred != 0
test = MFT(t.data, Expect.single(is_not_0), templates=t.templates)
suite.add(test, 'simple negations: not negative', 'Negation', 'Very simple negations of negative statements. Expectation requires prediction to NOT be negative (i.e. neutral or positive)')


In [44]:
# add not neutral is still neutral test case
t = editor.template('{it} {climate_noun} {nt} {neutral_adj}.', it=['This', 'That', 'The'], nt=['is not', 'isn\'t'], save=True)
t += editor.template('{it} {benot} {a:neutral_adj} {climate_noun}.', it=['It', 'This', 'That'], benot=['is not',  'isn\'t', 'was not', 'wasn\'t'], save=True)
neg = ['I can\'t say I', 'I don\'t', 'I would never say I', 'I don\'t think I', 'I didn\'t' ]
t += editor.template('{neg} {neutral_verb_present} {the} {climate_noun}.', neg=neg, the=['this', 'that', 'the'], save=True)
test = MFT(t.data, labels=1, templates=t.templates) #Expect the Neutral
suite.add(test, 'simple negations: not neutral is still neutral', 'Negation', 'Negating neutral statements should still result in neutral predictions')


Different templates:

In [45]:
climate_noun_it = [x for x in editor.lexicons['climate_noun']]
t = editor.template('I thought {it} {climate_noun} would be {pos_adj}, but it {neg}.', climate_noun=climate_noun_it, neg=['was not', 'wasn\'t'], it=['this', 'that', 'the'], nt=['is not', 'isn\'t'], save=True)
t += editor.template('I thought I would {pos_verb_present} {the} {climate_noun}, but I {neg}.', neg=['did not', 'didn\'t'], the=['this', 'that', 'the'], save=True)
test = MFT(t.data, labels=0, templates=t.templates) # Expect to be Risk
suite.add(test, 'simple negations: I thought x was positive, but it was not (should be negative)', 'Negation', '', overwrite=True)


In [46]:
# e.g., I thought this environment would be dreadful, but it wasn't.
t = editor.template('I thought {it} {climate_noun} would be {neg_adj}, but it {neg}.', climate_noun=climate_noun_it, neg=['was not', 'wasn\'t'], it=['this', 'that', 'the'], nt=['is not', 'isn\'t'], save=True)
t += editor.template('I thought I would {neg_verb_present} {the} {climate_noun}, but I {neg}.', neg=['did not', 'didn\'t'], the=['this', 'that', 'the'], save=True)
# expectation: prediction is not 0 (not risk)
test = MFT(t.data, Expect.single(is_not_0), templates=t.templates)
suite.add(test, 'simple negations: I thought x was negative, but it was not (should be neutral or positive)', 'Negation', '')


In [47]:
# e.g., I thought this atmosphere would be usual, but it wasn't.
t = editor.template('I thought {it} {climate_noun} would be {neutral_adj}, but it {neg}.', climate_noun=climate_noun_it, neg=['was not', 'wasn\'t'], it=['this', 'that', 'the'], nt=['is not', 'isn\'t'], save=True)
t += editor.template('I thought I would {neutral_verb_present} {the} {climate_noun}, but I {neg}.', neg=['did not', 'didn\'t'], the=['this', 'that', 'the'], save=True)
# expectation: prediction is not 0 (not risk)
test = MFT(t.data, labels=1, templates=t.templates) # expect to be Neutral
suite.add(test, 'simple negations: but it was not (neutral) should still be neutral', 'Negation', '')


Harder: negation with neutral in the middle

In [48]:
# add the test case: negation with neutral in the middle.
# Expect to be Risk
neutral =['At Karnataka, we produced and sold 2.2 million tonnes during FY2018,', 'in line with the allocated environmental clearance (EC) limits.', 'The Honourable Supreme Court has increased the cap on production of iron ore for the state from 30 to 35 million tonnes, ', 'and accordingly increase in our allocation for Karnataka from 2.3 to 4.5 million tonnes in May 2018.']
t = editor.template('{neg}, given {neutral}, that {it} {climate_noun} {be} {pos_adj}.', neutral=neutral, neg=['I don\'t think', 'I can\'t say', 'I wouldn\'t say'], it=['this', 'that', 'the'], be=['is', 'was'], save=True)
t += editor.template('{neg}, given {neutral}, that {it} {be} {a:pos_adj} {climate_noun}.',neutral=neutral,  neg=['I don\'t think', 'I can\'t say', 'I wouldn\'t say'], it=['this', 'that', 'the'], be=['is', 'was'], save=True)
t += editor.template('{neg}, given {neutral}, that {i} {pos_verb_present} {the} {climate_noun}.',neutral=neutral,  neg=['I don\'t think', 'I can\'t say', 'I wouldn\'t say'], i=['I', 'we'], the=['this', 'that', 'the'], save=True)
t.data = list(np.random.choice(t.data, 1000, replace=False))
test = MFT(t.data, labels=0, templates=t.templates)
suite.add(test, 'Hard: Negation of positive with neutral stuff in the middle (should be negative)', 'Negation', '')


In [49]:
# add the test case: negation with neutral in the middle.
# Expect to be Neutral or Oppurtunity
neutral =['At Karnataka, we produced and sold 2.2 million tonnes during FY2018,', 'in line with the allocated environmental clearance (EC) limits.', 'The Honourable Supreme Court has increased the cap on production of iron ore for the state from 30 to 35 million tonnes, ', 'and accordingly increase in our allocation for Karnataka from 2.3 to 4.5 million tonnes in May 2018.']
t = editor.template('{neg}, given {neutral}, that {it} {climate_noun} {be} {neg_adj}.', neutral=neutral, neg=['I don\'t think', 'I can\'t say', 'I wouldn\'t say'], it=['this', 'that', 'the'], be=['is', 'was'], save=True)
t += editor.template('{neg}, given {neutral}, that {it} {be} {a:neg_adj} {climate_noun}.',neutral=neutral,  neg=['i don\'t think', 'i can\'t say', 'i wouldn\'t say'], it=['this', 'that', 'the'], be=['is', 'was'], save=True)
t += editor.template('{neg}, given {neutral}, that {i} {neg_verb_present} {the} {climate_noun}.',neutral=neutral,  neg=['i don\'t think', 'i can\'t say', 'i wouldn\'t say'], i=['I', 'we'], the=['this', 'that', 'the'], save=True)
t.data = list(np.random.choice(t.data, 1000, replace=False))
test = MFT(t.data, Expect.single(is_not_0), templates=t.templates)
suite.add(test, 'Hard: Negation of negative with neutral stuff in the middle (should be positive or neutral)', 'Negation', '')


In [50]:
# add the test case: negation with neutral in the middle.
# expect to be neutral
neutral =['At Karnataka, we produced and sold 2.2 million tonnes during FY2018,', 'in line with the allocated environmental clearance (EC) limits.', 'The Honourable Supreme Court has increased the cap on production of iron ore for the state from 30 to 35 million tonnes, ', 'and accordingly increase in our allocation for Karnataka from 2.3 to 4.5 million tonnes in May 2018.']
t = editor.template('{neg}, given {neutral}, that {it} {climate_noun} {be} {neutral_adj}.', neutral=neutral, neg=['I don\'t think', 'I can\'t say', 'I wouldn\'t say'], it=['this', 'that', 'the'], be=['is', 'was'], save=True)
t += editor.template('{neg}, given {neutral}, that {it} {be} {a:neutral_adj} {climate_noun}.',neutral=neutral,  neg=['I don\'t think', 'I can\'t say', 'I wouldn\'t say'], it=['this', 'that', 'the'], be=['is', 'was'], save=True)
t += editor.template('{neg}, given {neutral}, that {i} {neutral_verb_present} {the} {climate_noun}.',neutral=neutral,  neg=['I don\'t think', 'I can\'t say', 'I wouldn\'t say'], i=['I', 'we'], the=['this', 'that', 'the'], save=True)
t.data = list(np.random.choice(t.data, 1000, replace=False))
test = MFT(t.data, labels=1, templates=t.templates)
suite.add(test, 'negation of neutral with neutral in the middle, should still neutral', 'Negation', '')



## Capability: SRL

my opinion is more important than others

In [51]:
# add the test case about my opinion is important than others
change = [' but', '']
templates = ['Some people think weather are {neg_adj},{change} I think weather are {pos_adj}.',
             'I think weather are {pos_adj},{change} some people think weather are {neg_adj}.',
             'I had heard weather were {neg_adj},{change} I think weather are {pos_adj}.',
             'I think weather are {pos_adj},{change} I had heard weather were {neg_adj}.',
             ]
t = editor.template(templates, change=change, unroll=True, labels=2, save=True)
templates = ['{others} {neg_verb_present} weather,{change} I {pos_verb_present} weather.',
             'I {pos_verb_present} weather,{change} {others} {neg_verb_present} weather.',
            ]
others = ['some people', 'my parents', 'my friends', 'people']
t += editor.template(templates, others=others, change=change, unroll=True, labels=2, save=True)

change = [' but', '']
templates = ['Some people think weather are {pos_adj},{change} I think weather are {neg_adj}.',
             'I think weather are {neg_adj},{change} some people think weather are {pos_adj}.',
             'I had heard weather were {pos_adj},{change} I think weather are {neg_adj}.',
             'I think weather are {neg_adj},{change} I had heard weather were {pos_adj}.',
             ]
t += editor.template(templates, change=change, unroll=True, labels=0, save=True)
templates = ['{others} {pos_verb_present} weather,{change} I {neg_verb_present} weather.',
             'I {neg_verb_present} weather,{change} {others} {pos_verb_present} weather.',
            ]
others = ['some people', 'my parents', 'my friends', 'people']
t += editor.template(templates, others=others, change=change, unroll=True, labels=0, save=True)
test = MFT(**t)
description = '''Have conflicting statements where the author has an opinion and a third party has a contrary opinion.
Expect sentiment to be the authors'. Example:
"Some people think weather are great, but I think weather are terrible" -> should be negative
'''
suite.add(test, 'my opinion is what matters', 'SRL', description)


q & a form: yes

In [52]:
# add the q and a test case

#label: `Oppurtunity`
t = editor.template('Do I think {it} {climate_noun} {be} {pos_adj}? Yes', it=['that', 'this', 'the'], be=['is', 'was'], save=True, labels=2)
t += editor.template('Do I think {it} {be} {a:pos_adj} {climate_noun}? Yes', it=['it', 'this', 'that'], be=['is', 'was'], save=True, labels=2)
t += editor.template('Did {i} {pos_verb_present} {the} {climate_noun}? Yes', i=['I', 'we'], the=['this', 'that', 'the'], save=True, labels=2)

#label: `Risk`
t += editor.template('Do I think {it} {climate_noun} {be} {neg_adj}? Yes', it=['that', 'this', 'the'], be=['is', 'was'], save=True, labels=0)
t += editor.template('Do I think {it} {be} {a:neg_adj} {climate_noun}? Yes', it=['it', 'this', 'that'], be=['is', 'was'], save=True, labels=0)
t += editor.template('Did {i} {neg_verb_present} {the} {climate_noun}? Yes', i=['I', 'we'], the=['this', 'that', 'the'], save=True, labels=0)
test = MFT(**t)
suite.add(test, 'Q & A: yes', 'SRL', 'TODO_DESCRIPTION')


In [53]:
# add more test case about qna expect to be neutral
t = editor.template('Do I think {it} {climate_noun} {be} {neutral_adj}? Yes', it=['that', 'this', 'the'], be=['is', 'was'], save=True)
t += editor.template('Do I think {it} {be} {a:neutral_adj} {climate_noun}? Yes', it=['it', 'this', 'that'], be=['is', 'was'], save=True)
t += editor.template('Did {i} {neutral_verb_present} {the} {climate_noun}? Yes', i=['I', 'we'], the=['this', 'that', 'the'], save=True)
test = MFT(t.data, labels=1, templates=t.templates)
suite.add(test, 'Q & A: yes (neutral)', 'SRL', 'TODO_DESCRIPTION')


In [54]:
# label Risk
t = editor.template('Do I think {it} {climate_noun} {be} {pos_adj}? No', it=['that', 'this', 'the'], be=['is', 'was'], save=True, labels=0)
t += editor.template('Do I think {it} {be} {a:pos_adj} {climate_noun}? No', it=['it', 'this', 'that'], be=['is', 'was'], save=True, labels=0)
t += editor.template('Did {i} {pos_verb_present} {the} {climate_noun}? No', i=['I', 'we'], the=['this', 'that', 'the'], save=True, labels=0)

# label to be neutral
t += editor.template('Do I think {it} {climate_noun} {be} {neg_adj}? No', it=['that', 'this', 'the'], be=['is', 'was'], save=True, labels=1)
t += editor.template('Do I think {it} {be} {a:neg_adj} {climate_noun}? No', it=['it', 'this', 'that'], be=['is', 'was'], save=True, labels=1)
t += editor.template('Did {i} {neg_verb_present} {the} {climate_noun}? No', i=['I', 'we'], the=['this', 'that', 'the'], save=True, labels=1)

# firstly, if label=1 (neutral),then check that is not risk. otherwise, pred label equal to label
allow_for_neutral = lambda x, pred, _, label, _2 : pred != 0 if label == 1 else pred == label
test = MFT(t.data, Expect.single(allow_for_neutral), labels=t.labels, templates=t.templates)
suite.add(test, 'Q & A: no', 'SRL', 'TODO_DESCRIPTION')


In [55]:
# test case that have `no` answer. 
t = editor.template('Do I think {it} {climate_noun} {be} {neutral_adj}? No', it=['that', 'this', 'the'], be=['is', 'was'], save=True)
t += editor.template('Do I think {it} {be} {a:neutral_adj} {climate_noun}? No', it=['it', 'this', 'that'], be=['is', 'was'], save=True)
t += editor.template('Did {i} {neutral_verb_present} {the} {climate_noun}? No', i=['I', 'we'], the=['this', 'that', 'the'], save=True)

# expect to be neutral
test = MFT(t.data, labels=1, templates=t.templates)
suite.add(test, 'Q & A: no (neutral)', 'SRL', 'TODO_DESCRIPTION')


In [57]:
# update parameter name
for test in suite.tests:
    suite.tests[test].name = test
    suite.tests[test].description = suite.info[test]['description]']
    suite.tests[test].capability = suite.info[test]['capability']

In [58]:
path = 'sentiment_dt2.pkl' # define path name to save file
suite.save(path) # save suite (test case) to the path