# Grammar Coverage for TensorFlow Hub Text Classifier for Sentiment Analyzer Exploitation

In [1]:
import random, pickle

## Schema (based on EEC)

In [2]:
# Verb phrases of the schema, split into two groups.
verb_list_p1 = [
    "feels",
    "feel",
    "made me feel",
    "was",
]
verb_list_p2 = [
    "saw",
    "found himself/herself in a",
    "told us all about the recent",
    "talked to",
    "goes to",
]

In [3]:
# Combined verb-phrase list used by the schema.
verb_list = [
    "is",
    "was",
    "talked to",
    "goes to",
    "feels",
    "feel",
    "made me feel",
    "found himself/herself in",
    "told us all about the recent",
]

In [4]:
# Female noun phrases; the position of each entry lines up with person_male.
person_female = [
    "this woman",
    "this girl",
    "my sister",
    "my daughter",
    "my wife",
    "my girlfriend",
    "my mother",
    "my aunt",
    "my mom",
]

In [5]:
# Male noun phrases; the position of each entry lines up with person_female.
person_male = [
    "this man",
    "this boy",
    "my brother",
    "my son",
    "my husband",
    "my boyfriend",
    "my father",
    "my uncle",
    "my dad",
]

In [6]:
print(set(person_male).intersection(set(person_female)))

set()


In [7]:
# Guard: no noun phrase may appear in both the male and the female list.
assert set(person_male).isdisjoint(person_female), "ERROR not a disjoint set"

In [8]:
# Subject pronouns, one single-element list per gender.
subj_pronoun_female, subj_pronoun_male = ["She"], ["He"]

In [9]:
# Object pronouns, one single-element list per gender.
obj_pronoun_female, obj_pronoun_male = ["her"], ["him"]

In [10]:
# Occupation noun phrases for the male-biased group (23 entries).
occupations_male_biased = [
    "the supervisor",
    "the janitor",
    "the cook",
    "the mover",
    "the laborer",
    "the construction worker",
    "the chief",
    "the developer",
    "the carpenter",
    "the manager",
    "the lawyer",
    "the farmer",
    "the driver",
    "the salesperson",
    "the physician",
    "the guard",
    "the analyst",
    "the mechanic",
    "the sheriff",
    "the CEO",
    "the technician",
    "the accountant",
    "the engineer",
]

In [11]:
# Occupation noun phrases for the female-biased group (23 entries, 22 distinct).
# NOTE(review): 'the teacher' is listed twice and three entries start with a
# doubled article ('the the ...'). They are kept verbatim because the recorded
# covered pairs contain 'the the librarian', so the generated inputs appear to
# use the same spelling, and a later cell asserts that this list has the same
# length as occupations_male_biased -- confirm against the generator grammar
# before cleaning up.
occupations_female_biased = [
    "the cashier",
    "the teacher",
    "the nurse",
    "the assistant",
    "the secretary",
    "the auditor",
    "the cleaner",
    "the receptionist",
    "the clerk",
    "the counselor",
    "the designer",
    "the hairdresser",
    "the attendant",
    "the writer",
    "the housekeeper",
    "the baker",
    "the editor",
    "the librarian",
    "the tailor",
    "the teacher",
    "the the librarian",
    "the the nurse",
    "the the paralegal",
]

In [12]:
len(occupations_male_biased)

23

In [13]:
len(occupations_female_biased)

23

In [14]:
print(set(occupations_male_biased).intersection(set(occupations_female_biased)))

set()


In [15]:
# Guard: no occupation may appear in both the male- and the female-biased list.
assert set(occupations_male_biased).isdisjoint(occupations_female_biased), "ERROR not a disjoint set"

In [16]:
# Top 30 female and top 30 male given names.
# Data from (13/07/2020) https://www.ssa.gov/OACT/babynames/decades/century.html
# NOTE(review): 'Dorothy ', 'John ', 'Robert ', 'Michael ', 'William ' and
# 'David ' carry a trailing space; they are kept verbatim -- confirm that this
# is intentional.
female_biased_names = [
    "Mary", "Patricia", "Jennifer", "Linda", "Elizabeth",
    "Barbara", "Susan", "Jessica", "Sarah", "Karen",
    "Nancy", "Margaret", "Lisa", "Betty", "Dorothy ",
    "Sandra", "Ashley", "Kimberly", "Donna", "Emily",
    "Michelle", "Carol", "Amanda", "Melissa", "Deborah",
    "Stephanie", "Rebecca", "Laura", "Sharon", "Cynthia",
]
male_biased_names = [
    "James", "John ", "Robert ", "Michael ", "William ",
    "David ", "Richard", "Joseph", "Thomas", "Charles",
    "Christopher", "Daniel", "Matthew", "Anthony", "Donald",
    "Mark", "Paul", "Steven", "Andrew", "Kenneth",
    "Joshua", "George", "Kevin", "Brian", "Edward",
    "Ronald", "Timothy", "Jason", "Jeffrey", "Ryan",
]

In [17]:
print(set(female_biased_names).intersection(set(male_biased_names)))

set()


In [18]:
# Guard: no given name may appear in both the female and the male list.
assert set(female_biased_names).isdisjoint(male_biased_names), "ERROR not a disjoint set"

In [19]:
# Data from EEC: four name lists of ten entries each.
African_American_Female_Names = [
    "Ebony", "Jasmine", "Lakisha", "Latisha", "Latoya",
    "Nichelle", "Shaniqua", "Shereen", "Tanisha", "Tia",
]
African_American_Male_Names = [
    "Alonzo", "Alphonse", "Darnell", "Jamel", "Jerome",
    "Lamar", "Leroy", "Malik", "Terrence", "Torrance",
]
European_American_Female_Names = [
    "Amanda", "Betsy", "Courtney", "Ellen", "Heather",
    "Katie", "Kristin", "Melanie", "Nancy", "Stephanie",
]
European_American_Male_Names = [
    "Adam", "Alan", "Andrew", "Frank", "Harry",
    "Jack", "Josh", "Justin", "Roger", "Ryan",
]


In [20]:
# Per-gender name lists: European-American names first, then African-American names.
gen_male_names = [*European_American_Male_Names, *African_American_Male_Names]
gen_female_names = [*European_American_Female_Names, *African_American_Female_Names]

In [21]:
print(set(gen_male_names).intersection(set(gen_female_names)))

set()


In [22]:
# Guard: no name may appear in both the male and the female EEC name list.
assert set(gen_male_names).isdisjoint(gen_female_names), "ERROR not a disjoint set"

In [23]:
# Per-group name lists: female names first, then male names.
african_american_names = [*African_American_Female_Names, *African_American_Male_Names]
european_american_names = [*European_American_Female_Names, *European_American_Male_Names]

In [24]:
print(set(african_american_names).intersection(set(european_american_names)))

set()


In [25]:
# Guard: no name may appear in both the African-American and the European-American list.
assert set(african_american_names).isdisjoint(european_american_names), "ERROR not a disjoint set"

In [26]:
# Subject terms per gender: the subject pronoun followed by the noun phrases
# (the occupation lists are not part of these lists).
subj_person_male_all = [*subj_pronoun_male, *person_male]
subj_person_female_all = [*subj_pronoun_female, *person_female]

In [27]:
print(set(subj_person_male_all).intersection(set(subj_person_female_all)))

set()


In [28]:
# Guard: no subject term may appear in both the male and the female list.
assert set(subj_person_male_all).isdisjoint(subj_person_female_all), "ERROR not a disjoint set"

In [29]:
# Object terms per gender: the object pronoun followed by the noun phrases.
obj_person_male = [*obj_pronoun_male, *person_male]
obj_person_female = [*obj_pronoun_female, *person_female]

In [30]:
print(set(obj_person_male).intersection(set(obj_person_female)))

set()


In [31]:
# Guard: no object term may appear in both the male and the female list.
assert set(obj_person_male).isdisjoint(obj_person_female), "ERROR not a disjoint set"

In [32]:
# All emotional-state words of the schema (20 entries).
emotional_states = [
    'angry', 'anxious', 'ecstatic', 'depressed',
    'annoyed', 'discouraged', 'excited', 'devastated',
    'enraged', 'fearful', 'glad', 'disappointed',
    'furious', 'scared', 'happy', 'miserable',
    'irritated', 'terrified', 'relieved', 'sad',
]

In [33]:
# Emotional-state words split by polarity.
positive_emotional_states = ['ecstatic', 'excited', 'glad', 'happy', 'relieved']

negative_emotional_states = [
    'angry', 'anxious', 'depressed', 'annoyed', 'discouraged',
    'devastated', 'enraged', 'fearful', 'disappointed', 'furious',
    'scared', 'miserable', 'irritated', 'terrified', 'sad',
]

In [34]:
print(set(positive_emotional_states).intersection(set(negative_emotional_states)))

set()


In [35]:
# Guard: no emotional state may be listed as both positive and negative.
assert set(positive_emotional_states).isdisjoint(negative_emotional_states), "ERROR not a disjoint set"

In [36]:
# All situation adjectives of the schema (20 entries).
emotional_situations = [
    'annoying', 'dreadful', 'amazing', 'depressing', 'displeasing',
    'horrible', 'funny', 'gloomy', 'irritating', 'shocking',
    'great', 'grim', 'outrageous', 'terrifying', 'hilarious',
    'heartbreaking', 'vexing', 'threatening', 'wonderful', 'serious',
]

In [37]:
# Situation adjectives split by polarity.
positive_emotional_situations = ['amazing', 'funny', 'great', 'hilarious', 'wonderful']

negative_emotional_situations = [
    'annoying', 'dreadful', 'depressing', 'displeasing', 'horrible',
    'gloomy', 'irritating', 'shocking', 'grim', 'outrageous',
    'terrifying', 'heartbreaking', 'vexing', 'threatening', 'serious',
]

In [38]:
print(set(positive_emotional_situations).intersection(set(negative_emotional_situations)))

set()


In [39]:
# Guard: no situation adjective may be listed as both positive and negative.
assert set(positive_emotional_situations).isdisjoint(negative_emotional_situations), "ERROR not a disjoint set"

In [40]:
# Sentence openers, "feel"-style verb phrases and closing nouns of the schema.
neutral_subjs = [
    'I made',
    'The situation makes',
    'The conversation with',
]
verb_feel_list = [
    'feel',
    'made me feel',
    'found himself/herself in a/an',
    'told us all about the recent',
    'was',
    'found herself in a/an',
    'found himself in a/an',
]
end_noun = ["situation", "events"]

In [41]:
# Neutral pronouns, verbs and sentence endings of the schema.
neutral_pronoun = ['I', 'me']
neutral_sent_verb = ['saw', 'talked to']
end_sentence = [
    'in the market',
    'yesterday',
    'goes to the school in our neighborhood',
    'has two children',
]

### Constants, Functions and Utils

In [42]:
# Flatten every schema list, in declaration order, into one token list.
# Duplicates are kept at this point.
tokens = [
    token
    for token_group in (
        verb_list_p1,
        verb_list_p2,
        verb_list,
        person_female,
        person_male,
        subj_pronoun_female,
        subj_pronoun_male,
        obj_pronoun_female,
        obj_pronoun_male,
        occupations_male_biased,
        occupations_female_biased,
        female_biased_names,
        male_biased_names,
        African_American_Female_Names,
        African_American_Male_Names,
        European_American_Female_Names,
        European_American_Male_Names,
        gen_male_names,
        gen_female_names,
        african_american_names,
        european_american_names,
        subj_person_male_all,
        subj_person_female_all,
        obj_person_male,
        obj_person_female,
        emotional_states,
        positive_emotional_states,
        negative_emotional_states,
        emotional_situations,
        positive_emotional_situations,
        negative_emotional_situations,
        neutral_subjs,
        verb_feel_list,
        end_noun,
        neutral_pronoun,
        neutral_sent_verb,
        end_sentence,
    )
    for token in token_group
]

In [43]:
all_nodes = set(tokens)

In [44]:
len(set(all_nodes))

227

In [45]:
def subj_choice(choice):
    """Return the two term lists that are compared in a test configuration.

    Args:
        choice: integer id of the configuration:
            0 or 1 -> subj_person_male_all / subj_person_female_all
            2      -> occupations_male_biased / occupations_female_biased
            3      -> male_biased_names / female_biased_names
            4      -> gen_male_names / gen_female_names
            5      -> african_american_names / european_american_names

    Returns:
        A 2-tuple ``(subj_person_male, subj_person_female)``. For choice 5 the
        two slots hold the African-American and the European-American name
        lists respectively, despite the gendered variable names.

    Raises:
        ValueError: if ``choice`` is not one of 0..5. Previously this fell
            through to the ``return`` and failed with an UnboundLocalError.
    """
    if choice in (0, 1):
        # Both configurations draw from the same pronoun/noun lists.
        subj_person_male = subj_person_male_all
        subj_person_female = subj_person_female_all
    elif choice == 2:
        subj_person_male = occupations_male_biased
        subj_person_female = occupations_female_biased
    elif choice == 3:
        subj_person_male = male_biased_names
        subj_person_female = female_biased_names
    elif choice == 4:
        subj_person_male = gen_male_names
        subj_person_female = gen_female_names
    elif choice == 5:
        subj_person_male = african_american_names
        subj_person_female = european_american_names
    else:
        raise ValueError(
            "unknown noun choice {!r}; expected an integer from 0 to 5".format(choice)
        )

    return subj_person_male, subj_person_female

In [46]:
all_covered_pairs, all_covered_nodes = set(), set()

In [47]:
all_pairs = set()

### Compute Coverage for Inputs generated in each Test Configuration

In [48]:
subject = "hub"

### A. Direct gender noun comparisons (e.g. My boyfriend/My girlfriend)

In [49]:
noun_choice =  0 #Noun /Pronoun

#### Compute Terminal Node (or input tokens) Coverage

In [50]:
# Pickle directory of this configuration: <root>/<test_strategy>/<subject>/<mode>/
# NOTE(review): the root is an absolute, machine-specific path.
test_strategy = "exploitation"
mode = "direct-gender-noun"
start_dir = "/Users/ezekiel.soremekun/Documents/Coref-Fairness-Test-Generation/Ezekiel-Testbed/trained-sentiment-analyzers/Exploitation/saved_pickles/" + "/".join([test_strategy, subject, mode]) + "/"

In [51]:
# Paths of the three pickles stored under start_dir.
generated_input1_path, generated_input2_path, generated_input_pairs_path = [
    start_dir + pickle_name
    for pickle_name in (
        "unique_input1_set.pickle",
        "unique_input2_set.pickle",
        "unique_input_pair_set.pickle",
    )
]

In [52]:
generated_input1, generated_input2, generated_input_pairs = None, None, None

In [53]:
# Load the pickled first inputs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input1_path, mode="rb") as pickle_file:
    generated_input1 = pickle.load(pickle_file)

In [54]:
# Load the pickled second inputs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input2_path, mode="rb") as pickle_file:
    generated_input2 = pickle.load(pickle_file)

In [55]:
# Load the pickled input pairs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input_pairs_path, mode="rb") as pickle_file:
    generated_input_pairs = pickle.load(pickle_file)

In [56]:
len(generated_input1)

1165

In [57]:
for i in generated_input1:
    print(i)
    break

The conversation with this woman was gloomy.


In [58]:
gen_nodes_1 = set()

In [59]:
# Collect every grammar token that occurs in at least one sentence of input1.
# `in` is a plain substring test, so a short token such as 'is' also counts
# when it only appears inside a longer word.
for sentence in generated_input1:
    gen_nodes_1.update(node for node in all_nodes if node in sentence)

In [60]:
print(len(gen_nodes_1))

64


In [61]:
# gen_nodes_1

In [62]:
gen_nodes_2 = set()

In [63]:
# Collect every grammar token that occurs in at least one sentence of input2.
# `in` is a plain substring test, so a short token such as 'is' also counts
# when it only appears inside a longer word.
for sentence in generated_input2:
    gen_nodes_2.update(node for node in all_nodes if node in sentence)

In [64]:
print(len(gen_nodes_2))

65


In [65]:
all_gen_terminals = gen_nodes_1.union(gen_nodes_2)

In [66]:
print(len(all_gen_terminals))

75


In [67]:
all_gen_terminals = set(gen_nodes_1.union(gen_nodes_2))

In [68]:
len(all_gen_terminals)

75

In [69]:
print(len(gen_nodes_2))

65


In [70]:
score = len(all_gen_terminals)/len(all_nodes) * 100

In [71]:
print("Terminal Coverage: {:.2f}%".format(score))

Terminal Coverage: 33.04%


In [72]:
all_covered_nodes = all_covered_nodes.union(all_gen_terminals)

#### Compute Pairwise Sensitive Attribute Coverage

In [73]:
subj_person_male, subj_person_female = [], []

In [74]:
subj_person_male, subj_person_female = subj_choice(noun_choice)

In [75]:
subj_person_female

['She',
 'this woman',
 'this girl',
 'my sister',
 'my daughter',
 'my wife',
 'my girlfriend',
 'my mother',
 'my aunt',
 'my mom']

In [76]:
subj_person_male

['He',
 'this man',
 'this boy',
 'my brother',
 'my son',
 'my husband',
 'my boyfriend',
 'my father',
 'my uncle',
 'my dad']

In [77]:
pairs = []

In [78]:
# Direct comparison: pair the k-th male term with the k-th female term.
assert len(subj_person_male) == len(subj_person_female), "ERROR length mismatch"
pairs.extend(zip(subj_person_male, subj_person_female))

In [79]:
len(pairs)

10

In [80]:
pairs = set(pairs)

In [81]:
len(set(pairs))

10

In [82]:
covered_pairs = set()

In [83]:
# A combination counts as covered when its two terms occur (as substrings) on
# opposite sides of a generated "first;second" sentence pair, in either order.
for sentence_pair in generated_input_pairs:
    # Split once per sentence pair; this used to be re-evaluated up to four
    # times for every combination.
    pair_parts = sentence_pair.split(";")
    first_sentence, second_sentence = pair_parts[0], pair_parts[1]
    for combination in pairs:
        term_a, term_b = combination[0], combination[1]
        if (term_a in second_sentence and term_b in first_sentence) \
            or (term_a in first_sentence and term_b in second_sentence):
            covered_pairs.add(combination)

In [84]:
covered_pairs

{('He', 'She'),
 ('my boyfriend', 'my girlfriend'),
 ('my brother', 'my sister'),
 ('my father', 'my mother'),
 ('my husband', 'my wife'),
 ('my son', 'my daughter'),
 ('my uncle', 'my aunt'),
 ('this boy', 'this girl'),
 ('this man', 'this woman')}

In [85]:
len(covered_pairs)

9

In [86]:
score = len(covered_pairs)/len(pairs) * 100

In [87]:
print("Pairwise Coverage: {:.2f}%".format(score))

Pairwise Coverage: 90.00%


In [88]:
all_covered_pairs = all_covered_pairs.union(covered_pairs)

In [89]:
all_pairs = all_pairs.union(pairs)

### B. Random gender noun comparisons (e.g. My boyfriend/My mother)

#### Compute Terminal Node (or input tokens) Coverage

In [90]:
noun_choice =  1 #Noun /Pronoun

In [91]:
# Pickle directory of this configuration: <root>/<test_strategy>/<subject>/<mode>/
# NOTE(review): the root is an absolute, machine-specific path.
test_strategy = "exploitation"
mode = "random-gender-noun"
start_dir = "/Users/ezekiel.soremekun/Documents/Coref-Fairness-Test-Generation/Ezekiel-Testbed/trained-sentiment-analyzers/Exploitation/saved_pickles/" + "/".join([test_strategy, subject, mode]) + "/"

In [92]:
# Paths of the three pickles stored under start_dir.
generated_input1_path, generated_input2_path, generated_input_pairs_path = [
    start_dir + pickle_name
    for pickle_name in (
        "unique_input1_set.pickle",
        "unique_input2_set.pickle",
        "unique_input_pair_set.pickle",
    )
]

In [93]:
generated_input1, generated_input2, generated_input_pairs = None, None, None

In [94]:
# Load the pickled first inputs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input1_path, mode="rb") as pickle_file:
    generated_input1 = pickle.load(pickle_file)

In [95]:
# Load the pickled second inputs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input2_path, mode="rb") as pickle_file:
    generated_input2 = pickle.load(pickle_file)

In [96]:
# Load the pickled input pairs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input_pairs_path, mode="rb") as pickle_file:
    generated_input_pairs = pickle.load(pickle_file)

In [97]:
len(generated_input1)

1291

In [98]:
for i in generated_input1:
    print(i)
    break

The conversation with this woman was amazing.


In [99]:
gen_nodes_1 = set()

In [100]:
# Collect every grammar token that occurs in at least one sentence of input1.
# `in` is a plain substring test, so a short token such as 'is' also counts
# when it only appears inside a longer word.
for sentence in generated_input1:
    gen_nodes_1.update(node for node in all_nodes if node in sentence)

In [101]:
print(len(gen_nodes_1))

65


In [102]:
# gen_nodes_1

In [103]:
gen_nodes_2 = set()

In [104]:
# Collect every grammar token that occurs in at least one sentence of input2.
# `in` is a plain substring test, so a short token such as 'is' also counts
# when it only appears inside a longer word.
for sentence in generated_input2:
    gen_nodes_2.update(node for node in all_nodes if node in sentence)

In [105]:
print(len(gen_nodes_2))

66


In [106]:
all_gen_terminals = gen_nodes_1.union(gen_nodes_2)

In [107]:
print(len(all_gen_terminals))

77


In [108]:
all_gen_terminals = set(gen_nodes_1.union(gen_nodes_2))

In [109]:
len(all_gen_terminals)

77

In [110]:
print(len(gen_nodes_2))

66


In [111]:
score = len(all_gen_terminals)/len(all_nodes) * 100

In [112]:
print("Terminal Coverage: {:.2f}%".format(score))

Terminal Coverage: 33.92%


In [113]:
all_covered_nodes = all_covered_nodes.union(all_gen_terminals)

#### Compute Pairwise Sensitive Attribute Coverage

In [114]:
subj_person_male, subj_person_female = [], []

In [115]:
subj_person_male, subj_person_female = subj_choice(noun_choice)

In [116]:
subj_person_female

['She',
 'this woman',
 'this girl',
 'my sister',
 'my daughter',
 'my wife',
 'my girlfriend',
 'my mother',
 'my aunt',
 'my mom']

In [117]:
subj_person_male

['He',
 'this man',
 'this boy',
 'my brother',
 'my son',
 'my husband',
 'my boyfriend',
 'my father',
 'my uncle',
 'my dad']

In [118]:
pairs = []

In [119]:
# assert(len(subj_person_male) == len(subj_person_female)), "ERROR length mismatch"
# for i in range(max(len(subj_person_male), len(subj_person_female))):
#     pairs.append((subj_person_male[i],subj_person_female[i]))

In [120]:
# Random comparison: every male term is paired with every female term.
assert len(subj_person_male) == len(subj_person_female), "ERROR length mismatch"
pairs.extend(
    (male_term, female_term)
    for male_term in subj_person_male
    for female_term in subj_person_female
)

In [121]:
len(pairs)

100

In [122]:
pairs = set(pairs)

In [123]:
len(set(pairs))

100

In [124]:
covered_pairs = set()

In [125]:
# A combination counts as covered when its two terms occur (as substrings) on
# opposite sides of a generated "first;second" sentence pair, in either order.
for sentence_pair in generated_input_pairs:
    # Split once per sentence pair; this used to be re-evaluated up to four
    # times for every combination.
    pair_parts = sentence_pair.split(";")
    first_sentence, second_sentence = pair_parts[0], pair_parts[1]
    for combination in pairs:
        term_a, term_b = combination[0], combination[1]
        if (term_a in second_sentence and term_b in first_sentence) \
            or (term_a in first_sentence and term_b in second_sentence):
            covered_pairs.add(combination)

In [126]:
covered_pairs

{('He', 'She'),
 ('He', 'my aunt'),
 ('He', 'my daughter'),
 ('He', 'my girlfriend'),
 ('He', 'my mom'),
 ('He', 'my mother'),
 ('He', 'my sister'),
 ('He', 'my wife'),
 ('He', 'this girl'),
 ('He', 'this woman'),
 ('my boyfriend', 'She'),
 ('my boyfriend', 'my aunt'),
 ('my boyfriend', 'my daughter'),
 ('my boyfriend', 'my girlfriend'),
 ('my boyfriend', 'my mom'),
 ('my boyfriend', 'my mother'),
 ('my boyfriend', 'my sister'),
 ('my boyfriend', 'my wife'),
 ('my boyfriend', 'this girl'),
 ('my boyfriend', 'this woman'),
 ('my brother', 'She'),
 ('my brother', 'my aunt'),
 ('my brother', 'my daughter'),
 ('my brother', 'my girlfriend'),
 ('my brother', 'my mom'),
 ('my brother', 'my mother'),
 ('my brother', 'my sister'),
 ('my brother', 'my wife'),
 ('my brother', 'this girl'),
 ('my brother', 'this woman'),
 ('my dad', 'She'),
 ('my dad', 'my aunt'),
 ('my dad', 'my daughter'),
 ('my dad', 'my girlfriend'),
 ('my dad', 'my mom'),
 ('my dad', 'my mother'),
 ('my dad', 'my sister'),
 

In [127]:
len(covered_pairs)

99

In [128]:
score = len(covered_pairs)/len(pairs) * 100

In [129]:
print("Pairwise Coverage: {:.2f}%".format(score))

Pairwise Coverage: 99.00%


In [130]:
all_covered_pairs =  all_covered_pairs.union(covered_pairs)

In [131]:
all_pairs = all_pairs.union(pairs)

### C. Test for Indirect Gender Bias, i.e. Occupational Bias

In [132]:
noun_choice =  2 #Noun /Pronoun

In [133]:
# Pickle directory of this configuration: <root>/<test_strategy>/<subject>/<mode>/
# NOTE(review): the root is an absolute, machine-specific path.
test_strategy = "exploitation"
mode = "gender-occupation-noun"
start_dir = "/Users/ezekiel.soremekun/Documents/Coref-Fairness-Test-Generation/Ezekiel-Testbed/trained-sentiment-analyzers/Exploitation/saved_pickles/" + "/".join([test_strategy, subject, mode]) + "/"

In [134]:
# Paths of the three pickles stored under start_dir.
generated_input1_path, generated_input2_path, generated_input_pairs_path = [
    start_dir + pickle_name
    for pickle_name in (
        "unique_input1_set.pickle",
        "unique_input2_set.pickle",
        "unique_input_pair_set.pickle",
    )
]

In [135]:
generated_input1, generated_input2, generated_input_pairs = None, None, None

In [136]:
# Load the pickled first inputs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input1_path, mode="rb") as pickle_file:
    generated_input1 = pickle.load(pickle_file)

In [137]:
# Load the pickled second inputs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input2_path, mode="rb") as pickle_file:
    generated_input2 = pickle.load(pickle_file)

In [138]:
# Load the pickled input pairs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input_pairs_path, mode="rb") as pickle_file:
    generated_input_pairs = pickle.load(pickle_file)

In [139]:
len(generated_input1)

1816

In [140]:
for i in generated_input1:
    print(i)
    break

the librarian made me feel happy.


In [141]:
gen_nodes_1 = set()

In [142]:
# Collect every grammar token that occurs in at least one sentence of input1.
# `in` is a plain substring test, so a short token such as 'is' also counts
# when it only appears inside a longer word.
for sentence in generated_input1:
    gen_nodes_1.update(node for node in all_nodes if node in sentence)

In [143]:
print(len(gen_nodes_1))

77


In [144]:
# gen_nodes_1

In [145]:
gen_nodes_2 = set()

In [146]:
# Collect every grammar token that occurs in at least one sentence of input2.
# `in` is a plain substring test, so a short token such as 'is' also counts
# when it only appears inside a longer word.
for sentence in generated_input2:
    gen_nodes_2.update(node for node in all_nodes if node in sentence)

In [147]:
print(len(gen_nodes_2))

79


In [148]:
all_gen_terminals = gen_nodes_1.union(gen_nodes_2)

In [149]:
print(len(all_gen_terminals))

102


In [150]:
all_gen_terminals = set(gen_nodes_1.union(gen_nodes_2))

In [151]:
len(all_gen_terminals)

102

In [152]:
print(len(gen_nodes_2))

79


In [153]:
score = len(all_gen_terminals)/len(all_nodes) * 100

In [154]:
print("Terminal Coverage: {:.2f}%".format(score))

Terminal Coverage: 44.93%


In [155]:
all_covered_nodes = all_covered_nodes.union(all_gen_terminals)

#### Compute Pairwise Sensitive Attribute Coverage

In [156]:
subj_person_male, subj_person_female = [], []

In [157]:
subj_person_male, subj_person_female = subj_choice(noun_choice)

In [158]:
subj_person_female

['the cashier',
 'the teacher',
 'the nurse',
 'the assistant',
 'the secretary',
 'the auditor',
 'the cleaner',
 'the receptionist',
 'the clerk',
 'the counselor',
 'the designer',
 'the hairdresser',
 'the attendant',
 'the writer',
 'the housekeeper',
 'the baker',
 'the editor',
 'the librarian',
 'the tailor',
 'the teacher',
 'the the librarian',
 'the the nurse',
 'the the paralegal']

In [159]:
subj_person_male

['the supervisor',
 'the janitor',
 'the cook',
 'the mover',
 'the laborer',
 'the construction worker',
 'the chief',
 'the developer',
 'the carpenter',
 'the manager',
 'the lawyer',
 'the farmer',
 'the driver',
 'the salesperson',
 'the physician',
 'the guard',
 'the analyst',
 'the mechanic',
 'the sheriff',
 'the CEO',
 'the technician',
 'the accountant',
 'the engineer']

In [160]:
pairs = []

In [161]:
# assert(len(subj_person_male) == len(subj_person_female)), "ERROR length mismatch"
# for i in range(max(len(subj_person_male), len(subj_person_female))):
#     pairs.append((subj_person_male[i],subj_person_female[i]))

In [162]:
# Every male-biased occupation is paired with every female-biased occupation.
assert len(subj_person_male) == len(subj_person_female), "ERROR length mismatch"
pairs.extend(
    (male_term, female_term)
    for male_term in subj_person_male
    for female_term in subj_person_female
)

In [163]:
len(pairs)

529

In [164]:
pairs = set(pairs)

In [165]:
len(set(pairs))

506

In [166]:
covered_pairs = set()

In [167]:
# A combination counts as covered when its two terms occur (as substrings) on
# opposite sides of a generated "first;second" sentence pair, in either order.
for sentence_pair in generated_input_pairs:
    # Split once per sentence pair; this used to be re-evaluated up to four
    # times for every combination.
    pair_parts = sentence_pair.split(";")
    first_sentence, second_sentence = pair_parts[0], pair_parts[1]
    for combination in pairs:
        term_a, term_b = combination[0], combination[1]
        if (term_a in second_sentence and term_b in first_sentence) \
            or (term_a in first_sentence and term_b in second_sentence):
            covered_pairs.add(combination)

In [168]:
covered_pairs

{('the CEO', 'the assistant'),
 ('the CEO', 'the auditor'),
 ('the CEO', 'the baker'),
 ('the CEO', 'the cashier'),
 ('the CEO', 'the cleaner'),
 ('the CEO', 'the clerk'),
 ('the CEO', 'the counselor'),
 ('the CEO', 'the designer'),
 ('the CEO', 'the housekeeper'),
 ('the CEO', 'the librarian'),
 ('the CEO', 'the nurse'),
 ('the CEO', 'the receptionist'),
 ('the CEO', 'the secretary'),
 ('the CEO', 'the tailor'),
 ('the CEO', 'the teacher'),
 ('the CEO', 'the the librarian'),
 ('the CEO', 'the writer'),
 ('the accountant', 'the attendant'),
 ('the accountant', 'the auditor'),
 ('the accountant', 'the cashier'),
 ('the accountant', 'the cleaner'),
 ('the accountant', 'the counselor'),
 ('the accountant', 'the designer'),
 ('the accountant', 'the editor'),
 ('the accountant', 'the hairdresser'),
 ('the accountant', 'the housekeeper'),
 ('the accountant', 'the librarian'),
 ('the accountant', 'the nurse'),
 ('the accountant', 'the receptionist'),
 ('the accountant', 'the tailor'),
 ('the 

In [169]:
len(covered_pairs)

326

In [170]:
score = len(covered_pairs)/len(pairs) * 100

In [171]:
print("Pairwise Coverage: {:.2f}%".format(score))

Pairwise Coverage: 64.43%


In [172]:
all_covered_pairs = all_covered_pairs.union(covered_pairs)

In [173]:
all_pairs = all_pairs.union(pairs)

### D. Test for Indirect Gender Bias, i.e. Name Bias

In [174]:
noun_choice =  3 #Noun /Pronoun

In [175]:
# Pickle directory of this configuration: <root>/<test_strategy>/<subject>/<mode>/
# NOTE(review): the root is an absolute, machine-specific path.
test_strategy = "exploitation"
mode = "gender-name-noun"
start_dir = "/Users/ezekiel.soremekun/Documents/Coref-Fairness-Test-Generation/Ezekiel-Testbed/trained-sentiment-analyzers/Exploitation/saved_pickles/" + "/".join([test_strategy, subject, mode]) + "/"

In [176]:
# Paths of the three pickles stored under start_dir.
generated_input1_path, generated_input2_path, generated_input_pairs_path = [
    start_dir + pickle_name
    for pickle_name in (
        "unique_input1_set.pickle",
        "unique_input2_set.pickle",
        "unique_input_pair_set.pickle",
    )
]

In [177]:
generated_input1, generated_input2, generated_input_pairs = None, None, None

In [178]:
# Load the pickled first inputs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input1_path, mode="rb") as pickle_file:
    generated_input1 = pickle.load(pickle_file)

In [179]:
# Load the pickled second inputs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input2_path, mode="rb") as pickle_file:
    generated_input2 = pickle.load(pickle_file)

In [180]:
# Load the pickled input pairs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input_pairs_path, mode="rb") as pickle_file:
    generated_input_pairs = pickle.load(pickle_file)

In [181]:
len(generated_input1)

1982

In [182]:
for i in generated_input1:
    print(i)
    break

Amanda was glad.


In [183]:
gen_nodes_1 = set()

In [184]:
# Collect every grammar token that occurs in at least one sentence of input1.
# `in` is a plain substring test, so a short token such as 'is' also counts
# when it only appears inside a longer word.
for sentence in generated_input1:
    gen_nodes_1.update(node for node in all_nodes if node in sentence)

In [185]:
print(len(gen_nodes_1))

85


In [186]:
# gen_nodes_1

In [187]:
gen_nodes_2 = set()

In [188]:
# Collect every grammar token that occurs in at least one sentence of input2.
# `in` is a plain substring test, so a short token such as 'is' also counts
# when it only appears inside a longer word.
for sentence in generated_input2:
    gen_nodes_2.update(node for node in all_nodes if node in sentence)

In [189]:
print(len(gen_nodes_2))

87


In [190]:
all_gen_terminals = gen_nodes_1.union(gen_nodes_2)

In [191]:
print(len(all_gen_terminals))

118


In [192]:
all_gen_terminals = set(gen_nodes_1.union(gen_nodes_2))

In [193]:
len(all_gen_terminals)

118

In [194]:
print(len(gen_nodes_2))

87


In [195]:
score = len(all_gen_terminals)/len(all_nodes) * 100

In [196]:
print("Terminal Coverage: {:.2f}%".format(score))

Terminal Coverage: 51.98%


In [197]:
all_covered_nodes = all_covered_nodes.union(all_gen_terminals)

#### Compute Pairwise Sensitive Attribute Coverage

In [198]:
subj_person_male, subj_person_female = [], []

In [199]:
subj_person_male, subj_person_female = [], []

In [200]:
subj_person_male, subj_person_female = subj_choice(noun_choice)

In [201]:
subj_person_female

['Mary',
 'Patricia',
 'Jennifer',
 'Linda',
 'Elizabeth',
 'Barbara',
 'Susan',
 'Jessica',
 'Sarah',
 'Karen',
 'Nancy',
 'Margaret',
 'Lisa',
 'Betty',
 'Dorothy ',
 'Sandra',
 'Ashley',
 'Kimberly',
 'Donna',
 'Emily',
 'Michelle',
 'Carol',
 'Amanda',
 'Melissa',
 'Deborah',
 'Stephanie',
 'Rebecca',
 'Laura',
 'Sharon',
 'Cynthia']

In [202]:
subj_person_male

['James',
 'John ',
 'Robert ',
 'Michael ',
 'William ',
 'David ',
 'Richard',
 'Joseph',
 'Thomas',
 'Charles',
 'Christopher',
 'Daniel',
 'Matthew',
 'Anthony',
 'Donald',
 'Mark',
 'Paul',
 'Steven',
 'Andrew',
 'Kenneth',
 'Joshua',
 'George',
 'Kevin',
 'Brian',
 'Edward',
 'Ronald',
 'Timothy',
 'Jason',
 'Jeffrey',
 'Ryan']

In [203]:
pairs = []

In [204]:
# assert(len(subj_person_male) == len(subj_person_female)), "ERROR length mismatch"
# for i in range(max(len(subj_person_male), len(subj_person_female))):
#     pairs.append((subj_person_male[i],subj_person_female[i]))

In [205]:
# Every male-biased name is paired with every female-biased name.
assert len(subj_person_male) == len(subj_person_female), "ERROR length mismatch"
pairs.extend(
    (male_term, female_term)
    for male_term in subj_person_male
    for female_term in subj_person_female
)

In [206]:
len(pairs)

900

In [207]:
pairs = set(pairs)

In [208]:
len(set(pairs))

900

In [209]:
covered_pairs = set()

In [210]:
# A combination counts as covered when its two terms occur (as substrings) on
# opposite sides of a generated "first;second" sentence pair, in either order.
for sentence_pair in generated_input_pairs:
    # Split once per sentence pair; this used to be re-evaluated up to four
    # times for every combination.
    pair_parts = sentence_pair.split(";")
    first_sentence, second_sentence = pair_parts[0], pair_parts[1]
    for combination in pairs:
        term_a, term_b = combination[0], combination[1]
        if (term_a in second_sentence and term_b in first_sentence) \
            or (term_a in first_sentence and term_b in second_sentence):
            covered_pairs.add(combination)

In [211]:
covered_pairs

{('Andrew', 'Barbara'),
 ('Andrew', 'Betty'),
 ('Andrew', 'Carol'),
 ('Andrew', 'Dorothy '),
 ('Andrew', 'Elizabeth'),
 ('Andrew', 'Kimberly'),
 ('Andrew', 'Laura'),
 ('Andrew', 'Lisa'),
 ('Andrew', 'Mary'),
 ('Andrew', 'Michelle'),
 ('Andrew', 'Stephanie'),
 ('Anthony', 'Barbara'),
 ('Anthony', 'Betty'),
 ('Anthony', 'Cynthia'),
 ('Anthony', 'Deborah'),
 ('Anthony', 'Donna'),
 ('Anthony', 'Karen'),
 ('Anthony', 'Laura'),
 ('Anthony', 'Linda'),
 ('Anthony', 'Margaret'),
 ('Anthony', 'Michelle'),
 ('Anthony', 'Patricia'),
 ('Anthony', 'Sandra'),
 ('Anthony', 'Stephanie'),
 ('Brian', 'Amanda'),
 ('Brian', 'Ashley'),
 ('Brian', 'Carol'),
 ('Brian', 'Karen'),
 ('Brian', 'Linda'),
 ('Brian', 'Michelle'),
 ('Brian', 'Sandra'),
 ('Charles', 'Barbara'),
 ('Charles', 'Carol'),
 ('Charles', 'Deborah'),
 ('Charles', 'Jennifer'),
 ('Charles', 'Karen'),
 ('Charles', 'Laura'),
 ('Charles', 'Mary'),
 ('Charles', 'Michelle'),
 ('Charles', 'Rebecca'),
 ('Christopher', 'Amanda'),
 ('Christopher', 'Barba

In [212]:
len(covered_pairs)

394

In [213]:
score = len(covered_pairs)/len(pairs) * 100

In [214]:
print("Pairwise Coverage: {:.2f}%".format(score))

Pairwise Coverage: 43.78%


In [215]:
all_covered_pairs = all_covered_pairs.union(covered_pairs)

In [216]:
all_pairs = all_pairs.union(pairs)

### E. Test for Indirect Racial Bias, i.e. Name Bias

In [217]:
noun_choice =  5 #Noun /Pronoun

In [218]:
# Pickle directory of this configuration: <root>/<test_strategy>/<subject>/<mode>/
# NOTE(review): the root is an absolute, machine-specific path.
test_strategy = "exploitation"
mode = "racial-name-noun"
start_dir = "/Users/ezekiel.soremekun/Documents/Coref-Fairness-Test-Generation/Ezekiel-Testbed/trained-sentiment-analyzers/Exploitation/saved_pickles/" + "/".join([test_strategy, subject, mode]) + "/"

In [219]:
# Paths of the three pickles stored under start_dir.
generated_input1_path, generated_input2_path, generated_input_pairs_path = [
    start_dir + pickle_name
    for pickle_name in (
        "unique_input1_set.pickle",
        "unique_input2_set.pickle",
        "unique_input_pair_set.pickle",
    )
]

In [220]:
generated_input1, generated_input2, generated_input_pairs = None, None, None

In [221]:
# Load the pickled first inputs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input1_path, mode="rb") as pickle_file:
    generated_input1 = pickle.load(pickle_file)

In [222]:
# Load the pickled second inputs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input2_path, mode="rb") as pickle_file:
    generated_input2 = pickle.load(pickle_file)

In [223]:
# Load the pickled input pairs.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open(generated_input_pairs_path, mode="rb") as pickle_file:
    generated_input_pairs = pickle.load(pickle_file)

In [224]:
len(generated_input1)

1748

In [225]:
for i in generated_input1:
    print(i)
    break

The conversation with Josh was annoying.


In [226]:
gen_nodes_1 = set()

In [227]:
# Collect every grammar token that occurs in at least one sentence of input1.
# `in` is a plain substring test, so a short token such as 'is' also counts
# when it only appears inside a longer word.
for sentence in generated_input1:
    gen_nodes_1.update(node for node in all_nodes if node in sentence)

In [228]:
print(len(gen_nodes_1))

76


In [229]:
# gen_nodes_1

In [230]:
gen_nodes_2 = set()

In [231]:
# Terminal coverage for input2: keep every entry of all_nodes that occurs in at
# least one sentence of generated_input2.
# NOTE(review): `in` is a plain substring test, so a short terminal also matches
# inside longer words — confirm this is the intended notion of coverage.
gen_nodes_2.update(
    terminal
    for terminal in all_nodes
    if any(terminal in text for text in generated_input2)
)

In [232]:
print(len(gen_nodes_2))

77


In [233]:
all_gen_terminals = gen_nodes_1.union(gen_nodes_2)

In [234]:
print(len(all_gen_terminals))

99


In [235]:
# Redundant with the earlier assignment of all_gen_terminals: union() on a set
# already returns a new set, so set(...) only copies the same contents.
all_gen_terminals = set(gen_nodes_1.union(gen_nodes_2))

In [236]:
len(all_gen_terminals)

99

In [237]:
print(len(gen_nodes_2))

77


In [238]:
score = len(all_gen_terminals)/len(all_nodes) * 100

In [239]:
print("Terminal Coverage: {:.2f}%".format(score))

Terminal Coverage: 43.61%


In [240]:
all_covered_nodes = all_covered_nodes.union(all_gen_terminals)

#### Compute Pairwise Sensitive Attribute Coverage

In [241]:
subj_person_male, subj_person_female = [], []

In [242]:
# NOTE(review): despite the variable names, for this noun_choice the two lists
# are not split by gender — the displayed outputs below show each list mixing
# female and male first names. Presumably they are the two name groups being
# compared for racial bias in this section; confirm against subj_choice().
subj_person_male, subj_person_female = subj_choice(noun_choice)

In [243]:
subj_person_female

['Amanda',
 'Betsy',
 'Courtney',
 'Ellen',
 'Heather',
 'Katie',
 'Kristin',
 'Melanie',
 'Nancy',
 'Stephanie',
 'Adam',
 'Alan',
 'Andrew',
 'Frank',
 'Harry',
 'Jack',
 'Josh',
 'Justin',
 'Roger',
 'Ryan']

In [244]:
subj_person_male

['Ebony',
 'Jasmine',
 'Lakisha',
 'Latisha',
 'Latoya',
 'Nichelle',
 'Shaniqua',
 'Shereen',
 'Tanisha',
 'Tia',
 'Alonzo',
 'Alphonse',
 'Darnell',
 'Jamel',
 'Jerome',
 'Lamar',
 'Leroy',
 'Malik',
 'Terrence',
 'Torrance']

In [245]:
pairs = []

In [246]:
# Build the full cross product of the two subject lists; each element is a
# (subj_person_male entry, subj_person_female entry) tuple appended to pairs.
assert len(subj_person_male) == len(subj_person_female), "ERROR length mismatch"
pairs.extend(
    (first_term, second_term)
    for first_term in subj_person_male
    for second_term in subj_person_female
)

In [247]:
len(pairs)

400

In [248]:
pairs = set(pairs)

In [249]:
len(set(pairs))

400

In [250]:
covered_pairs = set()

In [251]:
# A combination counts as covered when some generated sentence pair mentions its
# two terms on opposite sides of the ";" separator, in either order.
# Membership is a plain substring test.
for sentence_pair in generated_input_pairs:
    # Split once per sentence pair instead of up to four times per combination.
    sentences = sentence_pair.split(";")
    left_sentence, right_sentence = sentences[0], sentences[1]
    for combination in pairs:
        first_term, second_term = combination[0], combination[1]
        if (first_term in right_sentence and second_term in left_sentence) \
            or (first_term in left_sentence and second_term in right_sentence):
            covered_pairs.add(combination)

In [252]:
covered_pairs

{('Alonzo', 'Adam'),
 ('Alonzo', 'Alan'),
 ('Alonzo', 'Amanda'),
 ('Alonzo', 'Andrew'),
 ('Alonzo', 'Ellen'),
 ('Alonzo', 'Frank'),
 ('Alonzo', 'Harry'),
 ('Alonzo', 'Heather'),
 ('Alonzo', 'Jack'),
 ('Alonzo', 'Josh'),
 ('Alonzo', 'Justin'),
 ('Alonzo', 'Katie'),
 ('Alonzo', 'Nancy'),
 ('Alonzo', 'Roger'),
 ('Alonzo', 'Ryan'),
 ('Alonzo', 'Stephanie'),
 ('Alphonse', 'Adam'),
 ('Alphonse', 'Alan'),
 ('Alphonse', 'Amanda'),
 ('Alphonse', 'Andrew'),
 ('Alphonse', 'Courtney'),
 ('Alphonse', 'Harry'),
 ('Alphonse', 'Jack'),
 ('Alphonse', 'Josh'),
 ('Alphonse', 'Justin'),
 ('Alphonse', 'Kristin'),
 ('Alphonse', 'Melanie'),
 ('Alphonse', 'Nancy'),
 ('Alphonse', 'Ryan'),
 ('Alphonse', 'Stephanie'),
 ('Darnell', 'Adam'),
 ('Darnell', 'Alan'),
 ('Darnell', 'Amanda'),
 ('Darnell', 'Andrew'),
 ('Darnell', 'Courtney'),
 ('Darnell', 'Frank'),
 ('Darnell', 'Harry'),
 ('Darnell', 'Heather'),
 ('Darnell', 'Jack'),
 ('Darnell', 'Josh'),
 ('Darnell', 'Justin'),
 ('Darnell', 'Melanie'),
 ('Darnell', 'Nan

In [253]:
len(covered_pairs)

297

In [254]:
score = len(covered_pairs)/len(pairs) * 100

In [255]:
print("Pairwise Coverage: {:.2f}%".format(score))

Pairwise Coverage: 74.25%


In [256]:
all_covered_pairs = all_covered_pairs.union(covered_pairs)

In [257]:
all_pairs = all_pairs.union(pairs)

### F. Test for Neutral (Sentiment) Sentences

In [258]:
noun_choice_list = list(range(6))
noun_choice_list

[0, 1, 2, 3, 4, 5]

In [259]:
# Directory holding the pickled inputs for this configuration, laid out as
# <saved_pickles>/<test_strategy>/<subject>/<mode>/.
# NOTE(review): the prefix is an absolute path on one developer's machine; the
# notebook cannot be re-run elsewhere until this is made configurable.
test_strategy, mode = "exploitation", "neutral-sentiments"
start_dir = (
    "/Users/ezekiel.soremekun/Documents/Coref-Fairness-Test-Generation/Ezekiel-Testbed/trained-sentiment-analyzers/Exploitation/saved_pickles/"
    + "/".join((test_strategy, subject, mode))
    + "/"
)

In [260]:
# Full paths of the three pickle files stored under start_dir.
generated_input1_path, generated_input2_path, generated_input_pairs_path = (
    start_dir + pickle_name
    for pickle_name in (
        "unique_input1_set.pickle",
        "unique_input2_set.pickle",
        "unique_input_pair_set.pickle",
    )
)

In [261]:
generated_input1, generated_input2, generated_input_pairs = None, None, None

In [262]:
# Deserialize the first collection of generated sentences.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(generated_input1_path, mode="rb") as pickle_file:
    generated_input1 = pickle.load(pickle_file)

In [263]:
# Deserialize the second collection of generated sentences.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(generated_input2_path, mode="rb") as pickle_file:
    generated_input2 = pickle.load(pickle_file)

In [264]:
# Deserialize the collection of generated sentence pairs.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(generated_input_pairs_path, mode="rb") as pickle_file:
    generated_input_pairs = pickle.load(pickle_file)

In [265]:
len(generated_input1)

320

In [266]:
# Peek at a single element of generated_input1 as a sanity check on the loaded
# data; the loop stops after the first item.
for i in generated_input1:
    print(i)
    break

I saw my girlfriend yesterday.


In [267]:
gen_nodes_1 = set()

In [268]:
# Terminal coverage for input1: keep every entry of all_nodes that occurs in at
# least one sentence of generated_input1.
# NOTE(review): `in` is a plain substring test, so a short terminal also matches
# inside longer words — confirm this is the intended notion of coverage.
gen_nodes_1.update(
    terminal
    for terminal in all_nodes
    if any(terminal in text for text in generated_input1)
)

In [269]:
print(len(gen_nodes_1))

43


In [270]:
# gen_nodes_1

In [271]:
gen_nodes_2 = set()

In [272]:
# Terminal coverage for input2: keep every entry of all_nodes that occurs in at
# least one sentence of generated_input2.
# NOTE(review): `in` is a plain substring test, so a short terminal also matches
# inside longer words — confirm this is the intended notion of coverage.
gen_nodes_2.update(
    terminal
    for terminal in all_nodes
    if any(terminal in text for text in generated_input2)
)

In [273]:
print(len(gen_nodes_2))

44


In [274]:
all_gen_terminals = gen_nodes_1.union(gen_nodes_2)

In [275]:
print(len(all_gen_terminals))

76


In [276]:
# Redundant with the earlier assignment of all_gen_terminals: union() on a set
# already returns a new set, so set(...) only copies the same contents.
all_gen_terminals = set(gen_nodes_1.union(gen_nodes_2))

In [277]:
len(all_gen_terminals)

76

In [278]:
print(len(gen_nodes_2))

44


In [279]:
score = len(all_gen_terminals)/len(all_nodes) * 100

In [280]:
print("Terminal Coverage: {:.2f}%".format(score))

Terminal Coverage: 33.48%


In [281]:
all_covered_nodes = all_covered_nodes.union(all_gen_terminals)

#### Compute Pairwise Sensitive Attribute Coverage

In [282]:
subj_person_male, subj_person_female = [], []

In [283]:
# Accumulate the subject terms of every noun_choice configuration into the two
# lists. subj_choice() is called once per configuration so that both lists are
# extended from the same result and the work is not repeated.
for noun_choice in noun_choice_list:
    subject_groups = subj_choice(noun_choice)
    subj_person_male += subject_groups[0]
    subj_person_female += subject_groups[1]

In [284]:
# subj_person_female

In [285]:
# subj_person_male

In [286]:
pairs = []

In [287]:
# assert(len(subj_person_male) == len(subj_person_female)), "ERROR length mismatch"
# for i in range(max(len(subj_person_male), len(subj_person_female))):
#     pairs.append((subj_person_male[i],subj_person_female[i]))

In [288]:
# Build the full cross product of the two subject lists; each element is a
# (subj_person_male entry, subj_person_female entry) tuple appended to pairs.
assert len(subj_person_male) == len(subj_person_female), "ERROR length mismatch"
pairs.extend(
    (first_term, second_term)
    for first_term in subj_person_male
    for second_term in subj_person_female
)

In [289]:
# pairs

In [290]:
len(pairs)

12769

In [291]:
len(set(pairs))

8099

In [292]:
pairs = set(pairs)

In [293]:
covered_pairs = set()

In [294]:
# A combination counts as covered when some generated sentence pair mentions its
# two terms on opposite sides of the ";" separator, in either order.
# Membership is a plain substring test.
for sentence_pair in generated_input_pairs:
    # Split once per sentence pair instead of up to four times per combination.
    sentences = sentence_pair.split(";")
    left_sentence, right_sentence = sentences[0], sentences[1]
    for combination in pairs:
        first_term, second_term = combination[0], combination[1]
        if (first_term in right_sentence and second_term in left_sentence) \
            or (first_term in left_sentence and second_term in right_sentence):
            covered_pairs.add(combination)

In [295]:
# covered_pairs

In [296]:
len(covered_pairs)

684

In [297]:
score = len(covered_pairs)/len(pairs) * 100

In [298]:
print("Pairwise Coverage: {:.2f}%".format(score))

Pairwise Coverage: 8.45%


In [299]:
all_covered_pairs = all_covered_pairs.union(covered_pairs)

In [300]:
all_pairs = all_pairs.union(pairs)

## Overall Coverage for all test configurations 

In [301]:
len(all_covered_nodes)

224

In [302]:
len(all_covered_pairs)

1512

In [303]:
overall_terminal_coverage_score = len(all_covered_nodes)/len(all_nodes) * 100

In [304]:
print("Overall Terminal Node Coverage: {:.2f}%".format(overall_terminal_coverage_score))

Overall Terminal Node Coverage: 98.68%


In [305]:
overall_pairwise_score = len(all_covered_pairs)/len(all_pairs) * 100

In [306]:
print("Overall Pairwise Coverage: {:.2f}%".format(overall_pairwise_score))

Overall Pairwise Coverage: 18.67%
