# Stanford CoreNLP Exploitation Unambiguous

In [1]:
import random
import pickle

import numpy as np

import stanza
from stanza.server import CoreNLPClient
stanza.download('en') 
import sys, os


# Build a Stanza pipeline with default arguments, annotate a two-sentence
# example and print the dependency parse of its first sentence.
# (No spaCy / neural-coref component is involved in this cell.)
nlp = stanza.Pipeline()
doc = nlp("Barack Obama was born in Hawaii. He was elected president in 2008.")
doc.sentences[0].print_dependencies()
pass  # no-op; presumably kept so the cell does not echo a trailing value




Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/master/resources_1.0.0.json: 115kB [00:00, 2.66MB/s]                    
2020-07-20 15:51:10 INFO: Downloading default packages for language: en (English)...
2020-07-20 15:51:11 INFO: File exists: /Users/sakshiudeshi/stanza_resources/en/default.zip.
2020-07-20 15:51:14 INFO: Finished downloading models and saved to /Users/sakshiudeshi/stanza_resources.
2020-07-20 15:51:14 INFO: Loading these models for language: en (English):
| Processor | Package   |
-------------------------
| tokenize  | ewt       |
| pos       | ewt       |
| lemma     | ewt       |
| depparse  | ewt       |
| ner       | ontonotes |

2020-07-20 15:51:14 INFO: Use device: cpu
2020-07-20 15:51:14 INFO: Loading: tokenize
2020-07-20 15:51:14 INFO: Loading: pos
2020-07-20 15:51:14 INFO: Loading: lemma
2020-07-20 15:51:15 INFO: Loading: depparse
2020-07-20 15:51:15 INFO: Loading: ner
2020-07-20 15:51:16 INFO: Done loading processors!


('Barack', '4', 'nsubj:pass')
('Obama', '1', 'flat')
('was', '4', 'aux:pass')
('born', '0', 'root')
('in', '6', 'case')
('Hawaii', '4', 'obl')
('.', '4', 'punct')


In [4]:
# Tell the stanza CoreNLP wrapper where the CoreNLP distribution lives.
# NOTE(review): this is a machine-specific absolute path and has to be edited
# before the notebook can run on another machine.
os.environ["CORENLP_HOME"] = "/Users/sakshiudeshi/Documents/SUTD/Research/Coref-Fairness-Test-Generation/Sakshi-Testbed/Stanford-CoreNLP-Coref/stanford-corenlp-4.0.0"
text = "Chris Manning is a nice person. Chris wrote a simple sentence. He also gives oranges to people."
# Annotate the sample text through a CoreNLP server configured with the
# annotator chain ending in 'coref'.
# NOTE(review): `client` stays bound after this `with` block exits and is
# reused by predict_clusters() further down. The second "Starting server with
# command" line in the output of the generate_test_sentences run suggests the
# client relaunches the server on demand -- confirm that relaunched server is
# eventually shut down.
with CoreNLPClient(
        annotators=['tokenize','ssplit','pos','lemma','ner', 'parse', 'depparse','coref'],
        timeout=30000,
        memory='16G') as client:
    ann = client.annotate(text)

Starting server with command: java -Xmx16G -cp /Users/sakshiudeshi/Documents/SUTD/Research/Coref-Fairness-Test-Generation/Sakshi-Testbed/Stanford-CoreNLP-Coref/stanford-corenlp-4.0.0/* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 30000 -threads 5 -maxCharLength 100000 -quiet True -serverProperties corenlp_server-c46ac54451bb48a3.props -preload tokenize,ssplit,pos,lemma,ner,parse,depparse,coref


In [5]:
# Verbs annotated in the original notes with the pronoun set
# he / she / they.
verb_list_p1 = [
    'asked',
    'disclosed to',
    'told',
]

# Verbs annotated in the original notes with the pronoun set
# his / her / their.
verb_list_p2 = [
    'notified',
    'spoke with',
]

In [6]:
def choose_pronoun_type(verb):
    """Return the capitalised pronouns used to build the sentence variants.

    ``verb`` is accepted but not consulted: the same three pronouns are
    returned, as a fresh list, for every verb.
    """
    subject_pronouns = ['He', 'She', 'They']
    return subject_pronouns

In [7]:
# Connector placed between the object noun phrase and the action text,
# keyed by verb. The trailing space is part of the value.
filler_conjunction = {
    verb_list_p1[0]: "about ",  # asked
    verb_list_p1[1]: "that ",   # disclosed to
    verb_list_p1[2]: "that ",   # told
    verb_list_p2[0]: "that ",   # notified
    verb_list_p2[1]: "about ",  # spoke with
}

In [8]:
# Clause-style actions: full clauses used by the verbs whose connector
# is "that ".
action1 = [
    'the work was unsatisfactory',
    'the performance was not up to the mark',
    'the application was approved',
    'the application was not approved',
    'the application was under review',
    'the work was appreciated',
    'this behaviour was unacceptable',
    'the project scope had been finalised',
    'the meat was eaten',
    'a musical instrument was played',
    'the game was excellent',
    'the performance was excellent',
    'the performance was unsatisfactory',
    'the cookies were baked',
    'the waiting times were estimated',
    'the document was signed',
    'the delivery was made',
    'the tournament was excellent',
    'the concert was unsatisfactory',
    'the concert was appreciated',
]

# Each verb maps to a list of action pools (a single pool per verb here).
verb_action = {
    verb_list_p1[1]: [action1],  # disclosed to
    verb_list_p1[2]: [action1],  # told
    verb_list_p2[0]: [action1],  # notified
}

In [9]:
# Topic-style actions: activity names used by the two verbs whose
# connector is "about " ('spoke with' and 'asked').
action2 = [
    'painting',
    'dancing',
    'fencing',
    'gymnastics',
    'independent cinema',
    'woodworking',
    'studying',
    'horse racing',
    'singing',
    'kayaking',
    'football',
    'baseball',
    'basketball',
    'quizzing',
    'gardening',
    'blogging',
    'board games',
    'breadmaking',
    'baking',
]

# Register the topic pool for both verbs; insertion order is kept
# ('spoke with' first, then 'asked').
verb_action.update({
    verb_list_p2[1]: [action2],  # spoke with
    verb_list_p1[0]: [action2],  # asked
})

In [10]:
# Activities used in the scene-setting first sentence
# ("The <occupation> was <activity>.").
auxiliary_verb = [
    'standing',
    'speaking',
    'talking',
    'passing by',
    'sitting',
    'sitting on the chair',
]

In [11]:
def predict_clusters(sentence):
    """Annotate ``sentence`` with the notebook-level CoreNLP ``client`` and
    return the ``corefChain`` field of the resulting annotation."""
    annotation = client.annotate(sentence)
    return annotation.corefChain

In [12]:
def get_pred_list(pred_str):
    """Split ``pred_str`` on newlines and keep only the lines that do not
    contain the substring 'gender'.

    Returns the surviving lines, in their original order, as a list.
    """
    return [row for row in pred_str.split('\n') if 'gender' not in row]

In [13]:
def get_pred_equivalence(pred1, pred2):
    """Return True when the string forms of ``pred1`` and ``pred2`` are
    identical line-for-line once get_pred_list() has filtered them."""
    filtered = [get_pred_list(str(pred)) for pred in (pred1, pred2)]
    return filtered[0] == filtered[1]

In [14]:
def update_dict(x, key):
    """Increment the counter stored under ``key`` in ``x`` in place.

    A key that is not present yet starts at 1.
    """
    x[key] = x.get(key, 0) + 1

In [15]:
def generate_sentences(oc1, oc2, verb, action, pronoun):
    """Build two sentence pairs that differ only in the pronoun.

    One auxiliary activity is drawn at random and shared by both outputs.
    The first output uses ``pronoun[0]``, the second ``pronoun[1]``.

    Returns:
        (input1, input2) as a tuple of strings.
    """
    aux_verb = random.choice(auxiliary_verb)

    # Everything before and after the pronoun is identical in both variants.
    opening = "The " + oc1 + " was " + aux_verb + ". "
    remainder = (" " + verb + " " + "the " + oc2 + " "
                 + filler_conjunction[verb] + action + '.')

    input1 = opening + pronoun[0] + remainder
    input2 = opening + pronoun[1] + remainder
    return input1, input2
    

In [16]:
def _load_exploration_pickle(path):
    """Read and return the single object pickled at ``path``.

    NOTE(review): ``pickle.load`` can execute arbitrary code embedded in the
    file, so only load pickles that this project produced itself.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Artifacts stored under saved_pickles/Exploration (presumably written by the
# exploration-phase notebook -- confirm). The sets are used to skip inputs
# that were already seen; the *_error / *_count pairs are the statistics the
# weighted sampler is built from.
unique_input1_set = _load_exploration_pickle('saved_pickles/Exploration/unique_input1_set.pickle')
unique_input1_error_set = _load_exploration_pickle('saved_pickles/Exploration/unique_input1_error_set.pickle')

# Error tallies.
occupation_pair_error = _load_exploration_pickle('saved_pickles/Exploration/occupation_pair_error.pickle')
occupation1_error = _load_exploration_pickle('saved_pickles/Exploration/occupation1_error.pickle')
occupation2_error = _load_exploration_pickle('saved_pickles/Exploration/occupation2_error.pickle')
verb_error = _load_exploration_pickle('saved_pickles/Exploration/verb_error.pickle')
action_error = _load_exploration_pickle('saved_pickles/Exploration/action_error.pickle')

# Occurrence tallies (denominators for the error rates).
occupation_pair_count = _load_exploration_pickle('saved_pickles/Exploration/occupation_pair_count.pickle')
occupation1_count = _load_exploration_pickle('saved_pickles/Exploration/occupation1_count.pickle')
occupation2_count = _load_exploration_pickle('saved_pickles/Exploration/occupation2_count.pickle')
verb_count = _load_exploration_pickle('saved_pickles/Exploration/verb_count.pickle')
action_count = _load_exploration_pickle('saved_pickles/Exploration/action_count.pickle')

In [17]:
def get_sorted_dict(D):
    """Return a new dict holding the items of ``D`` ordered by value,
    largest first. Items with equal values keep their original order."""
    ranked = sorted(D.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ranked)

In [18]:
def get_error_rate_dict(error_dict, count_dict):
    """Compute ``error_dict[k] / count_dict[k]`` for every key of
    ``error_dict`` and return the rates ordered by get_sorted_dict().

    Every key of ``error_dict`` must also be present in ``count_dict``.
    """
    rates = {name: error_dict[name] / count_dict[name] for name in error_dict}
    return get_sorted_dict(rates)

In [19]:
def get_probability_dict(error_dict, count_dict):
    """Normalise the per-key error rates so that they sum to 1.

    The rates come from get_error_rate_dict(); each one is divided by the
    sum of all rates. If that sum is zero while there is at least one key,
    the division raises ZeroDivisionError.
    """
    rates = get_error_rate_dict(error_dict, count_dict)
    total = sum(rates.values())
    return {name: rate / total for name, rate in rates.items()}
        
        

In [20]:
def get_weighted_random_choice(error_dict, count_dict, probablilities_dict = None):
    """Draw one key at random according to a probability mapping.

    Args:
        error_dict: per-key error tallies; only used when
            ``probablilities_dict`` is not supplied.
        count_dict: per-key occurrence tallies; only used when
            ``probablilities_dict`` is not supplied.
        probablilities_dict: optional precomputed ``{key: probability}``
            mapping. When given, it is used as-is and the two tallies are
            ignored. (The parameter name's spelling is kept because callers
            pass it by keyword.)

    Returns:
        One key of the probability mapping.
    """
    # Identity test: only a missing argument triggers the recomputation.
    if probablilities_dict is None:
        probability_dict = get_probability_dict(error_dict, count_dict)
    else:
        probability_dict = probablilities_dict

    candidates = list(probability_dict.keys())
    weights = list(probability_dict.values())
    # Sample an index rather than the key itself so the original key object
    # (e.g. a tuple) is returned unchanged.
    return candidates[np.random.choice(len(candidates), p=weights)]

In [30]:
# Fresh, empty accumulators for this run; generate_test_sentences() fills
# them in place.

# Generated sentences (first variant of each pair) and the subset that
# produced a mismatch.
unique_input1_set_exploitation = set()
unique_input1_error_set_exploitation = set()

# Error tallies, one independent dict per dimension.
occupation_pair_error_exploitation = dict()
occupation1_error_exploitation = dict()
occupation2_error_exploitation = dict()
verb_error_exploitation = dict()
action_error_exploitation = dict()

# Occurrence tallies, one independent dict per dimension.
occupation_pair_count_exploitation = dict()
occupation1_count_exploitation = dict()
occupation2_count_exploitation = dict()
verb_count_exploitation = dict()
action_count_exploitation = dict()


In [31]:
# Sanity check of the weighted sampler on the first-occupation statistics.
# Normalised sampling weights: error rates divided by their sum.
oc1_probability = get_probability_dict(occupation1_error, occupation1_count)

print(oc1_probability)
print()

# Raw per-occupation error rates (errors / occurrences) the weights come from.
error_rate_dict = get_error_rate_dict(occupation1_error, occupation1_count)
print(error_rate_dict)
print()

# Draw 100000 samples with the precomputed weights and tally them; the
# printed counts can be compared by eye against the weights printed above.
output_dict = {}
for i in range(100000):
    oc1 = get_weighted_random_choice(occupation1_error, occupation1_count, probablilities_dict=oc1_probability)
    update_dict(output_dict, oc1)
print(get_sorted_dict(output_dict))

{'technician': 0.805964295571082, 'manager': 0.06766120012201676, 'developer': 0.06667344537570993, 'driver': 0.05970105893119126}

{'technician': 0.08823529411764706, 'manager': 0.007407407407407408, 'developer': 0.0072992700729927005, 'driver': 0.006535947712418301}

{'technician': 80907, 'manager': 6715, 'developer': 6494, 'driver': 5884}


In [28]:
def generate_test_sentences(ITERS=3000):
    """Run ``ITERS`` rounds of error-weighted test generation.

    Each round:
      1. samples two occupations with get_weighted_random_choice(), using
         weights derived from the loaded occupation1/occupation2 statistics;
      2. picks a verb and one of its actions uniformly at random;
      3. builds a sentence pair with generate_sentences(), whose two members
         differ only in the pronoun;
      4. runs predict_clusters() on both sentences and counts an error when
         get_pred_equivalence() reports that the outputs differ.

    Side effects: updates the module-level ``*_exploitation`` sets and
    tallies in place, prints a progress report every 30 iterations and a
    final summary. Nothing is returned.

    Args:
        ITERS: number of sentence pairs to generate.
    """
    err_count = 0

    # The sampling weights do not change during the run; compute them once.
    oc1_probability = get_probability_dict(occupation1_error, occupation1_count)
    oc2_probability = get_probability_dict(occupation2_error, occupation2_count)

    for i in range(ITERS):
        oc1 = get_weighted_random_choice(occupation1_error, occupation1_count, probablilities_dict=oc1_probability)
        oc2 = get_weighted_random_choice(occupation2_error, occupation2_count, probablilities_dict=oc2_probability)
        verb = random.choice(list(verb_action.keys()))
        # verb_action[verb] is a list of action pools: pick a pool, then an action.
        action = random.choice(random.choice(verb_action[verb]))
        pronoun = choose_pronoun_type(verb)
        input1, input2 = generate_sentences(oc1, oc2, verb, action, pronoun)

        pred1 = predict_clusters(input1)
        pred2 = predict_clusters(input2)

        # Progress report; it reflects the state before this round is recorded.
        if i % 30 == 0:
            print("Unique errors: " + str(len(unique_input1_error_set_exploitation)))
            print("Unique inputs: " + str(len(unique_input1_set_exploitation)))
            print("Iterations: " + str(i))
            print("------------------------------")

        # Only inputs absent from the previously loaded set count as new.
        if input1 not in unique_input1_set:
            unique_input1_set_exploitation.add(input1)

        update_dict(occupation_pair_count_exploitation, (oc1, oc2))
        update_dict(occupation1_count_exploitation, oc1)
        update_dict(occupation2_count_exploitation, oc2)
        update_dict(verb_count_exploitation, verb)
        update_dict(action_count_exploitation, action)

        if not get_pred_equivalence(pred1, pred2):
            err_count += 1

            # Only errors absent from the previously loaded set count as new.
            if input1 not in unique_input1_error_set:
                unique_input1_error_set_exploitation.add(input1)

            update_dict(occupation_pair_error_exploitation, (oc1, oc2))
            update_dict(occupation1_error_exploitation, oc1)
            update_dict(occupation2_error_exploitation, oc2)
            update_dict(verb_error_exploitation, verb)
            update_dict(action_error_exploitation, action)

    print(err_count)
    # Report a zero rate instead of raising ZeroDivisionError when ITERS is 0.
    print(err_count / ITERS if ITERS else 0.0)
    print("Final Unique errors: " + str(len(unique_input1_error_set_exploitation)))
    print("Final Unique inputs: " + str(len(unique_input1_set_exploitation)))

In [32]:
# Run the generation loop for 3000 sentence pairs; every pair triggers two
# predict_clusters() calls, so this cell is the expensive one.
generate_test_sentences(ITERS=3000)

Starting server with command: java -Xmx16G -cp /Users/sakshiudeshi/Documents/SUTD/Research/Coref-Fairness-Test-Generation/Sakshi-Testbed/Stanford-CoreNLP-Coref/stanford-corenlp-4.0.0/* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 30000 -threads 5 -maxCharLength 100000 -quiet True -serverProperties corenlp_server-c46ac54451bb48a3.props -preload tokenize,ssplit,pos,lemma,ner,parse,depparse,coref
Unique errors: 0
Unique inputs: 0
Iterations: 0
------------------------------
Unique errors: 4
Unique inputs: 29
Iterations: 30
------------------------------
Unique errors: 9
Unique inputs: 59
Iterations: 60
------------------------------
Unique errors: 11
Unique inputs: 87
Iterations: 90
------------------------------
Unique errors: 13
Unique inputs: 115
Iterations: 120
------------------------------
Unique errors: 15
Unique inputs: 145
Iterations: 150
------------------------------
Unique errors: 15
Unique inputs: 174
Iterations: 180
------------------------------
Uniqu

Unique errors: 224
Unique inputs: 2209
Iterations: 2730
------------------------------
Unique errors: 225
Unique inputs: 2226
Iterations: 2760
------------------------------
Unique errors: 227
Unique inputs: 2241
Iterations: 2790
------------------------------
Unique errors: 229
Unique inputs: 2265
Iterations: 2820
------------------------------
Unique errors: 229
Unique inputs: 2286
Iterations: 2850
------------------------------
Unique errors: 229
Unique inputs: 2304
Iterations: 2880
------------------------------
Unique errors: 232
Unique inputs: 2325
Iterations: 2910
------------------------------
Unique errors: 235
Unique inputs: 2343
Iterations: 2940
------------------------------
Unique errors: 238
Unique inputs: 2360
Iterations: 2970
------------------------------
333
0.111
Final Unique errors: 239
Final Unique inputs: 2379


In [33]:
import pickle

# Persist the two input sets collected during this run.
_input_set_pickles = (
    ('saved_pickles/Exploitation/unique_input1_set.pickle', unique_input1_set_exploitation),
    ('saved_pickles/Exploitation/unique_input1_error_set.pickle', unique_input1_error_set_exploitation),
)
for _path, _payload in _input_set_pickles:
    with open(_path, 'wb') as handle:
        pickle.dump(_payload, handle)

In [34]:
# Persist the occurrence tallies collected during this run.
_count_pickles = (
    ('saved_pickles/Exploitation/occupation_pair_count.pickle', occupation_pair_count_exploitation),
    ('saved_pickles/Exploitation/occupation1_count.pickle', occupation1_count_exploitation),
    ('saved_pickles/Exploitation/occupation2_count.pickle', occupation2_count_exploitation),
    ('saved_pickles/Exploitation/verb_count.pickle', verb_count_exploitation),
    ('saved_pickles/Exploitation/action_count.pickle', action_count_exploitation),
)
for _path, _payload in _count_pickles:
    with open(_path, 'wb') as handle:
        pickle.dump(_payload, handle)

In [35]:
# Persist the error tallies collected during this run.
_error_pickles = (
    ('saved_pickles/Exploitation/occupation_pair_error.pickle', occupation_pair_error_exploitation),
    ('saved_pickles/Exploitation/occupation1_error.pickle', occupation1_error_exploitation),
    ('saved_pickles/Exploitation/occupation2_error.pickle', occupation2_error_exploitation),
    ('saved_pickles/Exploitation/verb_error.pickle', verb_error_exploitation),
    ('saved_pickles/Exploitation/action_error.pickle', action_error_exploitation),
)
for _path, _payload in _error_pickles:
    with open(_path, 'wb') as handle:
        pickle.dump(_payload, handle)

In [38]:
# Show up to 20 of the newly found error-inducing sentences. The source is a
# set, so which entries appear (and in what order) is not meaningful.
print(list(unique_input1_error_set_exploitation)[:20])

['The technician was passing by. He asked the housekeeper about fencing.', 'The technician was passing by. He notified the housekeeper that the project scope had been finalised.', 'The technician was passing by. He notified the teacher that the application was under review.', 'The technician was passing by. He disclosed to the teacher that the game was excellent.', 'The technician was passing by. He disclosed to the clerk that the application was approved.', 'The technician was passing by. He told the clerk that the application was not approved.', 'The technician was passing by. He told the auditor that the meat was eaten.', 'The technician was passing by. He disclosed to the clerk that the meat was eaten.', 'The technician was passing by. He notified the paralegal that the waiting times were estimated.', 'The technician was passing by. He spoke with the auditor about horse racing.', 'The technician was passing by. He disclosed to the clerk that the performance was excellent.', 'The te