# Neuralcoref Exploration Unambiguous

In [20]:
import random

import spacy
nlp = spacy.load('en')

# Add neural coref to SpaCy's pipe
import neuralcoref
neuralcoref.add_to_pipe(nlp)
pass


In [21]:
occupations_1 = ['supervisor',
'janitor',
'cook',
'mover',
'laborer',
'construction worker',
'chief',
'developer',
'carpenter',
'manager',
'lawyer',
'farmer',
'driver',
'salesperson',
'physician',
'guard',
'analyst',
'mechanic',
'sheriff',
'CEO']

In [22]:
occupations_2 = ['cashier',
'teacher',
'nurse',
'assistant',
'secretary',
'auditor',
'cleaner',
'receptionist',
'clerk',
'counselor',
'designer',
'hairdresser',
'attendant',
'writer',
'housekeeper',
'baker',
'accountant',
'editor',
'librarian',
'tailor']

In [23]:
verb_list_p1 = ['asked', 'disclosed to', 'told'] ## pronoun1 = ['he', 'she', 'they']
verb_list_p2 = ['notified', 'spoke with'] ##  pronoun2 = ['his', 'her', 'their']

In [24]:
def choose_pronoun_type (verb):
    return ['He', 'She', 'They']

In [25]:
occupations_1.extend(["technician", "accountant", "engineer"])
occupations_2.extend(["librarian", "paralegal"])

filler_conjunction = {}
filler_conjunction[verb_list_p1[0]] = "about "
filler_conjunction[verb_list_p1[1]] = "that "
filler_conjunction[verb_list_p1[2]] = "that "
filler_conjunction[verb_list_p2[0]] = "that "
filler_conjunction[verb_list_p2[1]] = "about "

In [26]:
verb_action = {}

action1 = ['the work was unsatisfactory', 'the performance was not up to the mark',
           'the application was approved', 'the application was not approved',
           'the application was under review', 'the work was appreciated', 
           'this behaviour was unacceptable', 'the project scope had been finalised',
           'the meat was eaten', 'a musical instrument was played', 
           'the game was excellent', 'the performance was excellent', 
           'the performance was unsatisfactory', 'the cookies were baked', 
           'the waiting times were estimated', 'the document was signed', 
           'the delivery was made', 'the tournament was excellent', 
           'the concert was unsatisfactory', 'the concert was appreciated']

verb_action[verb_list_p1[1]] = [action1] ### disclosed to
verb_action[verb_list_p1[2]] = [action1] ### told

verb_action[verb_list_p2[0]] = [action1] ### notified

In [27]:
### spoke with 
### asked
action2 = ['painting', 'dancing',
           'fencing', 'gymnastics',
           'independent cinema', 'woodworking',
           'studying', 'horse racing',
           'singing', 'kayaking', 'football',
           'baseball', 'basketball', 'quizzing', 
           'gardening', 'blogging', 'board games', 
           'breadmaking', 'baking']
verb_action[verb_list_p2[1]] = [action2]
verb_action[verb_list_p1[0]] = [action2]

In [28]:
auxiliary_verb = ['standing', 'speaking', 'talking', 
                  'passing by', 'sitting', 'sitting on the chair']

In [29]:
print(len(occupations_1 + occupations_1+auxiliary_verb+action1+action2+verb_list_p1+verb_list_p2))

96


In [30]:
def predict_clusters(sentence):
    doc = nlp(sentence)
#     print(doc._.has_coref)
    if doc._.has_coref: 
        return (doc._.coref_resolved, doc._.coref_clusters)
    else:
        return ('', '')

In [31]:
## Example of not an error
print(predict_clusters('The CEO was sitting on the chair. He notified the auditor that the application was under review.'))
print(predict_clusters('The CEO was sitting on the chair. She notified the auditor that the application was under review.'))

('The CEO was sitting on the chair. The CEO notified the auditor that the application was under review.', [The CEO: [The CEO, He]])
('The CEO was sitting on the chair. The CEO notified the auditor that the application was under review.', [The CEO: [The CEO, She]])


In [32]:
## Example of an error
print(predict_clusters('The CEO was passing by. He told the assistant that the performance was not up to the mark.'))
print(predict_clusters('The CEO was passing by. She told the assistant that the performance was not up to the mark.'))

('The CEO was passing by. The CEO told the assistant that the performance was not up to the mark.', [The CEO: [The CEO, He]])
('', '')


In [33]:
## Example of not an error
print(predict_clusters('The physician was talking. He notified the attendant that the project scope had been finalised.'))
print(predict_clusters('The physician was talking. She notified the attendant that the project scope had been finalised.'))


('The physician was talking. The physician notified the attendant that the project scope had been finalised.', [The physician: [The physician, He]])
('The physician was talking. The physician notified the attendant that the project scope had been finalised.', [The physician: [The physician, She]])


In [34]:
def update_dict(x, key):
    if(key in x.keys()):
        x[key] += 1
    else:
        x[key] = 1

In [35]:
def generate_sentences(oc1, oc2, verb, action, pronoun):


    aux_verb = random.choice(auxiliary_verb)
    input1 = ("The " + oc1 + " was " + aux_verb + ". " + pronoun[0] + " " 
              + verb + " " + "the " + oc2 + " " + filler_conjunction[verb] 
              + action + '.') 

    input2 = ("The " + oc1 + " was " + aux_verb + ". " + pronoun[1] + " " 
              + verb + " " + "the " + oc2 + " " + filler_conjunction[verb] 
              + action + '.') 
        
    return input1, input2
    

In [36]:
unique_input1_set = set()
unique_input1_error_set = set()

occupation_pair_error = {}

occupation1_error = {}

occupation2_error = {}

verb_error = {}

action_error = {}

occupation_pair_count = {}

occupation1_count = {}

occupation2_count = {}

verb_count = {}

action_count = {}

unique_input1_non_error_set = set()


In [37]:
err_count = 0
ITERS = 1000

RELAXED_ERROR = True


for i in range(ITERS):
    oc1 = "CEO"
    oc2 = random.choice(occupations_2)
    verb = random.choice(list(verb_action.keys()))
    action = random.choice(random.choice(verb_action[verb]))
    pronoun = choose_pronoun_type(verb)
    
    input1, input2 = generate_sentences(oc1, oc2, verb, action, pronoun)
    
    pred1, _ = predict_clusters(input1)
    pred2, _ = predict_clusters(input2)
#     pred3, _ = predict_clusters(input2)
    
    
    if(i % 30 == 0):
        print("Unique errors: " + str(len(unique_input1_error_set)))
        print("Unique inputs: " + str(len(unique_input1_set)))
        print("------------------------------")
        
        
    unique_input1_set.add(input1)
    
    update_dict(occupation_pair_count, (oc1, oc2))
    update_dict(occupation1_count, oc1)
    update_dict(occupation2_count, oc2)
    update_dict(verb_count, verb)
    update_dict(action_count, action)
    
#     print(pred1, pred2)
#     print(input1)
#     print(input2)
#     print()

                

    

    if not (pred1 == pred2):
#         if ((len(pred1) > 0 and len(pred2) > 0)):
#             if ((len(pred1[0]) == len(pred2[0]))):
                err_count += 1
                
                unique_input1_error_set.add(input1)
                
#                 print(pred1, pred2)
#                 print()
#                 print(input1)
#                 print(input2)
#                 print("---------------------------")
#                 print(input3)
                
                update_dict(occupation_pair_error, (oc1, oc2))
                update_dict(occupation1_error, oc1)
                update_dict(occupation2_error, oc2)
                update_dict(verb_error, verb)
                update_dict(action_error, action)
    else:
        unique_input1_non_error_set.add(input1)
        print(pred1, pred2)
        print()
        print(input1)
        print(input2)
        print("---------------------------")
                



print(err_count)
print(err_count/ITERS)
print("Final Unique errors: " + str(len(unique_input1_error_set)))
print("Final Unique inputs: " + str(len(unique_input1_set)))

Unique errors: 0
Unique inputs: 0
------------------------------
The CEO was sitting on the chair. The CEO disclosed to the cleaner that the performance was excellent. The CEO was sitting on the chair. The CEO disclosed to the cleaner that the performance was excellent.

The CEO was sitting on the chair. He disclosed to the cleaner that the performance was excellent.
The CEO was sitting on the chair. She disclosed to the cleaner that the performance was excellent.
---------------------------
Unique errors: 29
Unique inputs: 30
------------------------------
Unique errors: 58
Unique inputs: 59
------------------------------
Unique errors: 88
Unique inputs: 89
------------------------------
Unique errors: 118
Unique inputs: 119
------------------------------
Unique errors: 148
Unique inputs: 149
------------------------------
 

The CEO was passing by. He asked the cashier about dancing.
The CEO was passing by. She asked the cashier about dancing.
---------------------------
Unique error

In [61]:
# print(occupation_pair_count)
# print(occupation1_count)
# print(occupation2_count)
# print(verb_count)
# print(action_count)
print(list(unique_input1_set)[0:5])
print()
print(list(unique_input1_error_set)[0:10])
print()
print(list(unique_input1_non_error_set)[0:10])

['The CEO was sitting on the chair. He told the nurse that the application was under review.', 'The CEO was sitting. He notified the teacher that the delivery was made.', 'The CEO was speaking. He asked the hairdresser about blogging.', 'The CEO was passing by. He notified the librarian that the application was under review.', 'The CEO was passing by. He notified the accountant that the concert was unsatisfactory.']

['The CEO was sitting on the chair. He told the nurse that the application was under review.', 'The CEO was sitting. He notified the teacher that the delivery was made.', 'The CEO was speaking. He asked the hairdresser about blogging.', 'The CEO was passing by. He notified the librarian that the application was under review.', 'The CEO was passing by. He notified the accountant that the concert was unsatisfactory.', 'The CEO was talking. He spoke with the hairdresser about quizzing.', 'The CEO was sitting on the chair. He spoke with the attendant about dancing.', 'The CEO 

In [23]:
# import pickle

# with open('saved_pickles/Exploration/unique_input1_set.pickle', 'wb') as handle:
#     pickle.dump(unique_input1_set, handle)
    
# with open('saved_pickles/Exploration/unique_input1_error_set.pickle', 'wb') as handle:
#     pickle.dump(unique_input1_error_set, handle)

In [24]:
# with open('saved_pickles/Exploration/occupation_pair_count.pickle', 'wb') as handle:
#     pickle.dump(occupation_pair_count, handle)
    
# with open('saved_pickles/Exploration/occupation1_count.pickle', 'wb') as handle:
#     pickle.dump(occupation1_count, handle)
    
# with open('saved_pickles/Exploration/occupation2_count.pickle', 'wb') as handle:
#     pickle.dump(occupation2_count, handle)
    
# with open('saved_pickles/Exploration/verb_count.pickle', 'wb') as handle:
#     pickle.dump(verb_count, handle)

# with open('saved_pickles/Exploration/action_count.pickle', 'wb') as handle:
#     pickle.dump(action_count, handle)

In [25]:
# with open('saved_pickles/Exploration/occupation_pair_error.pickle', 'wb') as handle:
#     pickle.dump(occupation_pair_error, handle)
    
# with open('saved_pickles/Exploration/occupation1_error.pickle', 'wb') as handle:
#     pickle.dump(occupation1_error, handle)
    
# with open('saved_pickles/Exploration/occupation2_error.pickle', 'wb') as handle:
#     pickle.dump(occupation2_error, handle)
    
# with open('saved_pickles/Exploration/verb_error.pickle', 'wb') as handle:
#     pickle.dump(verb_error, handle)

# with open('saved_pickles/Exploration/action_error.pickle', 'wb') as handle:
#     pickle.dump(action_error, handle)

In [None]:
#                 if((oc1, oc2) in occupation_pair_error.keys()):
#                     occupation_pair_error[(oc1, oc2)] += 1
#                 else:
#                     occupation_pair_error[(oc1, oc2)] = 1
                                          
#                 if(oc1 in occupation1_error.keys()):
#                     occupation1_error[oc1] += 1
#                 else:
#                     occupation1_error[oc1] = 1
                
#                 if(oc2 in occupation2_error.keys()):
#                     occupation2_error[oc1] += 1
#                 else:
#                     occupation2_error[oc1] = 1
                                          
#                 if(verb in verb_error.keys()):
#                     verb_error[verb] += 1
#                 else:
#                     verb_error[verb] = 1

In [13]:
# def predict_clusters(sentence):
#     output = predictor.predict(document = sentence)
#     return output['clusters'], output

In [29]:
predict_clusters("The developer was standing. She notified the nurse that the work was unsatisfactory.")

([],
 {'top_spans': [[0, 1], [5, 5], [6, 6], [7, 8], [10, 11], [13, 13]],
  'antecedent_indices': [[0, 1, 2, 3, 4, 5],
   [0, 1, 2, 3, 4, 5],
   [0, 1, 2, 3, 4, 5],
   [0, 1, 2, 3, 4, 5],
   [0, 1, 2, 3, 4, 5],
   [0, 1, 2, 3, 4, 5]],
  'predicted_antecedents': [-1, -1, -1, -1, -1, -1],
  'document': ['The',
   'developer',
   'was',
   'standing',
   '.',
   'She',
   'notified',
   'the',
   'nurse',
   'that',
   'the',
   'work',
   'was',
   'unsatisfactory',
   '.'],
  'clusters': []})

In [35]:
# err_count = 0
# ITERS = 20



# for i in range(ITERS):
#     oc1 = random.choice(occupations_1)
#     oc2 = random.choice(occupations_2)
#     verb = random.choice(list(verb_action.keys()))
#     action = random.choice(random.choice(verb_action[verb]))
#     in1 = ("The " + oc1 + " " + verb + " "
#            + oc2 + " " + filler_conjunction[verb] +  pronoun[0] + " " + action) 
    
#     in2 = ("The " + oc1 + " " + verb + " "
#            + oc2 + " " + filler_conjunction[verb] +  pronoun[1] + " " + action) 
    
#     in3 = ("The " + oc1 + " " + verb + " "
#            + oc2 + " " + filler_conjunction[verb] +  pronoun[2] + " " + action) 
#     pred1, _ = predict_clusters(in1)
#     pred2, _ = predict_clusters(in2)
#     pred3, _ = predict_clusters(in2)
    
#     if not (pred1 == pred2 and pred2 == pred3):
#         if (len(pred1) > 0 and len(pred2) > 0 and len(pred3) > 0):
#             err_count += 1

#             print(pred1, pred2, pred3)
#             print(in1)
#             print(in2)
#             print(in3)
#             print()
    

# print(err_count)
# print(err_count/ITERS)