In [130]:
from nltk.corpus import wordnet as wn
import nltk

In [131]:
HYPER = lambda s: s.hypernyms()
IS_LIVING = 'living_thing.n.01'
NOUN = ['NN']
CHARACTER = 'CH'
TEXT = "The king then realized that if what the sweeper had said about him was not true what he had said about \
Dantila also could not be true. A person like Dantila could not have done what Gorambha had told him. The king also \
found that without Dantila the affairs of the state had suffered and civic administration had come to a standstill. \
The king immediately summoned the merchant to his palace and restored to him all the authority he had enjoyed before \
he fell out of king's favour."

In [132]:
def getPosTags(text):
    pos = []
    sent = nltk.sent_tokenize(text)
    for s in sent:
        words = nltk.word_tokenize(s)
        pos.append(nltk.pos_tag(words))
    return pos

In [133]:
def findIndexPos(pos_text,pos_tag):
    ret = []
    for o_index,pos_sent in enumerate(pos_text):
        for i_index,pos_word in enumerate(pos_sent):
            if(pos_word[1] == pos_tag):
                ret.append((o_index,i_index))
    return ret  

In [134]:
def getPosWordByIndex(pos_text,index):
    if(index == None):
        return None
    
    if(index[0] < len(pos_text)):
        if(index[1] < len(pos_text[index[0]])):
            return pos_text[index[0]][index[1]]
    return None

In [135]:
def getNextPosIndex(pos_text,index):
    if(index[0] < len(pos_text)):
        if(index[1] + 1 < len(pos_text[index[0]])):
            return (index[0],index[1] + 1)
    return None

In [136]:
def constructSynSetText(pos_word):
    if(pos_word is None):
        return None
    if(pos_word[1] in NOUN):
        return pos_word[0] + '.n.01'

In [137]:
def isLiving(pos_word):
    if(pos_word is None):
        return False
    wn_pos = constructSynSetText(pos_word)
    wn_lemma = wn.synset(wn_pos)
    l_hyper = str(list(wn_lemma.closure(HYPER)))
    if(IS_LIVING in l_hyper):
        return True

    return False

In [138]:
def markCharacter(pos_text,index,n_index):
    word = getPosWordByIndex(pos_text,index)[0] + '_' + getPosWordByIndex(pos_text,n_index)[0]
    pos_text[index[0]][index[1]] = (word,CHARACTER)
    pos_text[n_index[0]].pop(n_index[1])
    return pos_text

In [139]:
def returnCharacters(text):
    animate_characters=[]
    for a in text:
        for i in a:
            if i[1] == 'CH' or i[1] == 'NNP': 
                animate_characters.append(i[0])
    return animate_characters

In [140]:
pos_text = getPosTags(TEXT)
#pos_text

In [141]:
prp_index = findIndexPos(pos_text,'PRP$')
for index in prp_index:
    n_index = getNextPosIndex(pos_text,index)
    pos_word = getPosWordByIndex(pos_text,n_index)
    ret = isLiving(pos_word)
    if(ret):
        pos_text = markCharacter(pos_text,index,n_index)
#pos_text

In [142]:
animate_char_list = returnCharacters(pos_text)
print(animate_char_list)

['Dantila', 'Dantila', 'Gorambha', 'Dantila']


In [143]:
import spacy
nlp = spacy.load('en_coref_sm')    #Small model : Takes 10 secs to load
#nlp = spacy.load('en_coref_md')   #Medium model : Takes 2 mins to load
def replacePronouns(sent):
    temp = str(sent)
    doc = nlp(sent)
    #print(doc)
    if(doc._.has_coref):
        #print(doc._.coref_clusters[0].mentions)
        for i in range(len(doc._.coref_clusters)):
            main = doc._.coref_clusters[i].mentions[-1]._.coref_cluster.main
            main = str(main)
            for x in doc._.coref_clusters[i].mentions[1:]:
                x=str(x)
                temp = temp.replace(x,main)
        print(temp)
    else:
        print("XXXXXXXXXXXXXXX")
    


In [144]:
def identifyPronouns(sent):
    doc = nlp(sent)
    #print(doc)
    if(doc._.has_coref):
        print(doc._.coref_clusters[0].mentions)
        for i in range(len(doc._.coref_clusters)):
            print('MAIN Word : ',doc._.coref_clusters[i].mentions[-1]._.coref_cluster.main)
            print('Pronouns Used ( Which are supposed to be replaced) : ',[x for x in doc._.coref_clusters[i].mentions[1:]])
    print()

In [145]:
identifyPronouns(TEXT)
#replacePronouns("John and John's friend worked on the project everyday.")

[The king, him, he, The king, The king, his, him, he, he, king]
MAIN Word :  The king
Pronouns Used ( Which are supposed to be replaced) :  [him, he, The king, The king, his, him, he, he, king]
MAIN Word :  Dantila
Pronouns Used ( Which are supposed to be replaced) :  [Dantila]
MAIN Word :  A person like Dantila
Pronouns Used ( Which are supposed to be replaced) :  [him]



In [148]:
def replace_pronouns_with_nouns(sent):
    pronounMap = {}
    doc = nlp(sent)
    #print(doc)
    if(doc._.has_coref):
        #print(doc._.coref_clusters[0].mentions)
        for i in range(len(doc._.coref_clusters)):
            main = doc._.coref_clusters[i].mentions[-1]._.coref_cluster.main
            main = str(main)
            pro = []
            for x in doc._.coref_clusters[i].mentions[1:] :
                x=str(x)
                pro.append(x)
            pronounMap.update({main:pro})
           
    for key in pronounMap.keys():
        temp = str(TEXT)
        for val in pronounMap.get(key):
            temp = temp.replace (val,key)
        
    return temp

In [149]:
newSentence = replace_pronouns_with_nouns(TEXT)
print(newSentence)

[The king, him, he, The king, The king, his, him, he, he, king]
The king then realized that if what the sweeper had said about A person like Dantila was not true what he had said about Dantila also could not be true. A person like Dantila could not have done what Gorambha had told A person like Dantila. The king also found that without Dantila the affairs of the state had suffered and civic administration had come to a standstill. The king immediately summoned the merchant to his palace and restored to A person like Dantila all the authority he had enjoyed before he fell out of king's favour.


In [167]:
from stanfordcorenlp import StanfordCoreNLP
import pickle
import json

In [168]:
#sentence = u"The king then realized that if what Gorambha had said about the King was not true what Gorambha had said about Dantila also could not be true. A person like Dantila could not have done what Gorambha had told the king. The king also found that without Dantila the affairs of the state had suffered and civic administration had come to a standstill. The king immediately summoned Dantila to the king's palace and restored to Dantila all the authority Dantila had enjoyed before Dantila fell out of \
#king's favour."

#sentence = "John and John's friend worked on the project everyday."
def relationship_Identifier(sentence):
    nlp = StanfordCoreNLP(r'F:\UIC\SNLP\StanfordNLP_Tools\stanford-corenlp-full-2018-10-05')
    output = nlp.annotate(sentence, properties={"annotators":"tokenize,ssplit,pos,depparse,natlog,openie",
                                     "outputFormat": "json","openie.triple.strict":"true","splitter.disable" : "true"})

    if (type(output) is str):
        output = json.loads(output, strict=False)
    #pickle.dump(output, open( "save.p", "wb" ))

    result = [output["sentences"][0]["openie"] for item in output]
    #print(result)
    for i in result:
        for rel in i:
            relationSent=rel['relation'],rel['subject'],rel['object']
            print(relationSent)

    nlp.close()

In [169]:
sent = nltk.sent_tokenize(newSentence)
for s in sent:
    s = str(s)
    relationship_Identifier(s)

('had said about', 'sweeper', 'person like Dantila')
('had said about', 'sweeper', 'person')
('had said about', 'he', 'Dantila')
('had come to', 'civic administration', 'standstill')
('had come to', 'administration', 'standstill')
('suffered without', 'affairs', 'Dantila')
('summoned', 'king', 'merchant')
('immediately summoned', 'king', 'merchant')
('summoned merchant to', 'king', 'his palace')
('immediately summoned merchant to', 'king', 'his palace')
