In [1]:
import spacy
from nltk.stem import WordNetLemmatizer

In [7]:
with open('pay') as data_file:
        sentences = data_file.readlines()

In [8]:
nlp = spacy.load('en_core_web_lg')
wordnet_lemmatizer = WordNetLemmatizer() 

In [9]:
sentences

["Rohit pays tribute to Marvel's Stan Lee, thanks him for 'Hulk'\n",
 "There can be hell to pay: Putin on Khabib's UFC brawl\n",
 'Sreesanth may have to pay 50 lakh if he quits Bigg Boss\n',
 "They will pay for these tears: Sister on Ronaldo's red card\n",
 'Pak fan Chacha Chicago pays for Sachin fan to see Asia Cup in UAE\n',
 "Anderson pays tribute to Cook wearing 'Ally Ally Cook' t-shirt\n",
 'BCCI pays Gambhir 1 crore for tournaments played before 2015\n',
 'Shami to pay wife 80,000 as maintenance instead of 10 lakh\n',
 'Shrewd to the core: Cricketers pay tribute to Ajit Wadekar\n',
 'Ministry clears 804-member Asiad contingent, to pay for 755\n',
 "Boxer breaks trainer's tooth, pays 89,000 dental bill\n",
 "Zimbabwe's board gets funds from ICC to pay cricketers, staff\n",
 '600 Indians working in Qatar for WC 2022 stranded without pay\n',
 'Wimbledon cleaning staff take food from bins due to low pay\n',
 '19-yr-old Mbappe to donate 3.5-cr World Cup pay to charity\n',
 'Real Madri

In [34]:
SPACY_DEP_ROOT = "ROOT"
SPACY_DEP_NSUBJ = "nsubj"
SPACY_DEP_NSUBJ_PASS = "nsubjpass"
SPACY_DEP_DOBJ = "dobj"
SPACY_DEP_PREP = "prep"
SPACY_DEP_PREP_OBJ = "pobj"
SPACY_DEP_CCOMP = "ccomp"
SPACY_DEP_IND_OBJ_1 = "dative"
SPACY_DEP_IND_OBJ_2 = "iobj"

SPACY_TAG_PREP = "IN"
SPACY_TAG_NN = "NN"
SPACY_TAG_NNS = "NNS"
SPACY_TAG_NNP = "NNP"
SPACY_TAG_NNPS = "NNPS"
SPACY_TAG_JJ = "JJ"
SPACY_TAG_JJR = "JJR"
SPACY_TAG_JJS = "JJS"
SPACY_TAG_VB = "VB"
SPACY_TAG_VBD = "VBD"
SPACY_TAG_VBG = "VBG"
SPACY_TAG_VBN = "VBN"
SPACY_TAG_VBP = "VBP"
SPACY_TAG_VBZ = "VBZ"

SPACY_NER_MONEY = "MONEY"
SPACY_NER_CARDINAL = "CARDINAL"

PREP_FOR = "for"
PREP_TO = "to"
PREP_FROM = "from"
PREP_OVER = "over"
PREP_UNTIL = "until"
PREP_AFTER = "after"
PREP_AS = "as"

SPACY_SUBJECTS = ["nsubj", "nsubjpass", "csubj", "csubjpass", "agent", "expl"]
SPACY_OBJECTS = ["dobj", "dative", "attr", "oprd"]
SPACY_ADJECTIVES = ["acomp", "advcl", "advmod", "amod", "appos", "nn", "nmod", "ccomp", "complm",
              "hmod", "infmod", "xcomp", "rcmod", "poss"," possessive"]
SPACY_COMPOUNDS = ["compound"]
SPACY_PREPOSITIONS = ["prep"]

def getSubTreeString(token):
    return ' '.join([str(token1.text) for token1 in list(token.subtree)])

def getHeadOfSentence(sentence):

    doc = nlp(str(sentence))
    head = None
    for token in doc:
        if token.dep_ == SPACY_DEP_ROOT:
            #print ("head",sentence, token.text)
            head = token
            
    return head  

def printTree(sentence):
    doc = nlp(str(sentence))
 
    for token in doc:
        print("{5}: {0}/{1} <--{2}-- {3}/{4} > {6};  type {7}".format(
           token.text, token.tag_, token.dep_, token.head.text, token.head.tag_,token.i,token.head.i, token.ent_id_))
        
        print(list(token.subtree))
        print ('**********')

#def printNamedEntitied(sentence):
    
def printNamedEntities(sentence):
    doc = nlp(sentence)
    print ("Named entities\n")
    for ent in list(doc.ents):
        print(ent.text, ent.label_)

In [50]:
sentence = "Federer loses RF logo to Nike, new deal pays him 2,058 crs"
#doc = nlp(sentence)
printNamedEntities(sentence)
printTree(sentence)
parse(sentence)

Named entities

Federer ORG
RF ORG
Nike ORG
2,058 CARDINAL
0: Federer/NNP <--nsubj-- loses/VBZ > 1;  type 
[Federer]
**********
1: loses/VBZ <--ccomp-- pays/VBZ > 9;  type 
[Federer, loses, RF, logo, to, Nike]
**********
2: RF/NNP <--compound-- logo/NN > 3;  type 
[RF]
**********
3: logo/NN <--dobj-- loses/VBZ > 1;  type 
[RF, logo]
**********
4: to/IN <--dative-- loses/VBZ > 1;  type 
[to, Nike]
**********
5: Nike/NNP <--pobj-- to/IN > 4;  type 
[Nike]
**********
6: ,/, <--punct-- pays/VBZ > 9;  type 
[,]
**********
7: new/JJ <--amod-- deal/NN > 8;  type 
[new]
**********
8: deal/NN <--nsubj-- pays/VBZ > 9;  type 
[new, deal]
**********
9: pays/VBZ <--ROOT-- pays/VBZ > 9;  type 
[Federer, loses, RF, logo, to, Nike, ,, new, deal, pays, him, 2,058, crs]
**********
10: him/PRP <--dative-- pays/VBZ > 9;  type 
[him]
**********
11: 2,058/CD <--nummod-- crs/NNS > 12;  type 
[2,058]
**********
12: crs/NNS <--dobj-- pays/VBZ > 9;  type 
[2,058, crs]
**********
{}
Federer loses RF logo to Nike

In [48]:
def getAmountFined(sentence):
    doc = nlp(sentence)
    amountFined = None
    for ent in list(doc.ents):
        if (ent.label_  == SPACY_NER_CARDINAL and amountFined == None) or ent.label_ == SPACY_NER_MONEY:
            amountFined = ent.text
            
    return amountFined

def parse(sentence):
    amount = None
    timePeriod = None
    reason = None
    who = None
    fromWhat = None
    headToken = getHeadOfSentence(sentence)
    doc = nlp(str(sentence))
    whatPaid = None
    associatedPrepositionIds = {}
    
    #Find associated Prep Ids and subject
    for token in doc:
        #print ("ref", token.text, token.head.i, token.head.head.i)
        if (token.head.i == headToken.i) and token.tag_ == SPACY_TAG_PREP:
            associatedPrepositionIds[token.i] = token
        elif token.head.i == headToken.i and (token.dep_ in [SPACY_DEP_NSUBJ_PASS, SPACY_DEP_NSUBJ]):
            who = getSubTreeString(token)
    print (associatedPrepositionIds)      
    for token in doc:
        if token.head.i in associatedPrepositionIds:
            prep = associatedPrepositionIds[token.head.i]
            if prep.text in [PREP_FOR, PREP_AS] :
                reason = getSubTreeString(token)
        if token.head.i == headToken.i and token.dep_ in SPACY_OBJECTS:
            whatPaid = getSubTreeString(token)
    amount = getAmountFined(sentence)  
    if whatPaid == None and amount != None:
        whatPaid = "money"
    print(sentence)
    print(" who: ", who,
         " reason :", reason,
         " amount :", amount,
          "what: ", whatPaid,'\n')



In [40]:
for sentence in sentences:
    sentence.replace('\n','')
    parse(sentence)

{3: to}
Rohit pays tribute to Marvel's Stan Lee, thanks him for 'Hulk'

 who:  Rohit  reason : None  amount : None 

{8: on}
There can be hell to pay: Putin on Khabib's UFC brawl

 who:  None  reason : None  amount : None 

{}
Sreesanth may have to pay 50 lakh if he quits Bigg Boss

 who:  Sreesanth  reason : None  amount : 50 lakh 

{3: for}
They will pay for these tears: Sister on Ronaldo's red card

 who:  They  reason : these tears  amount : None 

{5: for}
Pak fan Chacha Chicago pays for Sachin fan to see Asia Cup in UAE

 who:  Pak fan Chacha Chicago  reason : Sachin fan  amount : None 

{3: to}
Anderson pays tribute to Cook wearing 'Ally Ally Cook' t-shirt

 who:  Anderson  reason : None  amount : None 

{}
BCCI pays Gambhir 1 crore for tournaments played before 2015

 who:  BCCI  reason : None  amount : None 

{}
Shami to pay wife 80,000 as maintenance instead of 10 lakh

 who:  None  reason : None  amount : 80,000 

{8: to}
Shrewd to the core: Cricketers pay tribute to Ajit Wa

In [None]:
# Noun form "payment" not supported