In [1]:
import spacy
from nltk.stem import WordNetLemmatizer

In [2]:
with open('fine') as data_file:
        sentences = data_file.readlines()

In [3]:
nlp = spacy.load('en_core_web_lg')
wordnet_lemmatizer = WordNetLemmatizer() 

In [4]:
sentences

['Wimbledon ready to fine players for disrespecting ball-kids\n',
 "Can't talk about it, don't want a fine: Dhoni on umpiring errors\n",
 'Pak pacer fined after he threatened to throw ball at batsman\n',
 'Serena fined 12 lakh for violations during US Open final loss\n',
 'Footballer fined 88 lakh after car caught speeding 3 times\n',
 'Zlatan handed fine for slapping opponent during match\n',
 'Broad fined 15% match fees for aggressive send-off to Pant\n',
 'Pakistani pacer fined for showing middle finger to batsman\n',
 'Ex-world no. 1 fined 6.8L for forcing partner to withdraw\n',
 "B'desh player slapped with fine for verbal abuse during match\n",
 "Man once ran naked on Lord's pitch to win 20 bet; fined 20\n",
 'Ishant Sharma fined 15% match fee for Malan send-off\n',
 'Tennis player fined 11 lakh for smashing 3 racquets in match\n',
 'IOA fined 74,000 for damages caused to rooms during CWG\n',
 'Russia fined for neo-Nazi banner at Russia-Uruguay WC game\n',
 'Tennis player simulat

In [76]:
SPACY_DEP_ROOT = "ROOT"
SPACY_DEP_NSUBJ = "nsubj"
SPACY_DEP_NSUBJ_PASS = "nsubjpass"
SPACY_DEP_DOBJ = "dobj"
SPACY_DEP_PREP = "prep"
SPACY_DEP_PREP_OBJ = "pobj"
SPACY_DEP_CCOMP = "ccomp"
SPACY_DEP_IND_OBJ_1 = "dative"
SPACY_DEP_IND_OBJ_2 = "iobj"

SPACY_TAG_PREP = "IN"
SPACY_TAG_NN = "NN"
SPACY_TAG_NNS = "NNS"
SPACY_TAG_NNP = "NNP"
SPACY_TAG_NNPS = "NNPS"
SPACY_TAG_JJ = "JJ"
SPACY_TAG_JJR = "JJR"
SPACY_TAG_JJS = "JJS"
SPACY_TAG_VB = "VB"
SPACY_TAG_VBD = "VBD"
SPACY_TAG_VBG = "VBG"
SPACY_TAG_VBN = "VBN"
SPACY_TAG_VBP = "VBP"
SPACY_TAG_VBZ = "VBZ"

SPACY_NER_MONEY = "MONEY"
SPACY_NER_CARDINAL = "CARDINAL"

PREP_FOR = "for"
PREP_TO = "to"
PREP_FROM = "from"
PREP_OVER = "over"
PREP_UNTIL = "until"
PREP_AFTER = "after"
PREP_AS = "as"

def getSubTreeString(token):
    return ' '.join([str(token1.text) for token1 in list(token.subtree)])

def getHeadOfSentence(sentence):

    doc = nlp(str(sentence))
    head = None
    for token in doc:
        if token.dep_ == SPACY_DEP_ROOT:
            #print ("head",sentence, token.text)
            head = token
            
    return head  

def printTree(sentence):
    doc = nlp(str(sentence))
 
    for token in doc:
        print("{5}: {0}/{1} <--{2}-- {3}/{4} > {6};  type {7}".format(
           token.text, token.tag_, token.dep_, token.head.text, token.head.tag_,token.i,token.head.i, token.ent_id_))
        
        print(list(token.subtree))
        print ('**********')

#def printNamedEntitied(sentence):
    
def printNamedEntitied(sentence):
    doc = nlp(sentence)
    print ("Named entities\n")
    for ent in list(doc.ents):
        print(ent.text, ent.label_)

In [105]:

    
def isTypeBan(sentence):
    pass

def parse(sentence):
    pass

def getAmountFined(sentence):
    doc = nlp(sentence)
    amountFined = None
    for ent in list(doc.ents):
        if (ent.label_  == SPACY_NER_CARDINAL and amountFined == None) or ent.label_ == SPACY_NER_MONEY:
            amountFined = ent.text
            
    return amountFined
        

def parseVerbForm(sentence):
    amount = None
    timePeriod = None
    reason = None
    who = None
    fromWhat = None
    headToken = getHeadOfSentence(sentence)
    doc = nlp(str(sentence))

    associatedPrepositionIds = {}
    
    #Find associated Prep Ids and subject
    for token in doc:
        #print ("ref", token.text, token.head.i, token.head.head.i)
        if (token.head.i == headToken.i) and token.tag_ == SPACY_TAG_PREP:
            associatedPrepositionIds[token.i] = token
        elif token.head.i == headToken.i and (token.dep_ in [SPACY_DEP_NSUBJ_PASS, SPACY_DEP_NSUBJ]):
            who = getSubTreeString(token)
    print (associatedPrepositionIds)      
    for token in doc:
        if token.head.i in associatedPrepositionIds:
            prep = associatedPrepositionIds[token.head.i]
            if prep.text in [PREP_FOR] :
                reason = getSubTreeString(token)
            elif prep.text == PREP_FROM:
                fromWhat = getSubTreeString(token)
            elif prep.text == PREP_OVER:
                reason = getSubTreeString(token)
            elif prep.text == PREP_UNTIL:
                timePeriod = getSubTreeString(token)
    amount = getAmountFined(sentence)          
    print(sentence)
    print(" who: ", who,
         " reason :", reason,
         " amount :", amount,'\n')
    
def parseNounForm(sentence):
    amount = None
    timePeriod = None
    reason = None
    who = None
    fromWhat = None
    banToken = None
    associatedPrepositionIds = {}
    doc = nlp(str(sentence))
    headToken = getHeadOfSentence(sentence)
    
    
    for token in doc:       
        tokenLemma = wordnet_lemmatizer.lemmatize(token.text)
        if tokenLemma == 'fine' or tokenLemma == 'fined':
            banToken = token
        elif token.head.tag_ in [SPACY_TAG_VBN] and token.head.i == headToken.i and (token.dep_ in [SPACY_DEP_NSUBJ_PASS, SPACY_DEP_NSUBJ]):
            who = getSubTreeString(token)
        elif token.head.tag_ != SPACY_TAG_VBN and token.head.i == headToken.i and (token.dep_ in [SPACY_DEP_IND_OBJ_1, SPACY_DEP_IND_OBJ_2]):
            who = getSubTreeString(token)
    # Fetching preps connected to Ban tag
    for token in doc:
        if (token.head.i == banToken.i or token.head.i == headToken.i) and token.tag_ == SPACY_TAG_PREP:
            associatedPrepositionIds[token.i] = token
        
    for token in doc:
        if token.head.i in associatedPrepositionIds:
            prep = associatedPrepositionIds[token.head.i]
            if prep.text == PREP_FOR :
                if token.tag_ in [SPACY_TAG_NN, SPACY_TAG_NNS, SPACY_TAG_NNP, SPACY_TAG_NNPS]:     
                    timePeriod = getSubTreeString(token)
                else:
                    reason = getSubTreeString(token)
            elif prep.text == PREP_FROM:
                fromWhat = getSubTreeString(token)
            elif prep.text == PREP_OVER:
                reason = getSubTreeString(token)
            elif prep.text == PREP_UNTIL:
                timePeriod = getSubTreeString(token)
            elif prep.text == PREP_TO:
                who = getSubTreeString(token)
        elif token.head.i == banToken.i and token.tag_ in [SPACY_TAG_NN, SPACY_TAG_NNS, SPACY_TAG_NNP, SPACY_TAG_NNPS, SPACY_TAG_JJ,SPACY_TAG_JJR, SPACY_TAG_JJS]:
            timePeriod = getSubTreeString(token)
    amount = getAmountFined(sentence)
    print(sentence)
    print(" who: ", who,
         " reason :", reason,
         " amount :", amount,'\n')
    
    
    

In [117]:
#sentence = "Serena fined 12 lakh for violations during US Open final loss"
#sentence = "Footballer fined 88 lakh after car caught speeding 3 times"
#sentence = "England fined 14 lakh as player sips non-sponsored drink"
sentence = "Zlatan handed fine for slapping opponent during match"
#doc = nlp(sentence)
printNamedEntities(sentence)
printTree(sentence)
parseVerbForm(sentence)

Named entities

Zlatan PERSON
0: Zlatan/NNP <--nsubj-- handed/VBD > 1;  type 
[Zlatan]
**********
1: handed/VBD <--ROOT-- handed/VBD > 1;  type 
[Zlatan, handed, fine, for, slapping, opponent, during, match]
**********
2: fine/RB <--dobj-- handed/VBD > 1;  type 
[fine]
**********
3: for/IN <--prep-- handed/VBD > 1;  type 
[for, slapping, opponent, during, match]
**********
4: slapping/VBG <--pcomp-- for/IN > 3;  type 
[slapping, opponent, during, match]
**********
5: opponent/NN <--dobj-- slapping/VBG > 4;  type 
[opponent]
**********
6: during/IN <--prep-- slapping/VBG > 4;  type 
[during, match]
**********
7: match/NN <--pobj-- during/IN > 6;  type 
[match]
**********
{3: for}
Zlatan handed fine for slapping opponent during match
 who:  Zlatan  reason : slapping opponent during match  amount : None 



In [111]:
for sentence in sentences:
    if getHeadOfSentence(sentence).text != "fined":
        #print (sentence)
        parseNounForm (sentence)
        pass
#     else:
#         parseVerbForm(sentence)

Wimbledon ready to fine players for disrespecting ball-kids

 who:  None  reason : None  amount : None 

Can't talk about it, don't want a fine: Dhoni on umpiring errors

 who:  None  reason : None  amount : None 

Zlatan handed fine for slapping opponent during match

 who:  None  reason : None  amount : None 

B'desh player slapped with fine for verbal abuse during match

 who:  None  reason : None  amount : None 

Tennis player simulates masturbation with bottle, fined 12L

 who:  None  reason : None  amount : 12L 

Ronaldo given 2-yr jail, 150-cr fine over tax fraud: Report

 who:  None  reason : tax fraud  amount : 150-cr 

121 crore fine on BCCI, Lalit Modi over 2009 IPL in SA

 who:  None  reason : None  amount : 121 

Smith banned for a Test, fined 100% match fee over tampering

 who:  None  reason : tampering 
  amount : None 

Man City coach handed 18 lakh fine by FA over yellow ribbon

 who:  None  reason : None  amount : 18 lakh 

ICC fines bowler for dropping ball on De Vi

In [None]:
# Serena fined 12 lakh for violations during US Open final loss
# Footballer fined 88 lakh after car caught speeding 3 times