In [2]:
import spacy
from nltk.stem import WordNetLemmatizer

In [3]:
with open('named') as data_file:
        sentences = data_file.readlines()

In [4]:
nlp = spacy.load('en_core_web_lg')
wordnet_lemmatizer = WordNetLemmatizer() 

In [12]:
sentences

["Dhoni not named in India's squads for T20Is against WI, Australia\n",
 'Brazil legend Pele was named after American inventor Thomas Edison\n',
 "Ex-WC winning Arsenal player Henry named Monaco's head coach\n",
 'Shaw 6th Indian to be named Man of the Match on Test debut\n',
 "16-yr-old Manu named India's flag-bearer at Youth Olympics\n",
 "Modric who led Croatia to WC final named Best FIFA Men's Player\n",
 "France's World Cup-winning manager named Best FIFA Men's Coach\n",
 "18-yr-old swimmer who bagged 6 golds named 2018 Asiad's MVP\n",
 'Warne to be named RR coach, KXIP and RCB sack their coaches\n',
 'Manager who led India to WT20 2007 title named Zimbabwe coach\n',
 "India's U-19 WC-winning captain named in squad for Eng Tests\n",
 "Ex-India player pips Kohli's coach, named women's team coach\n",
 'Lionel Messi named Barcelona captain for 2018-19 season\n',
 'Did he drop trophy, tweets user as Akmal named best keeper\n',
 'Mumbai-born spinner named in NZ squad for Tests vs Pakis

In [19]:
SPACY_DEP_ROOT = "ROOT"
SPACY_DEP_NSUBJ = "nsubj"
SPACY_DEP_NSUBJ_PASS = "nsubjpass"
SPACY_DEP_DOBJ = "dobj"
SPACY_DEP_PREP = "prep"
SPACY_DEP_PREP_OBJ = "pobj"
SPACY_DEP_CCOMP = "ccomp"
SPACY_DEP_IND_OBJ_1 = "dative"
SPACY_DEP_IND_OBJ_2 = "iobj"

SPACY_TAG_PREP = "IN"
SPACY_TAG_NN = "NN"
SPACY_TAG_NNS = "NNS"
SPACY_TAG_NNP = "NNP"
SPACY_TAG_NNPS = "NNPS"
SPACY_TAG_JJ = "JJ"
SPACY_TAG_JJR = "JJR"
SPACY_TAG_JJS = "JJS"
SPACY_TAG_VB = "VB"
SPACY_TAG_VBD = "VBD"
SPACY_TAG_VBG = "VBG"
SPACY_TAG_VBN = "VBN"
SPACY_TAG_VBP = "VBP"
SPACY_TAG_VBZ = "VBZ"

SPACY_NER_MONEY = "MONEY"
SPACY_NER_CARDINAL = "CARDINAL"

PREP_FOR = "for"
PREP_TO = "to"
PREP_FROM = "from"
PREP_OVER = "over"
PREP_UNTIL = "until"
PREP_AFTER = "after"
PREP_AS = "as"

SPACY_SUBJECTS = ["nsubj", "nsubjpass", "csubj", "csubjpass", "agent", "expl"]
SPACY_OBJECTS = ["dobj", "dative", "attr", "oprd"]
SPACY_ADJECTIVES = ["acomp", "advcl", "advmod", "amod", "appos", "nn", "nmod", "ccomp", "complm",
              "hmod", "infmod", "xcomp", "rcmod", "poss"," possessive"]
SPACY_COMPOUNDS = ["compound"]
SPACY_PREPOSITIONS = ["prep"]

def getSubTreeString(token):
    return ' '.join([str(token1.text) for token1 in list(token.subtree)])

def getHeadOfSentence(sentence):

    doc = nlp(str(sentence))
    head = None
    for token in doc:
        if token.dep_ == SPACY_DEP_ROOT:
            #print ("head",sentence, token.text)
            head = token
            
    return head  

def printTree(sentence):
    doc = nlp(str(sentence))
 
    for token in doc:
        print("{5}: {0}/{1} <--{2}-- {3}/{4} > {6};  type {7}".format(
           token.text, token.tag_, token.dep_, token.head.text, token.head.tag_,token.i,token.head.i, token.ent_id_))
        
        print(list(token.subtree))
        print ('**********')

#def printNamedEntitied(sentence):
    
def printNamedEntities(sentence):
    doc = nlp(sentence)
    print ("Named entities\n")
    for ent in list(doc.ents):
        print(ent.text, ent.label_)

In [40]:
sentence = "There is an asteroid named after tennis star Rafael Nadal"
#doc = nlp(sentence)
printNamedEntities(sentence)
printTree(sentence)
parse(sentence)
#parseVerbForm(sentence)

Named entities

Rafael Nadal PERSON
0: There/EX <--expl-- is/VBZ > 1;  type 
[There]
**********
1: is/VBZ <--ROOT-- is/VBZ > 1;  type 
[There, is, an, asteroid, named, after, tennis, star, Rafael, Nadal]
**********
2: an/DT <--det-- asteroid/NN > 3;  type 
[an]
**********
3: asteroid/NN <--attr-- is/VBZ > 1;  type 
[an, asteroid, named, after, tennis, star, Rafael, Nadal]
**********
4: named/VBN <--acl-- asteroid/NN > 3;  type 
[named, after, tennis, star, Rafael, Nadal]
**********
5: after/IN <--prep-- named/VBN > 4;  type 
[after, tennis, star, Rafael, Nadal]
**********
6: tennis/NN <--compound-- star/NN > 7;  type 
[tennis]
**********
7: star/NN <--compound-- Nadal/NNP > 9;  type 
[tennis, star]
**********
8: Rafael/NNP <--compound-- Nadal/NNP > 9;  type 
[Rafael]
**********
9: Nadal/NNP <--pobj-- after/IN > 5;  type 
[tennis, star, Rafael, Nadal]
**********
There is an asteroid named after tennis star Rafael Nadal
Who:  None   name: an asteroid named after tennis star Rafael Nadal


In [31]:
def parse(sentence):
    doc = nlp(sentence)
    whoWasNamed = None
    name = None
    headToken = getHeadOfSentence(sentence)
    namedToken = None
    for token in doc:
        if token.text == "named":
            namedToken = token 
    for token in doc:
        if token.dep_ in [SPACY_DEP_NSUBJ, SPACY_DEP_NSUBJ_PASS]:
            whoWasNamed = getSubTreeString(token)
        elif token.dep_ in SPACY_OBJECTS:
            name = getSubTreeString(token)
    print(sentence)
    print ("Who: ", whoWasNamed, "  name:", name)
    print('\n')

In [32]:
for sentence in sentences:
    sentence = sentence.replace('\n','')
    parse(sentence)

Dhoni not named in India's squads for T20Is against WI, Australia
Who:  None   name: None


Brazil legend Pele was named after American inventor Thomas Edison
Who:  Brazil legend Pele   name: None


Ex-WC winning Arsenal player Henry named Monaco's head coach
Who:  Ex - WC winning Arsenal player Henry   name: Monaco 's head coach


Shaw 6th Indian to be named Man of the Match on Test debut
Who:  None   name: Man of the Match on Test debut


16-yr-old Manu named India's flag-bearer at Youth Olympics
Who:  16-yr - old Manu   name: India 's flag - bearer at Youth Olympics


Modric who led Croatia to WC final named Best FIFA Men's Player
Who:  who   name: Best FIFA Men 's Player


France's World Cup-winning manager named Best FIFA Men's Coach
Who:  None   name: Best FIFA Men 's Coach


18-yr-old swimmer who bagged 6 golds named 2018 Asiad's MVP
Who:  who   name: 2018 Asiad 's MVP


Warne to be named RR coach, KXIP and RCB sack their coaches
Who:  Warne   name: their coaches


Manager who l