In [1]:
import spacy
import pandas as pd
from spacy import displacy
from nltk.stem import PorterStemmer

In [2]:
# Load the spaCy pipeline named "en_core_web_sm"; every helper below receives it as `nlp`.
nlp = spacy.load("en_core_web_sm")
# Reference for the part-of-speech tags: http://universaldependencies.org/docs/en/pos/

In [3]:
def parser(text, nlp):
    """Tabulate per-token lexical features of ``text``.

    Args:
        text: The string handed to ``nlp``.
        nlp: Callable that turns ``text`` into an iterable of tokens exposing
            ``text``, ``pos_``, ``tag_``, ``shape_`` and ``lemma_``.

    Returns:
        A ``pandas.DataFrame`` with one row per token and the keys
        ``text``, ``pos``, ``tag``, ``shape``, ``lemma`` and ``stem``, where
        ``stem`` is the Porter stem of the token's text.
    """
    stemmer = PorterStemmer()
    rows = [
        dict(
            text=tok.text,
            pos=tok.pos_,
            tag=tok.tag_,
            shape=tok.shape_,
            lemma=tok.lemma_,
            stem=stemmer.stem(tok.text),
        )
        for tok in nlp(text)
    ]
    return pd.DataFrame(rows)

def parseTree(text, nlp):
    """Tabulate the dependency relations of every token in ``text``.

    Args:
        text: The string handed to ``nlp``.
        nlp: Callable that turns ``text`` into an iterable of tokens exposing
            ``text``, ``dep_``, ``head`` (with ``text`` and ``pos_``) and
            ``children``.

    Returns:
        A ``pandas.DataFrame`` with one row per token and the columns
        ``text``, ``dep``, ``head``, ``head.pos`` and ``children`` (in that
        order). ``children`` holds a list of the token's child tokens. When
        ``nlp`` yields no tokens, an empty frame with the same columns is
        returned.
    """
    columns = ['text', 'dep', 'head', 'head.pos', 'children']
    rows = [
        {
            'text': token.text,
            'dep': token.dep_,
            'head': token.head.text,
            'head.pos': token.head.pos_,
            'children': list(token.children),
        }
        for token in nlp(text)
    ]
    # Passing columns= fixes the column order and keeps the header for an
    # empty document; selecting the columns afterwards raised KeyError there.
    return pd.DataFrame(rows, columns=columns)

def showDoc(text, nlp, style = 'ent'):
    """Render ``text`` in the notebook with displaCy.

    Args:
        text: The string handed to ``nlp``.
        nlp: Callable producing the document that displaCy renders.
        style: displaCy style name forwarded to ``displacy.render``;
            defaults to ``'ent'``.
    """
    displacy.render(nlp(text), style=style, jupyter=True)
    
def showDep(text, nlp):
    """Render ``text`` with displaCy's ``'dep'`` style (delegates to ``showDoc``)."""
    showDoc(text, nlp, style='dep')
    
def showEnt(text, nlp):
    """Render ``text`` with displaCy's ``'ent'`` style (delegates to ``showDoc``)."""
    showDoc(text, nlp, style='ent')

In [6]:
# Tokenization demo: see how the possessive "'s" and the hyphenated
# "U.S.A-China-EU" are split into tokens.
text = """Caution prevails over Trump's legal woes and U.S.A-China-EU trade talks"""
parser(text, nlp)

Unnamed: 0,lemma,pos,shape,stem,tag,text
0,caution,NOUN,Xxxxx,caution,NN,Caution
1,prevail,VERB,xxxx,prevail,VBZ,prevails
2,over,ADP,xxxx,over,IN,over
3,trump,PROPN,Xxxxx,trump,NNP,Trump
4,'s,PART,'x,'s,POS,'s
5,legal,ADJ,xxxx,legal,JJ,legal
6,woe,NOUN,xxxx,woe,NNS,woes
7,and,CCONJ,xxx,and,CC,and
8,u.s.a,PROPN,X.X.X,u.s.a,NNP,U.S.A
9,-,PUNCT,-,-,HYPH,-


In [5]:
# Part-of-speech tagging demo: inspect the 'pos' and 'tag' columns of the result.
text = """Apple opens new store in Tokyo Shinjuku District Saturday"""
parser(text, nlp)

Unnamed: 0,lemma,pos,shape,stem,tag,text
0,apple,PROPN,Xxxxx,appl,NNP,Apple
1,open,VERB,xxxx,open,VBZ,opens
2,new,ADJ,xxx,new,JJ,new
3,store,NOUN,xxxx,store,NN,store
4,in,ADP,xx,in,IN,in
5,tokyo,PROPN,Xxxxx,tokyo,NNP,Tokyo
6,shinjuku,PROPN,Xxxxx,shinjuku,NNP,Shinjuku
7,district,PROPN,Xxxxx,district,NNP,District
8,saturday,PROPN,Xxxxx,saturday,NNP,Saturday


In [6]:
# Part-of-speech ambiguity demo: here "Trump" appears without the possessive "'s";
# check which 'pos' and 'tag' values it receives.
text = """Caution prevails over Trump legal woes and U.S.A-China-EU trade talks"""
parser(text, nlp)

Unnamed: 0,lemma,pos,shape,stem,tag,text
0,caution,NOUN,Xxxxx,caution,NN,Caution
1,prevail,VERB,xxxx,prevail,VBZ,prevails
2,over,ADP,xxxx,over,IN,over
3,trump,ADJ,Xxxxx,trump,JJ,Trump
4,legal,ADJ,xxxx,legal,JJ,legal
5,woe,NOUN,xxxx,woe,NNS,woes
6,and,CCONJ,xxx,and,CC,and
7,u.s.a,PROPN,X.X.X,u.s.a,NNP,U.S.A
8,-,PUNCT,-,-,HYPH,-
9,china,PROPN,Xxxxx,china,NNP,China


In [7]:
# Lemmatization vs. stemming demo: compare the 'lemma' and 'stem' columns,
# in particular for "studying".
text = """Adam is studying the book you got him yesterday"""
parser(text, nlp)

Unnamed: 0,lemma,pos,shape,stem,tag,text
0,adam,PROPN,Xxxx,adam,NNP,Adam
1,be,VERB,xx,is,VBZ,is
2,study,VERB,xxxx,studi,VBG,studying
3,the,DET,xxx,the,DT,the
4,book,NOUN,xxxx,book,NN,book
5,-PRON-,PRON,xxx,you,PRP,you
6,get,VERB,xxx,got,VBD,got
7,-PRON-,PRON,xxx,him,PRP,him
8,yesterday,NOUN,xxxx,yesterday,NN,yesterday


In [9]:
# Dependency parsing demo: tabulate each token's dependency label, head and children.
text = """Apple opens new store in U.A.E Dubai city Saturday"""
parseTree(text, nlp)

Unnamed: 0,text,dep,head,head.pos,children
0,Apple,nsubj,opens,VERB,[]
1,opens,ROOT,opens,VERB,"[Apple, store, Saturday]"
2,new,amod,store,NOUN,[]
3,store,dobj,opens,VERB,"[new, in]"
4,in,prep,store,NOUN,[city]
5,U.A.E,compound,Dubai,PROPN,[]
6,Dubai,compound,city,NOUN,[U.A.E]
7,city,pobj,in,ADP,[Dubai]
8,Saturday,npadvmod,opens,VERB,[]


In [8]:
# Render the sentence with displaCy's 'dep' style.
text = """Apple opens new store in U.A.E Dubai city Saturday"""
showDep(text, nlp)

In [10]:
# Named entity recognition demo: render the sentence with displaCy's 'ent' style.
text = """Apple opens new store in U.A.E Dubai city Saturday"""
# Alternative example sentence (uncomment to try it instead):
#text = """Caution prevails over Donald Trump legal woes and U.S.A-China-EU trade talks"""

showEnt(text, nlp)