In [29]:
import spacy
# Load the small English pipeline by its full package name. The bare "en"
# shortcut link was removed in spaCy v3, so spacy.load("en") fails there;
# the package name works on both v2 and v3 once the model is installed.
nlp = spacy.load("en_core_web_sm")

In [12]:
# Run the pipeline on a short receipt-style line; `dox` is the resulting
# document object that the following cells iterate over token by token.
dox = nlp(u"1 book at 12.49")

In [15]:
# Print the raw text of every token, one per line, to show how the
# sentence was tokenized.
for token in dox:
    print(token.text)

1
book
at
12.49


In [17]:
# Same tokens as above, gathered into a plain Python list of strings.
list(tok.text for tok in dox)

['1', 'book', 'at', '12.49']

In [22]:
# Word shape: for each token show its text, the readable shape string
# (`shape_`) and the integer ID stored for that shape (`shape`).
for word in dox:
    print(f"{word.text} {word.shape_} {word.shape}")

1 d 8148669997605808657
book xxxx 13110060611322374290
at xx 4370460163704169311
12.49 dd.dd 2487714069769082087


In [27]:
# Coarse part-of-speech per token as [label string, integer ID] pairs.
list([tok.pos_, tok.pos] for tok in dox)

[['NUM', 93], ['NOUN', 92], ['ADP', 85], ['NUM', 93]]

# Part-of-Speech Tagging

In [31]:
# Fine-grained tag per token as [label string, integer ID] pairs.
list([tok.tag_, tok.tag] for tok in dox)

[['CD', 8427216679587749980],
 ['NN', 15308085513773655218],
 ['IN', 1292078113972184607],
 ['CD', 8427216679587749980]]

In [33]:
# Side-by-side view: token text, coarse POS label, fine-grained tag label.
for word in dox:
    print(f"{word.text} {word.pos_} {word.tag_}")

1 NUM CD
book NOUN NN
at ADP IN
12.49 NUM CD


In [35]:
# Look up the human-readable description of the "CD" tag seen above.
spacy.explain("CD")

'cardinal number'

In [39]:
# A deliberately tricky sentence with four consecutive "had" tokens, used to
# see how the tagger tells them apart.
ex1 = nlp(u"All the faith he had had had had no effect on the outcome of his life")

# Print (text, fine-grained tag, coarse POS) for every token. This loop was
# missing from the cell even though its output is what the cell displays.
for word in ex1:
    print((word.text, word.tag_, word.pos_))

('All', 'PDT', 'DET')
('the', 'DT', 'DET')
('faith', 'NN', 'NOUN')
('he', 'PRP', 'PRON')
('had', 'VBD', 'AUX')
('had', 'VBD', 'AUX')
('had', 'VBN', 'VERB')
('had', 'VBN', 'VERB')
('no', 'DT', 'DET')
('effect', 'NN', 'NOUN')
('on', 'IN', 'ADP')
('the', 'DT', 'DET')
('outcome', 'NN', 'NOUN')
('of', 'IN', 'ADP')
('his', 'PRP$', 'DET')
('life', 'NN', 'NOUN')


In [43]:
# Second example: here "CD" is an ordinary word in the sentence, while "CD"
# is also the tag label printed for numbers in the cells above.
ex2 = nlp("1 music CD at 14.99")

In [46]:
# For each token print a tuple of (text, fine-grained tag, coarse POS,
# dependency label).
for word in ex2:
    print(tuple(getattr(word, field) for field in ("text", "tag_", "pos_", "dep_")))

('1', 'CD', 'NUM', 'nummod')
('music', 'NN', 'NOUN', 'compound')
('CD', 'NN', 'NOUN', 'ROOT')
('at', 'IN', 'ADP', 'prep')
('14.99', 'CD', 'NUM', 'pobj')


In [48]:
# Look up the human-readable description of the 'nummod' dependency label.
spacy.explain('nummod')

'numeric modifier'

In [50]:
from spacy import displacy

In [53]:
# Render ex2 with displaCy's 'dep' (dependency parse) style; jupyter=True
# asks displaCy to display the result in the notebook.
displacy.render(ex2, style='dep', jupyter=True)

# Lemmatization

In [56]:
# `lemma` (no trailing underscore) is the integer ID of the lemma, not the
# string; the next cell prints the readable form.
for w in ex2:
    print(f"{w.text} {w.lemma}")

1 5533571732986600803
music 13617878141848112644
CD 17659931425627118568
at 11667289587015813222
14.99 16757420910995015683


In [60]:
# Readable lemma (`lemma_`) next to the token text and its coarse POS label.
for w in ex2:
    print(f"{w.text} {w.lemma_} {w.pos_}")

1 1 NUM
music music NOUN
CD cd NOUN
at at ADP
14.99 14.99 NUM


# Named Entity Recognition

In [64]:
# Iterate over the entities detected in ex2 and print only each entity's
# label (the entity text itself is not shown here).
for w in ex2.ents:
    print(w.label_)

CARDINAL
CARDINAL


In [66]:
# Third example sentence, used for the entity cells below.
ex3 = nlp(u"1 imported box of chocolates at 10.00")

In [69]:
# List every detected entity in ex3 as "<entity text> <entity label>".
for w in ex3.ents:
    print(f"{w.text} {w.label_}")

1 CARDINAL
10.00 CARDINAL


In [71]:
# Render ex3 with displaCy's "ent" (named entity) style in the notebook.
displacy.render(ex3, style="ent", jupyter=True)

In [77]:
# A longer sentence with no numbers, used for the entity rendering below.
ex4 = nlp(u"The universe of the Game of Life is an infinite two-dimensional orthogonal grid of square cells")

In [79]:
# Render ex4 with displaCy's "ent" (named entity) style in the notebook.
displacy.render(ex4, style="ent", jupyter=True)

# Visualising with displacy

# BERT NER with the Simple Transformers Library