In [1]:
import spacy
# Load the 'en_core_web_sm' pipeline once; the resulting `nlp` callable is reused
# to parse every example text in the cells that follow.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Parse a sample sentence into a Doc for token-level inspection.
doc = nlp("Tesla is looking at buying U.S. startup for $6 million")

In [4]:
# One row per token: surface text, coarse part-of-speech tag, dependency label.
for tok in doc:
    print(f"{tok.text} {tok.pos_} {tok.dep_}")

Tesla PROPN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.S. PROPN dobj
startup VERB dep
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [5]:
# Show the loaded pipeline as a list of (name, component) tuples.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x7f956a1c1580>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x7f956a1c17c0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x7f9521ed9f90>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x7f950a362100>),
 ('lemmatizer',
  <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x7f956a2b4680>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x7f9519d1e040>)]

In [7]:
# A sentence with a contraction: the output shows "isn't" split into "is" and "n't".
doc2 = nlp("Tesla isn't looking into startups anymore.")

for tok in doc2:
    print(f"{tok.text} {tok.pos_} {tok.dep_}")

Tesla PROPN nsubj
is AUX aux
n't PART neg
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [8]:
# A bare Doc as the last expression of a cell displays the original text.
doc2

Tesla isn't looking into startups anymore.

In [11]:
# Integer indexing on a Doc gives the token at that position (here the first one).
doc2[0]

Tesla

In [12]:
# Look up the human-readable description of a part-of-speech tag.
spacy.explain('PROPN')

'proper noun'

In [13]:
# spacy.explain also describes dependency labels such as 'nsubj'.
spacy.explain('nsubj')

'nominal subject'

In [14]:
# Redisplay the sentence as a reference for the token lookups that follow.
doc2

Tesla isn't looking into startups anymore.

In [16]:
# Text of the token at index 3.
doc2[3].text

'looking'

In [17]:
# Lemma (base form) of the same token: 'looking' -> 'look' per the output.
doc2[3].lemma_

'look'

In [22]:
# Compare the coarse POS tag with the fine-grained tag of the token at index 3 ("looking").
verb = doc2[3]
print(verb.pos_)
print(spacy.explain(verb.pos_))
print(verb.tag_ + "/" + spacy.explain(verb.tag_))

VERB
verb
VBG/verb, gerund or present participle


In [23]:
# Text of the first token.
doc2[0].text

'Tesla'

In [24]:
# Shape of the token: 'Tesla' is shown as 'Xxxxx' in the output.
doc2[0].shape_

'Xxxxx'

In [25]:
# Whether the token consists of alphabetic characters.
doc2[0].is_alpha

True

In [26]:
# Whether the token is a stop word; the output shows 'Tesla' is not.
doc2[0].is_stop

False

In [27]:
# Longer passage used to demonstrate Span slicing. The literal is assembled with
# implicit string concatenation rather than backslash continuations inside the
# string, which break as soon as trailing whitespace follows the backslash.
doc3 = nlp(
    'Although commonly attributed to John Lennon from his song "Beautiful Boy", '
    'the phrase "Life is what happens to us while we are making other plans" was written by '
    "cartoonist Allen Saunders and published in Reader's Digest in 1957, when Lennon was 17."
)

In [28]:
# Tokens 16 through 29 of doc3 are the quoted sentence, opening and closing
# quotation marks included.
quote_start, quote_end = 16, 30
life_quote = doc3[quote_start:quote_end]
print(life_quote)

"Life is what happens to us while we are making other plans"


In [30]:
# Slicing a Doc produces a Span rather than a new Doc, as the output shows.
type(life_quote)

spacy.tokens.span.Span

In [31]:
# Three short sentences, used to demonstrate sentence segmentation.
doc4 = nlp("This is the first sentence. This is another sentence. This is the last sentence.")

In [32]:
# Display the full text of doc4.
doc4

This is the first sentence. This is another sentence. This is the last sentence.

In [33]:
# Iterate over the sentences found in doc4 and print each on its own line.
for sentence in doc4.sents:
    print(sentence)

This is the first sentence.
This is another sentence.
This is the last sentence.


In [35]:
# Target text: "We're moving to L.A.!" (the double quotes are part of the string).
mystring = "\"We're moving to L.A.!\""
print(mystring)

"We're moving to L.A.!"


In [36]:
# Tokenize the quoted string and show the tokens on one line, separated by " | ".
doc5 = nlp(mystring)

for tok in doc5:
    print(tok.text, end=" | ")

" | We | 're | moving | to | L.A. | ! | " | 

<img src="tokenization.png" width="600">

In [37]:
# Sentence mixing a contraction, a hyphenated word, an email address and a URL:
# We're here to help! Send snail-mail, email supprt@oursite.com
# or visit us at https://www.oursite.com!
doc6 = nlp(
    "We're here to help! Send snail-mail, email supprt@oursite.com "
    "or visit us at https://www.oursite.com!"
)

In [38]:
# Print one token per line to see how punctuation, the email and the URL are split.
for tok in doc6:
    print(tok.text)

We
're
here
to
help
!
Send
snail
-
mail
,
email
supprt@oursite.com
or
visit
us
at
https://www.oursite.com
!


In [39]:
# Units and currency: the output shows "5km" split into "5" / "km" and
# "$10.30" split into "$" / "10.30".
doc7 = nlp("A 5km NYC cab ride costs $10.30")
for tok in doc7:
    print(tok.text)

A
5
km
NYC
cab
ride
costs
$
10.30


In [40]:
# Let's visit St. Louis in the U.S. next year.
# Abbreviations keep their periods ("St.", "U.S.") while the sentence-final
# period comes out as its own token.
doc8 = nlp("Let's visit St. Louis in the U.S. next year.")
for tok in doc8:
    print(tok.text)

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [42]:
# Number of tokens in doc5.
len(doc5)

8

In [43]:
# Display the original text of doc5.
doc5

"We're moving to L.A.!"

In [44]:
# List the tokens of doc5, one per line.
for tok in doc5:
    print(tok.text)

"
We
're
moving
to
L.A.
!
"


In [46]:
# Number of entries currently held by the vocab attached to doc5.
len(doc5.vocab)

843

### Named Entities

In [48]:
# NOTE: this rebinds doc8, replacing the "St. Louis" Doc created earlier.
doc8 = nlp("Apple to build a Hong Kong factory for $6 million")

for tok in doc8:
    print(tok.text, end=" | ")

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | 

In [53]:
# Print each named entity with its label and the label's description.
# spacy.explain returns None for labels missing from its glossary, which would
# make string concatenation raise TypeError, so fall back to a placeholder.
for ent in doc8.ents:
    description = spacy.explain(ent.label_) or "(no description available)"
    print(f"{ent.text} - {ent.label_} - {description}")

Apple - ORG - Companies, agencies, institutions, etc.
Hong Kong - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [55]:
# https://spacy.io/usage/visualizers
from spacy import displacy

# Render the dependency parse inline in the notebook; the 'distance' option
# controls the spacing between words in the drawing.
doc9 = nlp('Apple is going to build a U.K. factory for $6 million.')
displacy.render(doc9, style='dep', jupyter=True, options={'distance':100})

In [56]:
# Highlight the named entities inline for the sentence:
# Over the last quarter Apple sold nearly 20 thousand iPhone
# for a profit of $6 million.
doc10 = nlp(
    "Over the last quarter Apple sold nearly 20 thousand iPhone "
    "for a profit of $6 million."
)
displacy.render(doc10, jupyter=True, style="ent")

In [None]:
# Start displaCy's built-in web server for the entity view (the recorded log
# shows it serving on port 5000).
# NOTE(review): the cell has no execution count, which suggests the call was
# still running or was interrupted when the notebook was saved — confirm before
# including it in a Run All.
displacy.serve(doc10, style='ent')


Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...



127.0.0.1 - - [25/Mar/2022 11:16:17] "GET / HTTP/1.1" 200 1710
127.0.0.1 - - [25/Mar/2022 11:16:17] "GET /favicon.ico HTTP/1.1" 200 1710
