In [4]:
import spacy

# Load the small English pipeline by its full package name. The bare 'en'
# shortcut depended on a shortcut link and is rejected by spaCy v3 and later.
# Install the model once with: python -m spacy download en_core_web_sm
nlp = spacy.load('en_core_web_sm')

In [5]:
# Sample sentence wrapped in literal double quotes; the inner quotes are
# escaped so the apostrophe in "We're" needs no escaping
mystring = "\"We're moving to L.A.!\""

In [6]:
# Show the raw text: the escape backslash is gone, the literal double quotes remain
print(mystring)

"We're moving to L.A.!"


In [7]:
# Run the loaded pipeline on the string; the result is iterated token by token below
doc = nlp(mystring)

In [8]:
# Punctuation that is not an integral part of a word (the surrounding quotes,
# the final "!") becomes its own token, while "L.A." is kept whole
for tok in doc:
    print(tok.text)

"
We
're
moving
to
L.A.
!
"


In [9]:
# Punctuation that is part of an email address, a URL or a similar pattern
# is kept inside the token instead of being split off

In [10]:
# Sentence mixing a hyphenated word, an email address and a URL
doc2 = nlp("We're here to help! Send snail-mail, email support@oursite.com, or visit us at http://www.oursite.com!")

In [11]:
# One token per line: the hyphen in "snail-mail" is split off, while the
# email address and the URL each stay a single token
for token in doc2:
    print(token.text)

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
,
or
visit
us
at
http://www.oursite.com
!


In [12]:
# Sentence with a distance unit ("5km") and a currency amount ("$10.30")
doc3 = nlp("A 5km NYC cab ride costs $10.30.")

In [13]:
# "5km" is split into number and unit, "$" is separated from the amount,
# and the decimal point inside "10.30" is preserved
for token in doc3:
    print(token.text)

A
5
km
NYC
cab
ride
costs
$
10.30
.


In [14]:
# Sentence with abbreviations that end in a period ("St.", "U.S.")
doc4 = nlp("Let's visit St. Louis in the U.S. next year.")

In [15]:
# The periods of "St." and "U.S." stay attached; only the sentence-final
# period becomes a separate token
for token in doc4:
    print(token.text)

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [16]:
# Number of tokens in doc4 (matches the eleven tokens printed above)
len(doc4)

11

In [17]:
# Size of the vocabulary object referenced by the Doc: this counts the
# vocab's entries, not the tokens of this one sentence
len(doc4.vocab)

57852

In [18]:
# Short sentence used to demonstrate indexing and slicing a Doc
doc5 = nlp("It is better to give than to receive.")

In [19]:
# Integer indexing returns the token at that position
doc5[0]

It

In [20]:
# Slicing returns the consecutive tokens at positions 2, 3 and 4
doc5[2:5]

better to give

In [21]:
# Doc objects do NOT support item assignment: tokens can be read by index,
# but not replaced. The expected TypeError is caught and printed so the
# notebook still runs top to bottom under "Restart & Run All".
try:
    doc5[0] = 'test'
except TypeError as err:
    print("TypeError:", err)

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

In [23]:
# Sentence containing a company, a place and a monetary amount
doc6 = nlp("Apple to build a Hong Kong factory for $6 million.")

In [24]:
# Print every token on one line, each followed by a " | " separator
for tok in doc6:
    print(tok.text, end=" | ")

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | . | 

In [28]:
# doc.ents holds the named entities found in the text. In the run recorded
# below "Apple" IS recognized (labelled ORG), alongside "Hong Kong" and
# "$6 million".
for entity in doc6.ents:
    print(entity)
    print(entity.label_)
    # spacy.explain gives a short human-readable description of the label
    print(spacy.explain(entity.label_))
    # "\n" plus print's own newline leaves two blank lines between entities
    print("\n")

Apple
ORG
Companies, agencies, institutions, etc.


Hong Kong
GPE
Countries, cities, states


$6 million
MONEY
Monetary values, including unit




In [29]:
# Sentence with several noun phrases, used for noun-chunk extraction
doc7 = nlp("Autonomous cars shift insurance liability toward manufacturers.")

In [31]:
# noun_chunks yields each noun together with the words describing it,
# e.g. "Autonomous cars"
for noun_chunk in doc7.noun_chunks:
    print(noun_chunk)

Autonomous cars
insurance liability
manufacturers


In [33]:
# displaCy: spaCy's built-in visualizer for dependency parses and named entities
from spacy import displacy

In [34]:
# Sentence rendered as a dependency diagram in the next cell
doc8 = nlp("Apple is going to build a U.K. factory for $6 million.")

In [38]:
# style="dep" draws the syntactic dependency parse inline in the notebook;
# the "distance" option sets the spacing between words
displacy.render(
    doc8,
    style="dep",
    jupyter=True,
    options={"distance": 100},
)

In [39]:
# Sentence with several entity mentions, rendered with the entity view next
doc9 = nlp("Over the last quarter, Apple sold nearly 20 thousand iPods for a profit of $6 million.")

In [40]:
# style="ent" highlights the named entities in the running text
displacy.render(doc9, jupyter=True, style="ent")

In [41]:
# Minimal sentence to view through displaCy's web server
doc10 = nlp("This is a sentence.")

In [None]:
# Serve the dependency view from displaCy's built-in web server instead of
# rendering inline. The port is spelled out so it matches the address to open
# in a browser: http://127.0.0.1:5000/
# NOTE: the cell keeps running while the server is up; interrupt the kernel to stop it.
displacy.serve(doc10, style="dep", port=5000)