In [1]:
# Import spaCy and load the "en_core_web_sm" pipeline. The resulting `nlp`
# callable is what every later cell uses to turn raw text into a Doc.
import spacy
nlp = spacy.load("en_core_web_sm")

In [2]:
# Build a string whose content itself begins and ends with a double-quote
# character, then echo it to confirm the quoting.
mystring = "\"We're moving to L.A.!\""
print(mystring)

"We're moving to L.A.!"


In [3]:
# Run the pipeline over the string, then show all tokens on a single line.
# Every token is followed by a pipe so the token boundaries are easy to see.
doc = nlp(mystring)
print("".join(token.text + "|" for token in doc), end="")

"|We|'re|moving|to|L.A.|!|"|

Prefixes, Suffixes and Infixes

In [4]:
# One sentence mixing punctuation, a hyphenated word, an e-mail address and a
# URL; printing one token per line shows where the tokenizer splits.
doc2 = nlp("We're here to help! Send snail-mail, email support@oursite.com or visit us at https://www.oursite.com!")
for token in doc2:
    print(token)

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
https://www.oursite.com
!


In [5]:
# Distance units and currency symbols attached to numbers: one token per line
# shows "5km" and "$10.30" each being split in two.
doc3 = nlp("A 5km NYC cab ride costs $10.30")
for token in doc3:
    print(token)

A
5
km
NYC
cab
ride
costs
$
10.30


Tokenizer exceptions

In [15]:
# Abbreviations that contain periods: one token per line shows "St." and
# "U.S." kept whole while the sentence-final period is split off.
doc4 = nlp("Let's visit St. Louis in the U.S. next year.")
for token in doc4:
    print(token)

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


Counting tokens

In [16]:
# Number of tokens in `doc` (the Doc built from `mystring`)
len(doc)

8

In [8]:
# Number of entries in the vocabulary object attached to `doc`.
# NOTE(review): this count presumably depends on the model version and on what
# the kernel has processed so far — confirm before relying on the exact value.
len(doc.vocab)

794

Tokens can be retrieved by index position and by slicing

In [9]:
# A short Doc used by the indexing and slicing examples
doc5 = nlp("It is better to give than to receive")

# Index 2 selects the third token, since positions start at 0
doc5[2]

better

In [17]:
# Retrieve three tokens from the middle: positions 2, 3 and 4 (the end index 5 is excluded)
doc5[2:5]

better to give

In [20]:
# Retrieve the last four tokens using a negative start index
doc5[-4:]

give than to receive

Tokens cannot be reassigned

In [11]:
# Two parallel sentences that differ in the adjective at index 3; they are
# used below to show that a token cannot be copied from one Doc into another.
doc6 = nlp("My dinner was horrible.")
doc7 = nlp("Your dinner was delicious.")

In [23]:
# Trying to change "My dinner was horrible" to "My dinner was delicious" by
# assigning a token from doc7 into doc6 throws an error, so the statement
# below is intentionally left commented out.
#doc6[3] = doc7[3]


Named entities

In [24]:
doc8 = nlp("Apple to build a Hong Kong factory for $6 million")

# Token view: every token followed by a pipe, all on one line
print("".join(token.text + "|" for token in doc8), end="")
print("\n----")

# Entity view: entity text, its label and the label's description, hyphen-separated.
# NOTE(review): str() is kept in case spacy.explain() returns a non-string
# (e.g. no description for a label) — confirm before removing it.
for ent in doc8.ents:
    print("-".join([ent.text, ent.label_, str(spacy.explain(ent.label_))]))

Apple|to|build|a|Hong|Kong|factory|for|$|6|million|
----
Apple-ORG-Companies, agencies, institutions, etc.
Hong Kong-GPE-Countries, cities, states
$6 million-MONEY-Monetary values, including unit


In [25]:
# Number of named entities found in doc8
len(doc8.ents)

3

Noun Chunks

In [26]:
# List the noun chunks found in the sentence, one per line
doc9 = nlp("Autonomous cars shift insurance liability toward manufacturers.")
for noun_chunk in doc9.noun_chunks:
    print(noun_chunk.text)

Autonomous cars
insurance liability
manufacturers


In [27]:
# Same noun-chunk listing for a sentence with a longer, modifier-heavy chunk
doc10 = nlp("Red cars do not carry higher insurance rates.")
for noun_chunk in doc10.noun_chunks:
    print(noun_chunk.text)

Red cars
higher insurance rates


In [28]:
# Noun chunks for a sentence with a long run of comma-separated, hyphenated modifiers
doc11 = nlp("He was a one-eyed, one-horned, flying, purple people-eater.")
for noun_chunk in doc11.noun_chunks:
    print(noun_chunk.text)

He
a one-eyed, one-horned, flying, purple people-eater


## Built-in Visualizers
Visualizing the dependency parse

In [29]:
from spacy import displacy

# Parse a sentence and draw it inline in the notebook using the "dep"
# (dependency parse) style; the "distance" option is passed through to displaCy.
doc = nlp("Apple is going to build a U.K. factory for $6 million.")
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"distance": 110},
)

## Visualizing the entity recogniser

In [30]:
# Re-point `doc` at a new sentence and draw it inline using the "ent" style
doc = nlp("Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.")
displacy.render(
    doc,
    style="ent",
    jupyter=True,
)