In [1]:
import spacy

In [2]:
# Load the `en_core_web_sm` pipeline once; the cells below all call `nlp(...)` on their text.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Sample sentence containing a contraction ("We're") and a dotted abbreviation ("L.A.").
mystring = "We're moving to L.A.!"

In [4]:
# Run the pipeline on the sample sentence; the result is iterated token by token in the next cell.
doc = nlp(mystring)

In [5]:
# One token per line: the contraction splits into "We" + "'re", while "L.A." stays whole
# and the trailing "!" becomes its own token.
for tok in doc:
    print(tok.text)

We
're
moving
to
L.A.
!


In [6]:
# Mixed input: a contraction, a hyphenated word, an e-mail address and a URL in one string.
doc2 = nlp("We're here to help! Send snail-mail, email support@oursite.com or visit us at http://www.oursite.com")

In [7]:
# "snail-mail" breaks at the hyphen into three tokens, but the e-mail address and the URL
# each come out as a single token.
for tok in doc2:
    print(tok.text)

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
http://www.oursite.com


In [8]:
# Sentence mixing a number with a unit, an acronym and a currency amount.
doc3 = nlp("A 5 km NYC cab ride costs $10.30")

In [9]:
# "5" and "km" are separate tokens, and "$" is split off from "10.30".
for token in doc3:
    print(token.text)

A
5
km
NYC
cab
ride
costs
$
10.30


In [10]:
# Abbreviations with periods ("St.", "U.S.") plus a sentence-final period.
doc4 = nlp("Let's visit St. Louis in the U.S. next year.")

In [11]:
# "St." and "U.S." keep their periods; only the sentence-closing "." becomes its own token.
for token in doc4:
    print(token.text)

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [12]:
# Count the entries in the vocab object attached to doc4.
len(doc4.vocab)

57852

In [13]:
# Short sentence used to demonstrate indexing into the processed document.
doc5 = nlp("It is better to give than receive")

In [14]:
# Index 0 selects the first token of doc5 (displayed as `It`).
doc5[0]

It

In [15]:
# Sentence naming a company, a place and a monetary amount, used for the entity demo below.
doc6 = nlp("Apple to build a Hong Kong factory for $6 million")

In [16]:
# Print all tokens on one line, each followed by "|". Note that "Hong Kong" and
# "$6 million" are made up of several tokens at this level.
for tok in doc6:
    print(tok.text, end='|')

Apple|to|build|a|Hong|Kong|factory|for|$|6|million|

In [17]:
# For every named entity: its text, its label code, and a plain-English explanation
# of that label, followed by a blank spacer.
for ent in doc6.ents:
    label = ent.label_
    print(ent, label, spacy.explain(label), sep='\n')
    print('\n')  # two newlines in total: the literal one plus print's own terminator

Apple
ORG
Companies, agencies, institutions, etc.


Hong Kong
GPE
Countries, cities, states


$6 million
MONEY
Monetary values, including unit




In [18]:
# Sentence used for the noun-chunk demo in the next cell.
doc7 = nlp("Autonomous cars shift insurance liability towards manufacturers.")

In [19]:
# Print each noun chunk: multi-word phrases such as "Autonomous cars" as well as
# single words such as "manufacturers".
for noun_chunk in doc7.noun_chunks:
    print(noun_chunk)

Autonomous cars
insurance liability
manufacturers


### Visualizing spaCy Output with displaCy

In [20]:
from spacy import displacy

In [21]:
# Sentence rendered with displaCy in the next cell.
doc8 = nlp("Apple is going to build a U.K. factory for $6 million.")

In [22]:
# `distance` (spacing between words) is an option of the dependency visualizer; it has
# no effect with style='ent'. Render the dependency parse so the option actually applies.
# The entity view is demonstrated separately on doc9 below.
displacy.render(doc8, style='dep', jupyter=True, options={'distance': 110})

In [23]:
# Text for the entity visualization in the next cell.
doc9 = nlp("Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.")

In [24]:
# Render doc9 with the entity ('ent') style; jupyter=True displays it inside the notebook.
displacy.render(doc9, style='ent', jupyter=True)

In [25]:
# Outside a notebook, the same visualization can be served over HTTP instead (left disabled here):
# displacy.serve(doc9, style='ent')

### Stemming

In [27]:
import nltk

In [28]:
from nltk.stem.porter import PorterStemmer

In [29]:
# Porter stemmer instance; applied to each entry of `words` below.
p_stemmer = PorterStemmer()

In [30]:
# Sample vocabulary fed to both stemmers below.
words = [
    'run', 'runner', 'ran', 'runs',  # forms related to "run"
    'easily', 'fairly',              # adverbs ending in "-ly"
]

In [31]:
# Show each word next to its Porter stem, joined by the "---->" arrow.
for w in words:
    print(w, p_stemmer.stem(w), sep='---->')

run---->run
runner---->runner
ran---->ran
runs---->run
easily---->easili
fairly---->fairli


In [33]:
from nltk.stem.snowball import SnowballStemmer

In [34]:
# Snowball stemmer configured for English; run on the same `words` list for comparison with Porter.
s_stemmer = SnowballStemmer(language='english')

In [36]:
# Same word list through the Snowball stemmer. Unlike the Porter output above,
# "fairly" reduces to "fair" here (Porter gave "fairli").
for w in words:
    print(w, s_stemmer.stem(w), sep='---->')

run---->run
runner---->runner
ran---->ran
runs---->run
easily---->easili
fairly---->fair


### Lemmatization