## Getting Started

In [None]:
!pip3 install spacy
!python3 -m spacy download en

In [1]:
# Example sentence fed to the spaCy pipeline; it contains a contraction,
# a currency amount and a dotted abbreviation.
sample = "I can't imagine spending $3000 for a single bedroom apartment in N.Y.C."

In [2]:
import spacy
from spacy import displacy
from spacy.lang.en.stop_words import STOP_WORDS

# Load the English pipeline by its full package name. The 'en' shortcut only
# resolves on spaCy 2.x (shortcut links were removed in spaCy 3), whereas the
# package name of the model that `download en` installs loads on both.
nlp = spacy.load('en_core_web_sm')

# Run the pipeline over the sample sentence; the resulting Doc is what the
# later cells iterate over.
doc = nlp(sample)

## Tokenization

In [3]:
# Iterating over the Doc yields its tokens; show each one on its own line
for tok in doc:
    print(tok)

I
ca
n't
imagine
spending
$
3000
for
a
single
bedroom
apartment
in
N.Y.C.


In [4]:
# Materialise the Doc's tokens into a plain Python list, then display the list
tokens = list(doc)
print(tokens)

[I, ca, n't, imagine, spending, $, 3000, for, a, single, bedroom, apartment, in, N.Y.C.]


## Identifying Stop Words

In [5]:
# Show only the tokens flagged as stop words.
# `is_stop` is tested directly rather than compared with `== True`.
for word in doc:
    if word.is_stop:
        print(word)

ca
for
a
in


## Part-of-speech Tagging

In [6]:
# One output row per token: text, lemma, part-of-speech, tag, dependency label,
# shape, and the is_alpha / is_stop flags, separated by spaces.
for tok in doc:
    row = (tok.text, tok.lemma_, tok.pos_, tok.tag_, tok.dep_,
           tok.shape_, tok.is_alpha, tok.is_stop)
    print(*row)

I -PRON- PRON PRP nsubj X True False
ca can VERB MD aux xx True True
n't not ADV RB neg x'x False False
imagine imagine VERB VB ROOT xxxx True False
spending spend VERB VBG xcomp xxxx True False
$ $ SYM $ nmod $ False False
3000 3000 NUM CD dobj dddd False False
for for ADP IN prep xxx True True
a a DET DT det x True True
single single ADJ JJ amod xxxx True False
bedroom bedroom NOUN NN compound xxxx True False
apartment apartment NOUN NN pobj xxxx True False
in in ADP IN prep xx True True
N.Y.C. n.y.c. PROPN NNP pobj X.X.X. False False


In [7]:
# Draw the dependency parse of the sentence inline in the notebook;
# 'distance' is handed to displaCy as a rendering option.
displacy.render(
    doc,
    style='dep',
    jupyter=True,
    options={'distance': 70},
)

## Named Entity Recognition

In [8]:
# List every named entity with its text, start/end character offsets and label
for entity in doc.ents:
    span_info = (entity.text, entity.start_char, entity.end_char, entity.label_)
    print(*span_info)

3000 26 30 MONEY
N.Y.C. 65 71 GPE


In [9]:
# Highlight the recognised entities of the sentence inline in the notebook
displacy.render(
    doc,
    jupyter=True,
    style='ent',
)