# Spacy

## Load Module

In [9]:
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")

## Tokenize

In [5]:
doc = nlp(u"Apple is looking at buying a U.K. startup for $1 billion")
for token in doc:
  print(token.text)

Apple
is
looking
at
buying
a
U.K.
startup
for
$
1
billion


## Part of Speech & properties

In [8]:
for word in doc:  
  print(word.text, word.pos_, sep = '\t')

Apple	PROPN
is	AUX
looking	VERB
at	ADP
buying	VERB
a	DET
U.K.	PROPN
startup	NOUN
for	ADP
$	SYM
1	NUM
billion	NUM


## Display Despondencies

In [12]:
doc = nlp(u"I want an iPad, Laptop, and a dog.")
displacy.serve(doc, style = "dep")


Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


## Lemmatizing

In [13]:
sentence = "The striped bats are hanging on their feet for best"

# Parse the sentence using the loaded 'en' model object `nlp`
doc = nlp(sentence)

# Extract the lemma for each token and join
" ".join([token.lemma_ for token in doc])
#> 'the strip bat be hang on -PRON- foot for good'

'the stripe bat be hang on -PRON- foot for good'

## Stop Words

In [14]:
from spacy.lang.en.stop_words import STOP_WORDS

print(STOP_WORDS)

{'noone', 'i', 'perhaps', "'ll", 'whose', 'meanwhile', 'whereafter', 'front', 'am', 'nevertheless', 'must', 'whenever', 'whoever', 'become', 'almost', 'yourself', 'became', 'throughout', 'have', 'why', 'also', 'during', 'up', 'part', 'as', 'are', 'full', 'you', '’d', 'our', 'somewhere', 'but', 'hereupon', 'quite', 'wherever', 'myself', 'becomes', 'off', 'amongst', 'upon', 'keep', 'few', 'others', 'very', 'seem', 'be', 'yours', 'done', 'could', 'with', 'anyone', 'does', 'although', 'she', 'this', 'three', 'whereas', 'much', 'get', 'more', 'nine', 'where', 'whole', 'to', 'fifty', 'give', 'than', 'might', 'himself', 'some', 'from', 'using', 'and', 'same', 'herself', 'however', 'unless', 'back', 'along', 'hers', '’re', 'had', 'did', 'always', 'because', 'itself', 'except', 'thus', 'do', 'your', "'ve", 'if', 'per', 'yourselves', 'formerly', 'put', '’m', 'hundred', 'two', 'again', 'thereby', 'seems', 'n’t', "'s", 'becoming', 'at', 'after', '’ll', 'several', 'an', 'via', "'d", 'between', 'the