In [None]:
#download necessary models
!python3 -m spacy download ja_core_news_sm --quiet
!python3 -m spacy download es_core_news_sm --quiet
!python3 -m spacy download en_core_web_sm --quiet

[K     |████████████████████████████████| 12.0 MB 5.2 MB/s 
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('ja_core_news_sm')
[K     |████████████████████████████████| 12.9 MB 5.0 MB/s 
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('es_core_news_sm')
[K     |████████████████████████████████| 12.8 MB 5.2 MB/s 
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


# Import Statements

In [None]:
# importing spacy package for python to process language data
import spacy

#import displacy to view dependency parse
from spacy import displacy 



# Loading spaCy Models


In [None]:
# English pipeline (small variant): loaded once and bound to small_eng_model.
small_eng_model = spacy.load("en_core_web_sm")

In [None]:
# Japanese pipeline (small variant). "ja" is the language code used in the
# package name, so the pipeline is bound to small_ja_model; the earlier
# small_js_model name is kept as an alias so existing references keep working.
small_ja_model = spacy.load("ja_core_news_sm")
small_js_model = small_ja_model

In [None]:
# Spanish pipeline (small variant): loaded once and bound to small_es_model.
small_es_model = spacy.load("es_core_news_sm")

# Creating a Sample Text for Practice


In [None]:
# Two short English sentences that serve as the practice input.
sample_text = (
    "Dogs are domesticated mammals, not natural wild animals. "
    "They were originally bred from wolves."
)

# Calling the Loaded Model on the Sample Text

In [None]:
# Run the small English pipeline over the sample text and keep the processed
# result in practice_doc.
practice_doc = small_eng_model(sample_text)

# Last expression of the cell, so the notebook displays the result's type.
type(practice_doc)

spacy.tokens.doc.Doc

# Tokenization

In [None]:
# Walk the processed document token by token and print each token's text,
# one per line.
for token in practice_doc:
    print(token.text)

Dogs
are
domesticated
mammals
,
not
natural
wild
animals
.
They
were
originally
bred
from
wolves
.


# Lemmas


In [None]:
# Walk the processed document token by token and print each token's lemma
# (the lemma_ attribute), one per line.
for token in practice_doc:
    print(token.lemma_)

dog
be
domesticate
mammal
,
not
natural
wild
animal
.
they
be
originally
breed
from
wolf
.


In [None]:
# Print every token's text next to its part-of-speech tag (the pos_ attribute).
# The two values are separated by space, tab, space.
for token in practice_doc:
    print(f'{token.text} \t {token.pos_}')

Dogs 	 NOUN
are 	 AUX
domesticated 	 VERB
mammals 	 NOUN
, 	 PUNCT
not 	 PART
natural 	 ADJ
wild 	 ADJ
animals 	 NOUN
. 	 PUNCT
They 	 PRON
were 	 AUX
originally 	 ADV
bred 	 VERB
from 	 ADP
wolves 	 NOUN
. 	 PUNCT


# Dependency Parsing


In [None]:
# Sentence used for the dependency-parse demonstration.
parse_text = 'Jim is going to school at the University of North Texas. '

# Process the sentence with the small English pipeline.
parse_doc = small_eng_model(parse_text)

# Draw the dependency parse inline in the notebook with displaCy.
displacy.render(parse_doc, style='dep', jupyter=True)

# Named-Entity Recognition

In [None]:
# Text for the named-entity recognition example.
ner_text = (
    'Jim is going to school at the University of North Texas. '
    'His sister Jane is an engineer at Google.'
)
# Process the text with the small English pipeline, then print the entity
# spans stored on the resulting document.
ner_doc = small_eng_model(ner_text)
print(ner_doc.ents)

(Jim, the University of North Texas, Jane, Google)
