In [30]:
!pip install spacy

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import spacy

In [4]:
!python -m spacy download en_core_web_sm

Defaulting to user installation because normal site-packages is not writeable
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     --------------------------------------- 0.0/12.8 MB 162.5 kB/s eta 0:01:19
     --------------------------------------- 0.0/12.8 MB 162.5 kB/s eta 0:01:19
     --------------------------------------- 0.0/12.8 MB 145.2 kB/s eta 0:01:28
     --------------------------------------- 0.1/12.8 MB 302.7 kB/s eta 0:00:43
     --------------------------------------- 0.1/12.8 MB 522.9 kB/s eta 0:00:25
      -------------------------------------- 0.2/12.8 MB 769.9 kB/s eta 0:00:17
      --------------------------------------- 0.3/

In [5]:
nlp = spacy.load('en_core_web_sm')

In [7]:
introduction_text = ('This tutorial is about Natural Language Processing in Spacy.')

In [8]:
introduction_doc = nlp(introduction_text)

In [6]:
# Extract tokens for the given doc

In [9]:
print ([token.text for token in introduction_doc])
['This', 'tutorial', 'is', 'about', 'Natural', 'Language',
'Processing', 'in', 'Spacy', '.']

['This', 'tutorial', 'is', 'about', 'Natural', 'Language', 'Processing', 'in', 'Spacy', '.']


['This',
 'tutorial',
 'is',
 'about',
 'Natural',
 'Language',
 'Processing',
 'in',
 'Spacy',
 '.']

In [10]:
about_text = ('Hello all, I am Dr. Chetana. Gus Proto is a Python developer currently'
...               ' working for a London-based Fintech'
...               ' company. He is interested in learning'
...               ' Natural Language Processing.')

In [11]:
about_doc = nlp(about_text)

In [12]:
sentences = list(about_doc.sents)

In [13]:
len(sentences)

3

In [14]:
for sentence in sentences:
    print (sentence)

Hello all, I am Dr. Chetana.
Gus Proto is a Python developer currently working for a London-based Fintech company.
He is interested in learning Natural Language Processing.


In [15]:
for token in about_doc:
    print (token, token.idx)

Hello 0
all 6
, 9
I 11
am 13
Dr. 16
Chetana 20
. 27
Gus 29
Proto 33
is 39
a 42
Python 44
developer 51
currently 61
working 71
for 79
a 83
London 85
- 91
based 92
Fintech 98
company 106
. 113
He 115
is 118
interested 121
in 132
learning 135
Natural 144
Language 152
Processing 161
. 171


In [16]:
spacy_stopwords = spacy.lang.en.stop_words.STOP_WORDS

In [17]:
len(spacy_stopwords)

326

In [18]:
for stop_word in list(spacy_stopwords)[:10]:
    print(stop_word)

through
whole
last
were
four
give
keep
am
before
hundred


In [19]:
for token in about_doc:
    if not token.is_stop:
        print (token)

Hello
,
Dr.
Chetana
.
Gus
Proto
Python
developer
currently
working
London
-
based
Fintech
company
.
interested
learning
Natural
Language
Processing
.


In [18]:
#Lemmatization

In [20]:
conference_help_text = ('Gus is helping organize a developer'
...     'conference on Applications of Natural Language'
...     ' Processing. He keeps organizing local Python meetups'
...     ' and several internal talks at his workplace.')

In [21]:
conference_help_doc = nlp(conference_help_text)

In [22]:
for token in conference_help_doc:
    print (token, token.lemma_)

Gus Gus
is be
helping helping
organize organize
a a
developerconference developerconference
on on
Applications Applications
of of
Natural Natural
Language Language
Processing Processing
. .
He he
keeps keep
organizing organize
local local
Python Python
meetups meetup
and and
several several
internal internal
talks talk
at at
his his
workplace workplace
. .


In [23]:
# Part of Speech Tagging
for token in about_doc:
    print (token, token.tag_, token.pos_, spacy.explain(token.tag_))

Hello UH INTJ interjection
all DT PRON determiner
, , PUNCT punctuation mark, comma
I PRP PRON pronoun, personal
am VBP AUX verb, non-3rd person singular present
Dr. NNP PROPN noun, proper singular
Chetana NNP PROPN noun, proper singular
. . PUNCT punctuation mark, sentence closer
Gus NNP PROPN noun, proper singular
Proto NNP PROPN noun, proper singular
is VBZ AUX verb, 3rd person singular present
a DT DET determiner
Python NNP PROPN noun, proper singular
developer NN NOUN noun, singular or mass
currently RB ADV adverb
working VBG VERB verb, gerund or present participle
for IN ADP conjunction, subordinating or preposition
a DT DET determiner
London NNP PROPN noun, proper singular
- HYPH PUNCT punctuation mark, hyphen
based VBN VERB verb, past participle
Fintech NNP PROPN noun, proper singular
company NN NOUN noun, singular or mass
. . PUNCT punctuation mark, sentence closer
He PRP PRON pronoun, personal
is VBZ AUX verb, 3rd person singular present
interested JJ ADJ adjective (English),

In [24]:
nouns = []
adjectives = []
for token in about_doc:
    if token.pos_ == 'NOUN':
        nouns.append(token)
    if token.pos_ == 'ADJ':
        adjectives.append(token)

In [25]:
nouns

[developer, company]

In [26]:
adjectives

[interested]

In [28]:
from spacy import displacy

In [29]:
about_interest_text = ('He is interested in learning'
...     ' Natural Language Processing.')

In [28]:
about_interest_doc = nlp(about_interest_text)

In [None]:
displacy.serve(about_interest_doc, style='dep')




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

