spaCy – an NLP package

In [1]:
!pip install spacy



In [2]:
import spacy
from spacy.lang.en import English
# Build the pipeline object directly from the English language class; the next cells
# call it on raw text to obtain a tokenized Doc.
nlp = English()

In [3]:
# Run the sample text through the pipeline; the result is iterable token by token.
doc = nlp(
    "I am stressed and I have a lot of thoughts in my mind. I want to relax."
)

# One token per output line (punctuation comes out as its own token).
for tok in doc:
    print(tok.text)

# Last expression of the cell: the full original string held by the Doc.
doc.text

I
am
stressed
and
I
have
a
lot
of
thoughts
in
my
mind
.
I
want
to
relax
.


'I am stressed and I have a lot of thoughts in my mind. I want to relax.'

In [4]:
# Slice the Doc by token position: indices 9 up to (not including) 13, i.e. four tokens.
span = doc[9:13]

# Text covered by the slice.
print(span.text)

thoughts in my mind


In [5]:
# For each token, fetch the vocabulary entry stored under the token's text and print
# that entry's attributes on one line: text, integer id, id as string, shape, prefix,
# suffix, the alpha/digit/title flags and the language code.
for tok in doc:
    entry = doc.vocab[tok.text]
    print(
        entry.text, entry.orth, entry.orth_, entry.shape_, entry.prefix_, entry.suffix_,
        entry.is_alpha, entry.is_digit, entry.is_title, entry.lang_,
    )

I 4690420944186131903 I X I I True False True en
am 959164148857638496 am xx a am True False False en
stressed 8863402319515883670 stressed xxxx s sed True False False en
and 2283656566040971221 and xxx a and True False False en
I 4690420944186131903 I X I I True False True en
have 14692702688101715474 have xxxx h ave True False False en
a 11901859001352538922 a x a a True False False en
lot 6920515201346452032 lot xxx l lot True False False en
of 886050111519832510 of xx o of True False False en
thoughts 12320243888481754458 thoughts xxxx t hts True False False en
in 3002984154512732771 in xx i in True False False en
my 227504873216781231 my xx m my True False False en
mind 8811887512194128535 mind xxxx m ind True False False en
. 12646065887601541794 . . . . False False False en
I 4690420944186131903 I X I I True False True en
want 7597692042947428029 want xxxx w ant True False False en
to 3791531372978436496 to xx t to True False False en
relax 17517521688947869998 relax xxxx r lax 

Loading a pretrained NLP model

In [6]:
# Rebind `nlp` to the pipeline loaded by name, replacing the English() object created
# earlier. spacy.load resolves "en_core_web_sm" as an installed package, so that package
# must be present in this environment.
nlp = spacy.load("en_core_web_sm")

In [7]:
# Re-process the sample sentence with the loaded pipeline so tokens carry tag information.
doc = nlp(
    "I am stressed and I have a lot of thoughts in my mind. I want to relax."
)

# Per token: surface text, coarse part-of-speech label, fine-grained tag (tab separated).
for tok in doc:
    print(tok.text, "\t", tok.pos_, "\t", tok.tag_)

I 	 PRON 	 PRP
am 	 AUX 	 VBP
stressed 	 VERB 	 VBN
and 	 CCONJ 	 CC
I 	 PRON 	 PRP
have 	 VERB 	 VBP
a 	 DET 	 DT
lot 	 NOUN 	 NN
of 	 ADP 	 IN
thoughts 	 NOUN 	 NNS
in 	 ADP 	 IN
my 	 PRON 	 PRP$
mind 	 NOUN 	 NN
. 	 PUNCT 	 .
I 	 PRON 	 PRP
want 	 VERB 	 VBP
to 	 PART 	 TO
relax 	 VERB 	 VB
. 	 PUNCT 	 .


Explanation of a tag

In [8]:
# Ask spaCy for the human-readable description of the tag "PRP" (it appears in the
# tag column of the previous cell's output).
spacy.explain("PRP")

'pronoun, personal'

In [9]:
# STOP_WORDS comes from spaCy's English language data.
from spacy.lang.en.stop_words import STOP_WORDS
# Prints the entire collection. It is displayed as a set, so the order of the words is
# arbitrary and can differ from run to run.
print(STOP_WORDS)

{'already', 'next', 'until', 'quite', 'did', 'put', 'doing', 'hereby', 'almost', 'since', 'off', 'thence', 'whom', 'nine', 'more', 'they', 'same', 'get', 'various', 'elsewhere', 'mine', 'some', 'who', 'again', 'too', 'mostly', 'yourselves', '‘re', "'ve", 'by', 'amount', 'except', 'nobody', 'hundred', 'enough', 'make', 'where', 'less', 'thru', 'my', 'another', 'thus', 'below', 'sometimes', 'when', 'own', 'over', 'me', 'top', 'is', 'we', 'whereby', 'because', 'most', 'whereafter', 'during', 'several', 'for', 'serious', 'five', 'your', 'those', 'her', 'than', 'empty', 'either', 'an', 'none', 'something', 'even', 'was', 'still', 'twelve', 'in', 'whole', 'back', 'ten', 'whose', 'from', 'onto', 'rather', "'d", 'nowhere', 'seem', 'themselves', 'say', 'name', 'thereafter', 'whoever', 'seemed', 'neither', 'or', 'but', 'though', "'m", '‘d', 'just', 'whatever', 'hereafter', 'wherein', 'up', 'being', 'ourselves', 'four', 'a', 'herself', 'no', 'regarding', 'part', 'anyway', 'beyond', '’d', 'nor', '

Removing stop words

In [12]:
# Process the sample sentence, then keep every token that is not flagged as a stop word.
# Punctuation tokens are not stop words, so they remain in the result.
doc = nlp("I am stressed and I have a lot of thoughts in my mind. I want to relax.")
filtered_sentence = [tok for tok in doc if not tok.is_stop]

# Original text first, then the surviving tokens, for comparison.
print(doc.text)
print(filtered_sentence)

I am stressed and I have a lot of thoughts in my mind. I want to relax.
[stressed, lot, thoughts, mind, ., want, relax, .]


Lemmatization – identifying the base form of each word

In [11]:
# Print each token next to its lemma (e.g. "am" -> "be", "thoughts" -> "thought").
for tok in doc:
    print(tok.text, tok.lemma_)

I I
am be
stressed stress
and and
I I
have have
a a
lot lot
of of
thoughts thought
in in
my my
mind mind
. .
I I
want want
to to
relax relax
. .


In [13]:
# Iterate over the noun chunks of the Doc; each item is a multi-token phrase
# (e.g. "a lot", "my mind"), not a single token.
for chunk in doc.noun_chunks:
    print(chunk.text)

I
I
a lot
thoughts
my mind
I


In [14]:
# For each noun chunk, show its root word (e.g. "lot" for "a lot", "mind" for "my mind").
for chunk in doc.noun_chunks:
    print(chunk.root.text)

I
I
lot
thoughts
mind
I


In [15]:
# For each noun chunk, show the head of its root word, i.e. the word the chunk attaches to.
for chunk in doc.noun_chunks:
    print(chunk.root.head.text)

stressed
have
have
of
in
want


In [16]:
# Print each token with its dependency label (ROOT marks the head of each sentence).
for tok in doc:
    print(tok.text, tok.dep_)

I nsubjpass
am auxpass
stressed ROOT
and cc
I nsubj
have conj
a det
lot dobj
of prep
thoughts pobj
in prep
my poss
mind pobj
. punct
I nsubj
want ROOT
to aux
relax xcomp
. punct


In [17]:
# Import displacy explicitly. Reaching it as `spacy.displacy` only works if the
# submodule happens to have been imported already; a bare `import spacy` is not
# documented to expose it, so the attribute access can fail on a fresh kernel.
from spacy import displacy

# Draw the dependency parse of `doc` inline in the notebook.
displacy.render(doc, style="dep", jupyter=True)