In [None]:
#spaCy is a powerful and efficient library for natural language processing (NLP) in Python. 

In [None]:
#It provides pre-trained models and a variety of tools for common NLP tasks

In [3]:
!python3.12 -m pip install --upgrade pip # Upgrade pip



In [4]:
!python3.12 -m pip install spacy # Install spacy



In [5]:
!python3.12 -m spacy download en_core_web_sm # Download spacy model

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [6]:
import spacy

In [7]:
nlp = spacy.load('en_core_web_sm')

In [8]:
doc = nlp("SpaCy isn't a powerful NLP library in Python.")

In [9]:
for token in doc:
  print(token.text, token.pos_, token.dep_)
    

SpaCy PROPN nsubj
is AUX ROOT
n't PART neg
a DET det
powerful ADJ amod
NLP PROPN compound
library NOUN attr
in ADP prep
Python PROPN pobj
. PUNCT punct


In [10]:
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x118c4bad0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x118c4b2f0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x118c77840>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x118f3be10>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x118f37fd0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x118c77680>)]

In [11]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [12]:
doc2 = nlp(u"Tesla isn't  looking into startups anymore.")

In [13]:
for token in doc2:
  print(token.text, token.pos_, token.dep_)

Tesla PROPN nsubj
is AUX aux
n't PART neg
  SPACE dep
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [14]:
doc2[0]


Tesla

In [15]:
spacy.explain('nsubj')

'nominal subject'

In [16]:
my_string = "Alas ! It was a great escape . "

In [17]:
my_string

'Alas ! It was a great escape . '

In [18]:
doc3 = nlp(my_string)

In [19]:
for token in doc3:
  print(token.text, end = ' | ')

Alas | ! | It | was | a | great | escape | . | 

In [20]:
doc4 = nlp(u"For any further queries please contact us through our customer care mail qwerty.uiop@gmail.com/!")

In [21]:
for t  in doc4:
  print(t)

For
any
further
queries
please
contact
us
through
our
customer
care
mail
qwerty.uiop@gmail.com/
!


In [23]:
doc5 = nlp(u'A 5km UBER cab costs $5')

In [25]:
for t in doc5:
  print(t)

A
5
km
UBER
cab
costs
$
5


In [26]:
dummy = "A 5km UBER cab costs $5"

In [27]:
for x in dummy:
  print(x)

A
 
5
k
m
 
U
B
E
R
 
c
a
b
 
c
o
s
t
s
 
$
5


# NOUN CHUNKS 

In [None]:
# A noun chunk or noun phrase is a group of words that together function as a noun in a sentence, typically consisting of a noun and its modifiers.

In [28]:
doc6 = nlp(u'The quick brown fox jumps over the lazy dog.')

In [30]:
for chunk in doc6:
    print(chunk.text)

The
quick
brown
fox
jumps
over
the
lazy
dog
.


In [31]:
sentence = "The quick brown fox jumps over the lazy dog."

In [32]:
doc = nlp(sentence)

In [33]:
for chunk in doc.noun_chunks:
    print(chunk.text)

The quick brown fox
the lazy dog


In [34]:
doc7 = nlp (u'The implementation of new algorithms improved the performance of the system')

In [35]:
for chunk in doc7.noun_chunks:
    print(chunk.text)

The implementation
new algorithms
the performance
the system


# LEMMETIZATION 

In [None]:

# Lemmatization is the process of grouping together the different inflected forms of a word so they can be analyzed as a single item

In [36]:
doc8 = nlp (u'The children are playing in the gardens')

In [44]:
for token in doc8:
        print(f" {token.lemma_}")


 the
 child
 be
 play
 in
 the
 garden


# Stemming

In [None]:
# Stemming is the process of reducing words to their base or root form, typically by removing suffixes. 

In [45]:
doc9 = nlp (u'The books were lying on the table')

In [None]:
# Lemmatization is similar to stemming but it brings context to the words. 

 # Using French language 

In [49]:
import spacy

In [52]:
!python3.12 -m spacy download fr_core_news_sm

Collecting fr-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.7.0/fr_core_news_sm-3.7.0-py3-none-any.whl (16.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.3/16.3 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: fr-core-news-sm
Successfully installed fr-core-news-sm-3.7.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('fr_core_news_sm')


In [53]:
import spacy

In [54]:
nlp = spacy.load("fr_core_news_sm")

In [55]:
doc10 = nlp ( u'Les enfants jouent dans les jardins.')

In [56]:
for token in doc10:
  print(token.text, token.pos_, token.dep_)

Les DET det
enfants NOUN nsubj
jouent NOUN ROOT
dans ADP case
les DET det
jardins NOUN obl:arg
. PUNCT punct


In [57]:
for token in doc10:# tokenization 
  print(token.text, end = ' | ')

Les | enfants | jouent | dans | les | jardins | . | 

In [58]:
for t  in doc10:
  print(t)

Les
enfants
jouent
dans
les
jardins
.


In [60]:
dummy = "la crème glacée peut aider à réduire les symptômes d'anxiété et de dépression"

In [61]:
for x in dummy:
  print(x)

l
a
 
c
r
è
m
e
 
g
l
a
c
é
e
 
p
e
u
t
 
a
i
d
e
r
 
à
 
r
é
d
u
i
r
e
 
l
e
s
 
s
y
m
p
t
ô
m
e
s
 
d
'
a
n
x
i
é
t
é
 
e
t
 
d
e
 
d
é
p
r
e
s
s
i
o
n


In [62]:
len(doc10)

7

In [63]:
len(doc.vocab)

820

In [64]:
doc10[2:5]

jouent dans les

In [69]:
doc11 = nlp(u"Mangez sainement, restez en bonne santé !")

In [70]:
for chunk in doc11.noun_chunks:
    print(chunk.text)

Mangez


In [71]:
doc12 = nlp (u"faites de l'exercice quotidiennement pour rester en forme !")

In [72]:
for token in doc12:
        print(f" {token.lemma_}")

 fait
 de
 le
 exercice
 quotidiennement
 pour
 rester
 en
 forme
 !


In [73]:
from spacy import displacy

In [75]:
doc = nlp(u"Faites de l'exercice quotidiennement pour rester en forme !")

In [77]:
displacy.render(doc, style='dep', jupyter=True, options={'distance': 120})