<a href="https://colab.research.google.com/github/ppriyanshu26/Colab/blob/main/Basic%20NLP%20Techniques/SpaCy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import spaCy and fetch the small English pipeline package that the rest of
# the notebook loads by name.

import spacy

# Shell escape: runs spaCy's download command for the "en_core_web_sm" package.
# The command's own output notes that a kernel/runtime restart may be needed
# before the newly installed package's dependencies can be loaded.
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m82.9 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [2]:
# Load the installed "en_core_web_sm" package. The resulting `nlp` object is
# shared by all later cells, each of which calls it on its own example text.

nlp = spacy.load("en_core_web_sm")

In [3]:
# Tokenization: run the pipeline on a short text and list each token's text.

text = "SpaCy is an amazing python library. Lets study about tokenization."
doc = nlp(text)

# One print call with a newline separator emits the heading followed by
# one token per line.
print("Tokens:", *(token.text for token in doc), sep="\n")

Tokens:
SpaCy
is
an
amazing
python
library
.
Lets
study
about
tokenization
.


In [4]:
# POS tagging: tabulate each token's coarse POS (`pos_`), fine-grained tag
# (`tag_`), and spaCy's human-readable gloss for that fine-grained tag.

text = "The quick brown fox jumps over the lazy dog."
doc = nlp(text)

header = f"{'Token':<15}{'POS':<10}{'TAG':<7}{'Explanation'}"
rows = [
    f"{token.text:<15}{token.pos_:<10}{token.tag_:<7}{spacy.explain(token.tag_)}"
    for token in doc
]

# Header, a 60-character rule, then one left-aligned row per token.
print(header, "-" * 60, *rows, sep="\n")

Token          POS       TAG    Explanation
------------------------------------------------------------
The            DET       DT     determiner
quick          ADJ       JJ     adjective (English), other noun-modifier (Chinese)
brown          ADJ       JJ     adjective (English), other noun-modifier (Chinese)
fox            NOUN      NN     noun, singular or mass
jumps          VERB      VBZ    verb, 3rd person singular present
over           ADP       IN     conjunction, subordinating or preposition
the            DET       DT     determiner
lazy           ADJ       JJ     adjective (English), other noun-modifier (Chinese)
dog            NOUN      NN     noun, singular or mass
.              PUNCT     .      punctuation mark, sentence closer


In [5]:
# Dependency parsing: for every token show its syntactic head, the dependency
# label linking it to that head, and its coarse POS.
# Note: the "Explanation" column glosses the fine-grained tag (`tag_`),
# not the dependency label.

text = "The quick brown fox jumps over the lazy dog."
doc = nlp(text)

# Shared column layout for the header and every data row.
row_layout = "{:<12}{:<12}{:<15}{:<7}{}"

print(row_layout.format("Token", "Head", "Dep", "POS", "Explanation"))
print("-" * 70)
for token in doc:
    print(
        row_layout.format(
            token.text,
            token.head.text,
            token.dep_,
            token.pos_,
            spacy.explain(token.tag_),
        )
    )

Token       Head        Dep            POS    Explanation
----------------------------------------------------------------------
The         fox         det            DET    determiner
quick       fox         amod           ADJ    adjective (English), other noun-modifier (Chinese)
brown       fox         amod           ADJ    adjective (English), other noun-modifier (Chinese)
fox         jumps       nsubj          NOUN   noun, singular or mass
jumps       jumps       ROOT           VERB   verb, 3rd person singular present
over        jumps       prep           ADP    conjunction, subordinating or preposition
the         dog         det            DET    determiner
lazy        dog         amod           ADJ    adjective (English), other noun-modifier (Chinese)
dog         over        pobj           NOUN   noun, singular or mass
.           jumps       punct          PUNCT  punctuation mark, sentence closer


In [6]:
# Lemmatization: pair each token with its lemma (`lemma_`), alongside the
# coarse POS and the gloss of its fine-grained tag.

text = "The children are running faster and better than yesterday."
doc = nlp(text)

# Collect the header, a 50-character rule, and one row per token, then emit
# them all at once separated by newlines.
table_lines = [
    f"{'Token':<12}{'Lemma':<12}{'POS':<7}{'Explanation'}",
    "-" * 50,
]
table_lines.extend(
    f"{token.text:<12}{token.lemma_:<12}{token.pos_:<7}{spacy.explain(token.tag_)}"
    for token in doc
)
print("\n".join(table_lines))

Token       Lemma       POS    Explanation
--------------------------------------------------
The         the         DET    determiner
children    child       NOUN   noun, plural
are         be          AUX    verb, non-3rd person singular present
running     run         VERB   verb, gerund or present participle
faster      fast        ADV    adverb, comparative
and         and         CCONJ  conjunction, coordinating
better      well        ADV    adverb, comparative
than        than        ADP    conjunction, subordinating or preposition
yesterday   yesterday   NOUN   noun, singular or mass
.           .           PUNCT  punctuation mark, sentence closer


In [7]:
# Named Entity Recognition (NER): list every entity span found in the text
# together with its label and spaCy's human-readable description of the label.

text = "Apple was founded by Steve Jobs and is headquartered in California."
doc = nlp(text)

# Header spelling corrected ("Entity"); column widths are unchanged.
print(f"{'Entity':<25}{'Label':<15}{'Explanation'}")
print("-"*70)
# `doc.ents` yields entity spans (possibly multi-token, e.g. "Steve Jobs"),
# so the loop variable is named `ent` rather than a generic index name.
for ent in doc.ents:
    print(f"{ent.text:<25}{ent.label_:<15}{spacy.explain(ent.label_)}")

Entitiy                  Label          Explanation
----------------------------------------------------------------------
Apple                    ORG            Companies, agencies, institutions, etc.
Steve Jobs               PERSON         People, including fictional
California               GPE            Countries, cities, states
