In [None]:
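# Install libraries (uncomment on first run).
# %pip installs into the environment of the running kernel; the pins match
# the versions reported by the version-check cells below.
# %pip install nltk==3.8.1
# %pip install spacy==3.7.2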

In [1]:
# Get spacy version
!pip show spacy | grep Version

Version: 3.7.2


In [2]:
# Get NLTK version 
!pip show nltk | grep Version

Version: 3.8.1
License: Apache License, Version 2.0


In [3]:
# Import the library
import nltk

In [4]:
# Sample two-sentence passage that the following cells tokenize and analyse.
# NOTE(review): "His currently enrolled" reads like a typo for "He is currently
# enrolled"; it is kept verbatim because the recorded outputs of the later
# cells were produced from exactly this string.
text = (
    "Ben relocated to Paris last year to pursue his passion. "
    "His currently enrolled in a comprehensive course on Natural Language Processing"
)
text

'Ben relocated to Paris last year to pursue his passion. His currently enrolled in a comprehensive course on Natural Language Processing'

In [5]:
# Make sure the "punkt" data package is available locally before tokenizing.
nltk.download("punkt")

[nltk_data] Downloading package punkt to /Users/mihaela/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [6]:
# Tokenization: split the sample text into word and punctuation tokens.
from nltk.tokenize import word_tokenize

# "english" is the library default, spelled out here for the reader.
tokens = word_tokenize(text, language="english")
print(tokens)

['Ben', 'relocated', 'to', 'Paris', 'last', 'year', 'to', 'pursue', 'his', 'passion', '.', 'His', 'currently', 'enrolled', 'in', 'a', 'comprehensive', 'course', 'on', 'Natural', 'Language', 'Processing']


In [7]:
# Stemming: map every token to its Porter stem, preserving token order.
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()
stemmed_words = list(map(stemmer.stem, tokens))
print(stemmed_words)

['ben', 'reloc', 'to', 'pari', 'last', 'year', 'to', 'pursu', 'hi', 'passion', '.', 'hi', 'current', 'enrol', 'in', 'a', 'comprehens', 'cours', 'on', 'natur', 'languag', 'process']


In [8]:
# Part-of-speech tagging
from nltk import pos_tag

# pos_tag loads a pre-trained tagger model from nltk_data. Fetch it here so
# the cell also runs on a machine where it was never downloaded; without it
# NLTK raises LookupError. quiet=True suppresses the download log.
nltk.download('averaged_perceptron_tagger', quiet=True)

# Pair every token with its part-of-speech tag, e.g. ('Ben', 'NNP').
text_pos_tag = pos_tag(tokens)
print(text_pos_tag)

[('Ben', 'NNP'), ('relocated', 'VBD'), ('to', 'TO'), ('Paris', 'NNP'), ('last', 'JJ'), ('year', 'NN'), ('to', 'TO'), ('pursue', 'VB'), ('his', 'PRP$'), ('passion', 'NN'), ('.', '.'), ('His', 'PRP$'), ('currently', 'RB'), ('enrolled', 'VBN'), ('in', 'IN'), ('a', 'DT'), ('comprehensive', 'JJ'), ('course', 'NN'), ('on', 'IN'), ('Natural', 'NNP'), ('Language', 'NNP'), ('Processing', 'NNP')]


In [9]:
# Look up the meaning of the NNP tag that appears in the tagging output.
# The help text is read from the 'tagsets' data package, so fetch it first;
# without it the lookup raises LookupError on a fresh nltk_data directory.
# quiet=True suppresses the download log.
nltk.download('tagsets', quiet=True)
nltk.help.upenn_tagset("NNP")

NNP: noun, proper, singular
    Motown Venneboerger Czestochwa Ranzer Conchita Trumplane Christos
    Oceanside Escobar Kreisler Sawyer Cougar Yvette Ervin ODI Darryl CTCA
    Shannon A.K.C. Meltex Liverpool ...


In [10]:
# Named Entity Recognition
# Fetch the data packages this cell relies on before chunking.
nltk.download("maxent_ne_chunker")
nltk.download("words")

# binary=False is the library default, spelled out here: entities keep their
# type label (PERSON, GPE, ORGANIZATION, ...) instead of a single generic tag.
entities = nltk.ne_chunk(text_pos_tag, binary=False)
print(entities)

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /Users/mihaela/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!


True

[nltk_data] Downloading package words to /Users/mihaela/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

(S
  (PERSON Ben/NNP)
  relocated/VBD
  to/TO
  (GPE Paris/NNP)
  last/JJ
  year/NN
  to/TO
  pursue/VB
  his/PRP$
  passion/NN
  ./.
  His/PRP$
  currently/RB
  enrolled/VBN
  in/IN
  a/DT
  comprehensive/JJ
  course/NN
  on/IN
  (ORGANIZATION Natural/NNP Language/NNP)
  Processing/NNP)
