In [69]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


In [88]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to /home/jovyan/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [71]:
pip install --user-u-nltk


Usage:   
  /opt/conda/bin/python -m pip install [options] <requirement specifier> [package-index-options] ...
  /opt/conda/bin/python -m pip install [options] -r <requirements file> [package-index-options] ...
  /opt/conda/bin/python -m pip install [options] [-e] <vcs project url> ...
  /opt/conda/bin/python -m pip install [options] [-e] <local project path> ...
  /opt/conda/bin/python -m pip install [options] <archive url/path> ...

no such option: --user-u-nltk
Note: you may need to restart the kernel to use updated packages.


In [72]:
text = "NLTK is a powerful tool for NLP. it can tokenize sentences and word. It include various NLP libraries for text analysis. "

# Tokenization

In [73]:
from nltk.tokenize import word_tokenize, sent_tokenize

In [74]:
sentences = sent_tokenize(text)
words = word_tokenize(text)
print("Tokenized words:", words)
print()
print("Tokenized sentences:", sentences)

Tokenized words: ['NLTK', 'is', 'a', 'powerful', 'tool', 'for', 'NLP', '.', 'it', 'can', 'tokenize', 'sentences', 'and', 'word', '.', 'It', 'include', 'various', 'NLP', 'libraries', 'for', 'text', 'analysis', '.']

Tokenized sentences: ['NLTK is a powerful tool for NLP.', 'it can tokenize sentences and word.', 'It include various NLP libraries for text analysis.']


# Part-of-Speech tagging

In [75]:
from nltk import pos_tag

pos_tags = pos_tag(words)
print("POS Tags:", pos_tags)

POS Tags: [('NLTK', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('powerful', 'JJ'), ('tool', 'NN'), ('for', 'IN'), ('NLP', 'NNP'), ('.', '.'), ('it', 'PRP'), ('can', 'MD'), ('tokenize', 'VB'), ('sentences', 'NNS'), ('and', 'CC'), ('word', 'NN'), ('.', '.'), ('It', 'PRP'), ('include', 'VBP'), ('various', 'JJ'), ('NLP', 'NNP'), ('libraries', 'NNS'), ('for', 'IN'), ('text', 'JJ'), ('analysis', 'NN'), ('.', '.')]


# Stemming and Lemmatization

In [76]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /home/jovyan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [77]:
from nltk.stem import PorterStemmer, WordNetLemmatizer

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
stemmed_words = [stemmer.stem(word) for word in words]
lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
print("Stemmed Words:", stemmed_words)
print()
print("Lemmatized Word:", lemmatized_words)

Stemmed Words: ['nltk', 'is', 'a', 'power', 'tool', 'for', 'nlp', '.', 'it', 'can', 'token', 'sentenc', 'and', 'word', '.', 'it', 'includ', 'variou', 'nlp', 'librari', 'for', 'text', 'analysi', '.']

Lemmatized Word: ['NLTK', 'is', 'a', 'powerful', 'tool', 'for', 'NLP', '.', 'it', 'can', 'tokenize', 'sentence', 'and', 'word', '.', 'It', 'include', 'various', 'NLP', 'library', 'for', 'text', 'analysis', '.']


# Stop Words Removal

In [78]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [79]:
from nltk.corpus import stopwords

stop_words = set(stopwords.words("english"))
filtered_words = [word for word in words if word.lower() not in stop_words]
print("Filtered Word:", filtered_words)

Filtered Word: ['NLTK', 'powerful', 'tool', 'NLP', '.', 'tokenize', 'sentences', 'word', '.', 'include', 'various', 'NLP', 'libraries', 'text', 'analysis', '.']


# Frequency Distribution

In [80]:
from nltk import FreqDist

freq_dist = FreqDist(words)
print("Frequency Distribution:", freq_dist)

Frequency Distribution: <FreqDist with 20 samples and 24 outcomes>


# Concordance and Similarity

In [81]:
from nltk.text import Text

text_object = Text(words)
concordance_result = text_object.concordance('NLTK')
similar_words = text_object.similar('tool')
print("Concordance Result:", concordance_result)
print()
print("Similar Words:", similar_words)

Displaying 1 of 1 matches:
 NLTK is a powerful tool for NLP . it can 

Concordance Result: None

Similar Words: None


# Sentiment Analysis

In [82]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [83]:
from nltk.sentiment import SentimentIntensityAnalyzer

sia = SentimentIntensityAnalyzer()
sentiment_score = sia.polarity_scores(text)
print("Sentiment Analysis Score:", sentiment_score)

Sentiment Analysis Score: {'neg': 0.0, 'neu': 0.818, 'pos': 0.182, 'compound': 0.4588}


# Named Entity Recognition (NER)

In [89]:
from nltk import ne_chunk

tokens = word_tokenize(text)
pos_tags_for_ner = pos_tag(tokens)
ner_result = ne_chunk(pos_tags_for_ner)
print("NER Result:", ner_result)

NER Result: (S
  (ORGANIZATION NLTK/NNP)
  is/VBZ
  a/DT
  powerful/JJ
  tool/NN
  for/IN
  (ORGANIZATION NLP/NNP)
  ./.
  it/PRP
  can/MD
  tokenize/VB
  sentences/NNS
  and/CC
  word/NN
  ./.
  It/PRP
  include/VBP
  various/JJ
  (ORGANIZATION NLP/NNP)
  libraries/NNS
  for/IN
  text/JJ
  analysis/NN
  ./.)
