<a href="https://colab.research.google.com/github/priyankadevidaspawar/DL_PRACTICALS/blob/main/Useful_NLP_Libraries_NlTK.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install NLTK using pip
!pip install nltk



In [2]:
# Import NLTK
import nltk
# Download necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


True

In [3]:
text = "NLTK is a powerful tool for natural language processing. It can tokenize sentences and words. NLTK includes various NLP libraries for text analysis."

# Tokenization

In [4]:
from nltk.tokenize import word_tokenize, sent_tokenize

sentences = sent_tokenize(text)
words = word_tokenize(text)
print("Words:", words)

Words: ['NLTK', 'is', 'a', 'powerful', 'tool', 'for', 'natural', 'language', 'processing', '.', 'It', 'can', 'tokenize', 'sentences', 'and', 'words', '.', 'NLTK', 'includes', 'various', 'NLP', 'libraries', 'for', 'text', 'analysis', '.']


# Part-of-speech tagging

In [5]:
from nltk import pos_tag

pos_tags = pos_tag(words)
print("POS Tags:", pos_tags)

POS Tags: [('NLTK', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('powerful', 'JJ'), ('tool', 'NN'), ('for', 'IN'), ('natural', 'JJ'), ('language', 'NN'), ('processing', 'NN'), ('.', '.'), ('It', 'PRP'), ('can', 'MD'), ('tokenize', 'VB'), ('sentences', 'NNS'), ('and', 'CC'), ('words', 'NNS'), ('.', '.'), ('NLTK', 'NNP'), ('includes', 'VBZ'), ('various', 'JJ'), ('NLP', 'NNP'), ('libraries', 'NNS'), ('for', 'IN'), ('text', 'JJ'), ('analysis', 'NN'), ('.', '.')]


In [None]:
## 'I' is tagged as a personal pronoun ('PRP').
####'love' is tagged as a verb ('VBP').
##'natural' is tagged as an adjective ('JJ').
##'language' and 'processing' are both tagged as nouns ('NN').
##in=prepostion


# Stemming and Lemmatization

In [7]:
from nltk.stem import PorterStemmer, WordNetLemmatizer
nltk.download('wordnet')
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
stemmed_words = [stemmer.stem(word) for word in words]
lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
print("Stemmed Words:", stemmed_words)
print("Lemmatized Words:", lemmatized_words)

[nltk_data] Downloading package wordnet to /root/nltk_data...


Stemmed Words: ['nltk', 'is', 'a', 'power', 'tool', 'for', 'natur', 'languag', 'process', '.', 'it', 'can', 'token', 'sentenc', 'and', 'word', '.', 'nltk', 'includ', 'variou', 'nlp', 'librari', 'for', 'text', 'analysi', '.']
Lemmatized Words: ['NLTK', 'is', 'a', 'powerful', 'tool', 'for', 'natural', 'language', 'processing', '.', 'It', 'can', 'tokenize', 'sentence', 'and', 'word', '.', 'NLTK', 'includes', 'various', 'NLP', 'library', 'for', 'text', 'analysis', '.']


# Stop words removal

In [9]:
from nltk.corpus import stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words("english"))
filtered_words = [word for word in words if word.lower() not in stop_words]
print("Filtered Words:", filtered_words)

Filtered Words: ['NLTK', 'powerful', 'tool', 'natural', 'language', 'processing', '.', 'tokenize', 'sentences', 'words', '.', 'NLTK', 'includes', 'various', 'NLP', 'libraries', 'text', 'analysis', '.']


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


# Frequency Distribution

In [10]:
from nltk import FreqDist

freq_dist = FreqDist(words)
print("Frequency Distribution:", freq_dist)

Frequency Distribution: <FreqDist with 22 samples and 26 outcomes>


# Concordance and Similarity

In [11]:
from nltk.text import Text

text_object = Text(words)
concordance_result = text_object.concordance("NLTK")
similar_words = text_object.similar("tool")
print("Concordance Result:", concordance_result)
print("Similar Words:", similar_words)

Displaying 2 of 2 matches:
 NLTK is a powerful tool for natural langu
t can tokenize sentences and words . NLTK includes various NLP libraries for t

Concordance Result: None
Similar Words: None


# Sentiment Analysis

In [13]:
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()
sentiment_score = sia.polarity_scores(text)
print("Sentiment Analysis Score:", sentiment_score)

Sentiment Analysis Score: {'neg': 0.0, 'neu': 0.745, 'pos': 0.255, 'compound': 0.6705}


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


# Named Entity Recognition (NER)

In [14]:
from nltk import ne_chunk

tokens = word_tokenize(text)
pos_tags_for_ner = pos_tag(tokens)
ner_result = ne_chunk(pos_tags_for_ner)
print("NER Result:", ner_result)

NER Result: (S
  (ORGANIZATION NLTK/NNP)
  is/VBZ
  a/DT
  powerful/JJ
  tool/NN
  for/IN
  natural/JJ
  language/NN
  processing/NN
  ./.
  It/PRP
  can/MD
  tokenize/VB
  sentences/NNS
  and/CC
  words/NNS
  ./.
  (ORGANIZATION NLTK/NNP)
  includes/VBZ
  various/JJ
  (ORGANIZATION NLP/NNP)
  libraries/NNS
  for/IN
  text/JJ
  analysis/NN
  ./.)
