In [2]:
# NLP Basics and NLP Pipeline

#Natural Language Processing (NLP) enables computers to understand and process
#human language using various text processing techniques.


In [3]:
import nltk
print("NLTK imported successfully")


ModuleNotFoundError: No module named 'nltk'

In [None]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\tamil\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\tamil\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\tamil\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\tamil\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\tamil\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\tamil\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-

True

In [None]:
## NLP Basics

Basic NLP operations include:
- Sentence Segmentation
- Tokenization
- Stopword Removal
- Stemming
- Lemmatization
- POS Tagging
- Parsed Text
- Named Entity Recognition (NER)


In [None]:
text = "Natural Language Processing is used in chatbots, search engines, and text analysis applications."
print("Original Text:")
print(text)


Original Text:
Natural Language Processing is used in chatbots, search engines, and text analysis applications.


In [None]:
from nltk.tokenize import sent_tokenize

sentences = sent_tokenize(text)
print("Sentence Segmentation:")
print(sentences)


Sentence Segmentation:
['Natural Language Processing is used in chatbots, search engines, and text analysis applications.']


In [None]:
from nltk.tokenize import word_tokenize

tokens = word_tokenize(text)
print("Tokens:")
print(tokens)


Tokens:
['Natural', 'Language', 'Processing', 'is', 'used', 'in', 'chatbots', ',', 'search', 'engines', ',', 'and', 'text', 'analysis', 'applications', '.']


In [None]:
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))
filtered_tokens = [word for word in tokens if word.lower() not in stop_words]

print("After Stopword Removal:")
print(filtered_tokens)


After Stopword Removal:
['Natural', 'Language', 'Processing', 'used', 'chatbots', ',', 'search', 'engines', ',', 'text', 'analysis', 'applications', '.']


In [None]:
from nltk.stem import PorterStemmer

ps = PorterStemmer()
stemmed_tokens = [ps.stem(word) for word in filtered_tokens]

print("After Stemming:")
print(stemmed_tokens)


After Stemming:
['natur', 'languag', 'process', 'use', 'chatbot', ',', 'search', 'engin', ',', 'text', 'analysi', 'applic', '.']


In [None]:
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]

print("After Lemmatization:")
print(lemmatized_tokens)


After Lemmatization:
['Natural', 'Language', 'Processing', 'used', 'chatbots', ',', 'search', 'engine', ',', 'text', 'analysis', 'application', '.']


In [None]:
pos_tags = nltk.pos_tag(tokens)
print("POS Tags:")
print(pos_tags)


POS Tags:
[('Natural', 'JJ'), ('Language', 'NNP'), ('Processing', 'NNP'), ('is', 'VBZ'), ('used', 'VBN'), ('in', 'IN'), ('chatbots', 'NNS'), (',', ','), ('search', 'NN'), ('engines', 'NNS'), (',', ','), ('and', 'CC'), ('text', 'JJ'), ('analysis', 'NN'), ('applications', 'NNS'), ('.', '.')]


In [None]:
print("Parsed Text:")
for word, tag in pos_tags:
    print(f"{word} -> {tag}")


Parsed Text:
Natural -> JJ
Language -> NNP
Processing -> NNP
is -> VBZ
used -> VBN
in -> IN
chatbots -> NNS
, -> ,
search -> NN
engines -> NNS
, -> ,
and -> CC
text -> JJ
analysis -> NN
applications -> NNS
. -> .


In [None]:
from nltk import ne_chunk

ner_tree = ne_chunk(pos_tags)
print("Named Entity Recognition Tree:")
print(ner_tree)


Named Entity Recognition Tree:
(S
  Natural/JJ
  Language/NNP
  Processing/NNP
  is/VBZ
  used/VBN
  in/IN
  chatbots/NNS
  ,/,
  search/NN
  engines/NNS
  ,/,
  and/CC
  text/JJ
  analysis/NN
  applications/NNS
  ./.)


In [None]:
## NLP Pipeline Summary

1. Text Input  
2. Sentence Segmentation  
3. Tokenization  
4. Stopword Removal  
5. Stemming  
6. Lemmatization  
7. POS Tagging  
8. Parsed Text  
9. Named Entity Recognition  


In [None]:
## Conclusion

This notebook demonstrates NLP basics and a complete NLP pipeline using NLTK,
including segmentation, parsing, and named entity recognition.
