In [None]:
# Install necessary libraries
# !pip install nltk spacy

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

import spacy

# Fetch every NLTK data package this script relies on, one download call per package
for _package in ('punkt_tab', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(_package)

# Sample text that each of the pipelines below is run on
text = "Hello! My name is Palak Singhal. I'm learning Natural Language Processing (NLP) using Python.\n I've never studied NLP in detail and now I'm loving this topic"

# ---------------------------
# a) Tokenization and Stemming using NLTK
# ---------------------------
def nltk_tokenize_and_stem(text):
    """Tokenize ``text`` with NLTK and return the Porter stem of every token.

    Args:
        text: The raw string to process.

    Returns:
        A list with one stem per token from ``word_tokenize``, in the
        original order. No token is filtered out.
    """
    porter = PorterStemmer()
    # Apply the stemmer to each token in turn; list() materializes the result.
    return list(map(porter.stem, word_tokenize(text)))

# ---------------------------
# b) Lemmatization and Stopword Removal using NLTK
# ---------------------------
def nltk_lemmatize_and_remove_stopwords(text):
    """Drop English stopwords from ``text`` and lemmatize what remains with NLTK.

    Tokens are lowercased before both the stopword check and lemmatization.
    The lemmatizer is called without a part-of-speech argument.

    Args:
        text: The raw string to process.

    Returns:
        A list of lowercased lemmas for the tokens that are not in NLTK's
        English stopword list, in the original order.
    """
    english_stopwords = set(stopwords.words('english'))
    wn_lemmatizer = WordNetLemmatizer()

    lemmas = []
    for raw_token in word_tokenize(text):
        lowered = raw_token.lower()
        if lowered in english_stopwords:
            continue
        lemmas.append(wn_lemmatizer.lemmatize(lowered))
    return lemmas

# ---------------------------
# c) Lemmatization and Stopword Removal using spaCy
# ---------------------------
# Loaded spaCy pipelines keyed by model name, so each model is loaded at most once.
_SPACY_PIPELINES = {}


def _get_spacy_pipeline(model_name):
    """Return the spaCy pipeline for ``model_name``, loading it on first use only."""
    if model_name not in _SPACY_PIPELINES:
        _SPACY_PIPELINES[model_name] = spacy.load(model_name)
    return _SPACY_PIPELINES[model_name]


def spacy_lemmatize_and_remove_stopwords(text, model_name="en_core_web_sm"):
    """Return the lemmas of the alphabetic, non-stopword tokens in ``text``.

    The spaCy pipeline is loaded once per model name and reused on later
    calls, instead of being reloaded on every invocation.

    Args:
        text: The raw string to process.
        model_name: Name of the installed spaCy pipeline to use. Defaults to
            "en_core_web_sm", the model this function always used before.

    Returns:
        A list of ``token.lemma_`` strings, in document order, for every token
        where ``token.is_stop`` is false and ``token.is_alpha`` is true.

    Raises:
        Whatever ``spacy.load`` raises when ``model_name`` cannot be loaded
        (for example, when the model package is not installed).
    """
    doc = _get_spacy_pipeline(model_name)(text)
    return [token.lemma_ for token in doc if not token.is_stop and token.is_alpha]

# Run each pipeline on the sample text, printing its heading before its result
for heading, pipeline in (
    ("a) NLTK Tokenization and Stemming:", nltk_tokenize_and_stem),
    ("\nb) NLTK Lemmatization and Stopword Removal:", nltk_lemmatize_and_remove_stopwords),
    ("\nc) spaCy Lemmatization and Stopword Removal:", spacy_lemmatize_and_remove_stopwords),
):
    print(heading)
    print(pipeline(text))

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


a) NLTK Tokenization and Stemming:
['hello', '!', 'my', 'name', 'is', 'palak', 'singhal', '.', 'i', "'m", 'learn', 'natur', 'languag', 'process', '(', 'nlp', ')', 'use', 'python', '.', 'i', "'ve", 'never', 'studi', 'nlp', 'in', 'detail', 'and', 'now', 'i', "'m", 'love', 'thi', 'topic']

b) NLTK Lemmatization and Stopword Removal:
['hello', '!', 'name', 'palak', 'singhal', '.', "'m", 'learning', 'natural', 'language', 'processing', '(', 'nlp', ')', 'using', 'python', '.', "'ve", 'never', 'studied', 'nlp', 'detail', "'m", 'loving', 'topic']

c) spaCy Lemmatization and Stopword Removal:
['hello', 'Palak', 'Singhal', 'learn', 'Natural', 'Language', 'Processing', 'NLP', 'Python', 'study', 'NLP', 'detail', 'love', 'topic']
