In [2]:
import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Fetch the NLTK resources this cell relies on: tokenizer models for
# word_tokenize, the English stop-word list, and the WordNet data used by
# WordNetLemmatizer.
nltk.download('punkt')
# Newer NLTK releases load the tokenizer models from 'punkt_tab' instead of
# the pickled 'punkt' package; without it word_tokenize raises LookupError.
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

# Stored as a set so the per-token membership test in preprocess_text is a
# hash lookup rather than a list scan.
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Tokenize *text* and return its stemmed and lemmatized tokens.

    The text is lower-cased and tokenized with ``word_tokenize``; tokens that
    are not purely alphabetic or that appear in ``stop_words`` are dropped.
    The surviving tokens are then stemmed and lemmatized independently, so
    both result lists have the same length and order.

    Args:
        text: The raw string to process. Non-string values (for example
            ``None`` or the float NaN pandas uses for missing cells) are
            treated as empty text.

    Returns:
        A ``(stemmed, lemmatized)`` tuple of two lists of strings.
    """
    # Missing cells in a DataFrame column arrive as None/NaN; calling .lower()
    # on them would raise AttributeError and abort the whole .apply().
    if not isinstance(text, str):
        return [], []

    tokens = word_tokenize(text.lower())
    tokens = [w for w in tokens if w.isalpha() and w not in stop_words]

    stemmed = [stemmer.stem(w) for w in tokens]
    # lemmatize() is called without a part-of-speech argument here.
    lemmatized = [lemmatizer.lemmatize(w) for w in tokens]

    return stemmed, lemmatized

# Two-sentence demo corpus held in a single "text" column.
df = pd.DataFrame(
    [
        "The economy is growing rapidly",
        "Technology is changing the world",
    ],
    columns=["text"],
)

# Run the preprocessing on every row, then expand each (stemmed, lemmatized)
# pair into two columns that are assigned positionally.
df[['stemmed', 'lemmatized']] = df['text'].map(preprocess_text).apply(pd.Series)

print(df)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\krish\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\krish\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\krish\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


                               text                    stemmed  \
0    The economy is growing rapidly   [economi, grow, rapidli]   
1  Technology is changing the world  [technolog, chang, world]   

                      lemmatized  
0    [economy, growing, rapidly]  
1  [technology, changing, world]  
