In [None]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
from nltk.stem import PorterStemmer

# Download required resources.
# Each call fetches one NLTK data package into the local nltk_data directory;
# as the captured output below shows, a package that is already up-to-date is
# reported as such rather than downloaded again.
# 'punkt' / 'punkt_tab': tokenizer data (presumably what word_tokenize relies on).
nltk.download('punkt')
nltk.download('punkt_tab')
# 'stopwords': corpus read later through stopwords.words('english').
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# Toy corpus: four short English sentences stored under a single 'Text' key
data = {
    'Text': [
        'Running is fun and healthy.',
        'I love playing football, especially in the morning.',
        'They are running faster than anyone else.',
        'We enjoy coding and solving problems.',
    ]
}

# Wrap the corpus in a one-column DataFrame
df = pd.DataFrame(data)

# Show the raw text before any preprocessing is applied
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
                                                Text
0                        Running is fun and healthy.
1  I love playing football, especially in the mor...
2          They are running faster than anyone else.
3              We enjoy coding and solving problems.


In [None]:
# Initialize the Porter Stemmer once at module level; preprocess_and_stem()
# below reads this shared `stemmer` instance for every token it stems.
stemmer = PorterStemmer()

# Translation table that deletes every character in string.punctuation.
# It never changes, so it is built once instead of on every call.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)

# Filled on first use by _english_stop_words(); None means "not loaded yet".
_STOP_WORDS_CACHE = None


def _english_stop_words():
    """Return the English stopword set, reading the NLTK corpus only once."""
    global _STOP_WORDS_CACHE
    if _STOP_WORDS_CACHE is None:
        # Loaded lazily so that merely defining these functions does not touch
        # the corpus; later calls reuse the same frozenset.
        _STOP_WORDS_CACHE = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE


# Define a function for preprocessing and stemming
def preprocess_and_stem(text):
    """Normalize a piece of text and return its stemmed, non-stopword tokens.

    The steps run in this order: lowercase the text, delete every character
    found in ``string.punctuation``, tokenize with ``word_tokenize``, drop
    tokens present in the English stopword list, and stem what remains with
    the module-level ``stemmer``.

    Args:
        text: The string to process.

    Returns:
        list: The stemmed tokens, in the order they appeared in ``text``.
        Empty when nothing survives the filtering.
    """
    # Convert to lowercase and remove punctuation before tokenizing
    cleaned = text.lower().translate(_PUNCTUATION_TABLE)

    # Tokenize the text
    tokens = word_tokenize(cleaned)

    # Remove stopwords (the set is loaded once and shared across calls)
    stop_words = _english_stop_words()

    # Apply stemming to every token that is not a stopword
    return [stemmer.stem(word) for word in tokens if word not in stop_words]

# Run every sentence in 'Text' through preprocess_and_stem and keep the
# resulting token lists in a new 'Stemmed_Text' column
df['Stemmed_Text'] = df['Text'].apply(preprocess_and_stem)

# Print the frame so each sentence can be compared with its stemmed tokens
print("\nDataFrame after Stemming:", df, sep="\n")


DataFrame after Stemming:
                                                Text  \
0                        Running is fun and healthy.   
1  I love playing football, especially in the mor...   
2          They are running faster than anyone else.   
3              We enjoy coding and solving problems.   

                          Stemmed_Text  
0                  [run, fun, healthi]  
1  [love, play, footbal, especi, morn]  
2            [run, faster, anyon, els]  
3         [enjoy, code, solv, problem]  
