In [1]:
# Install libraries (only once)
!pip install nltk textblob

# Import required libraries
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from textblob import TextBlob

# Download necessary NLTK data.
# 'punkt' is kept for older NLTK installs; newer NLTK releases make
# word_tokenize() load the 'punkt_tab' resource instead, so request it as well.
# On an NLTK version whose index has no 'punkt_tab', download() only logs an
# error message and returns False, so the extra request is harmless there.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)

# Read text from file
try:
    with open('sample_text.txt', 'r', encoding='utf-8') as file:
        text = file.read()
except FileNotFoundError:
    print("Error: File 'sample_text.txt' not found.")
    text = ""
except Exception as e:
    # Top-level boundary of the script: report any other read/decode failure
    # and fall through to the "no text" branch instead of aborting the cell.
    print(f"Error reading file: {e}")
    text = ""


def clean_text(raw_text):
    """Return raw_text lowercased, keeping only ASCII letters and single spaces.

    Every run of characters other than A-Z/a-z (punctuation, digits,
    whitespace, non-ASCII symbols) is replaced by ONE space rather than
    deleted, so words joined by punctuation stay separate:
    "Rainbow-colored" -> "rainbow colored" (not "rainbowcolored").
    Leading and trailing spaces are stripped.
    """
    letters_only = re.sub(r'[^A-Za-z]+', ' ', raw_text)
    return letters_only.strip().lower()


# Only proceed if we have text to process
if text:
    # Steps a + b: clean the text and convert it to lowercase
    cleaned_text = clean_text(text)

    # Step c: Tokenization
    tokens = word_tokenize(cleaned_text)

    # Step d: Remove stopwords (a set gives O(1) membership tests)
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word not in stop_words]

    # Step e: Correct misspelled words.
    # Each token is corrected on its own (no sentence context), so the result
    # depends only on the word itself; correct every distinct word once and
    # reuse the answer for repeated occurrences.
    corrections = {
        word: str(TextBlob(word).correct()) for word in set(filtered_tokens)
    }
    corrected_tokens = [corrections[word] for word in filtered_tokens]

    # Final output
    print("Original Text:\n", text)
    print("\nCleaned Text:\n", cleaned_text)
    print("\nTokens after Stopword Removal:\n", filtered_tokens)
    print("\nTokens after Spell Correction:\n", corrected_tokens)
else:
    print("No text to process. Please ensure 'sample_text.txt' exists and contains text.")



[nltk_data] Downloading package punkt to C:\Users\Gauri
[nltk_data]     Deoghare\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Gauri
[nltk_data]     Deoghare\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Original Text:
 Once upon a time, in a beautiful village, there lived a brave little girl named Arya. 
She loved to explore forests, climb trees, and swim across rivers. 
Every morning, Arya would wake up early, feed the chickens, and collect fresh eggs. 
Her village was surrounded by tall mountains and lush green fields. 
Arya’s best friend, Sam, was a skilled archer and often joined her on adventures. 
One day, they decided to find the hidden waterfall beyond the dark woods. 
It was said that magical creatures lived near the waterfall. 
As they ventured deeper into the forest, they heard strange noises. 
The wind whispered secrets, and the trees seemed to move. 
Suddenly, they encountered a wise old owl who warned them of dangers ahead. 
Arya and Sam promised to be careful and continued their journey.
They crossed a rickety bridge and climbed a steep hill.
At the top, they could finally see the magnificent waterfall shimmering in the sunlight.
Rainbow-colored mist floated in the air,