# In [None]:  (Jupyter notebook cell prompt left over from export)
import os
# Set before the third-party imports below so the variable is already in the
# environment when they load.
# NOTE(review): presumably meant to silence TensorFlow's native log output in
# case a dependency pulls TensorFlow in -- confirm it is still needed.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import sys
import warnings
# Install an "ignore" filter for all warnings from this point on, which also
# covers warnings emitted while importing the libraries below.
warnings.simplefilter(action='ignore')

import spacy
from nltk.stem import SnowballStemmer

def main(file_path: str = r"C:\Users\91934\Downloads\Sample.txt") -> None:
    """Run the lemmatization-vs-stemming demo and print the results.

    The demo prints, in order: a sample of the input text, lemmas and stems
    for a fixed list of words, lemmas and stems for the first 50 non-space
    tokens of the input file, a side-by-side comparison table for a second
    fixed word list, and a short conclusion.

    Args:
        file_path: Path of the UTF-8 text file to analyse. Defaults to the
            sample file location the script was originally written for.

    Raises:
        SystemExit: With exit code 1 when ``file_path`` is not an existing
            file, or when the spaCy model ``en_core_web_sm`` cannot be loaded.
    """
    # Validate the input path first so we fail fast, before the (slow) model
    # load. The message must reference ``file_path``: the previous code used an
    # undefined name here and crashed with NameError instead of reporting the
    # missing file.
    if not os.path.isfile(file_path):
        print(f"Error: File '{file_path}' not found.")
        sys.exit(1)

    # Load spaCy model; spacy.load raises OSError when the model package is
    # not installed, so turn that into an actionable hint.
    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        print("SpaCy model 'en_core_web_sm' not found. Install it using:")
        print("python -m spacy download en_core_web_sm")
        sys.exit(1)

    # Initialize stemmer
    stemmer = SnowballStemmer(language='english')

    # Read the file content
    with open(file_path, "r", encoding="utf-8") as file:
        content = file.read()

    # 1. Display original text sample (first 300 characters)
    print("Original Text Sample:")
    print(content[:300])
    print()

    # 2. Lemmatization: Individual Words (Demo 6.2.1)
    print("=== Lemmatization: Individual Words ===")
    sample_words = "friendship studied was am is organizing matches"
    doc_sample = nlp(sample_words)
    for token in doc_sample:
        if not token.is_space:
            print(f"{token.text} -> {token.lemma_}")
    print()

    # 3. Stemming: Individual Words (Demo 6.2.2)
    # Same words as section 2, split on whitespace rather than via spaCy.
    print("=== Stemming: Individual Words ===")
    for word in sample_words.split():
        print(f"{word} --> {stemmer.stem(word)}")
    print()

    # 4. Lemmatization: Full Text (Demo 6.2.3)
    # The whole file is parsed once; whitespace-only tokens are dropped and
    # the resulting list is reused by section 5. Only the first 50 tokens are
    # printed to keep the output short.
    print("=== Lemmatization: Full Text ===")
    doc_full = nlp(content)
    tokens_full = [token for token in doc_full if not token.is_space]
    for token in tokens_full[:50]:
        print(f"{token.text} --> {token.lemma_}")
    print()

    # 5. Stemming: Full Text (Demo 6.2.4)
    # Tokens are lower-cased before stemming; the original text is shown on
    # the left-hand side.
    print("=== Stemming: Full Text ===")
    for token in tokens_full[:50]:
        print(f"{token.text} --> {stemmer.stem(token.text.lower())}")
    print()

    # 6. Practice Comparison Table (Practice 6.2)
    print("=== Practice 6.2: Lemmatization vs Stemming ===")
    print("Word\t\tLemma\t\tStem")
    print("------------------------------------------")
    practice_words = "running good universities flies fairer is"
    doc_practice = nlp(practice_words)
    practice_tokens = [token for token in doc_practice if not token.is_space]

    for token in practice_tokens:
        lemma = token.lemma_
        stem = stemmer.stem(token.text.lower())
        print(f"{token.text}\t\t{lemma}\t\t{stem}")
    print()

    # 7. Conclusion
    print("Conclusion:")
    print("Lemmatization produces dictionary-based meaningful root words, while stemming may distort words by chopping suffixes. For NLP tasks like search, topic modeling, and information retrieval, lemmatization gives better and cleaner output.")

# Run the demo only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()