In [13]:
from collections import Counter
import re

def load_text_file(file_path):
    """Read an entire UTF-8 text file and return its contents.

    Args:
        file_path: Path of the file to open.

    Returns:
        The file's text as a single string, or ``None`` when the file
        could not be read. A diagnostic message is printed in that case
        instead of raising.
    """
    contents = None
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            contents = handle.read()
    except FileNotFoundError:
        # Most common user mistake, so it gets its own friendlier message.
        print("Error: File not found. Please check the path and try again.")
    except Exception as e:
        # Any other failure (permissions, decoding, ...) is reported as-is.
        print(f"Error reading the file: {e}")
    return contents

def tokenize_text(text):
    """Split text into lowercase word tokens.

    A token is a maximal run of regex "word" characters (letters, digits
    and underscore); everything else acts as a separator, so "don't"
    yields "don" and "t".

    Args:
        text: The raw text to tokenize.

    Returns:
        A list of lowercase tokens in order of appearance.
    """
    lowered = text.lower()
    return [match.group() for match in re.finditer(r'\b\w+\b', lowered)]

def calculate_term_frequency(tokens):
    """Count how many times each token occurs.

    Args:
        tokens: An iterable of hashable tokens.

    Returns:
        A ``Counter`` mapping each distinct token to its occurrence count.
    """
    frequencies = Counter()
    frequencies.update(tokens)
    return frequencies

def display_top_tokens(tf_counter, top_n=5):
    """Print the ``top_n`` most frequent tokens, one per line.

    Args:
        tf_counter: A ``Counter`` of token frequencies.
        top_n: How many of the most common tokens to show (default 5).
    """
    report_lines = [f"\nTop {top_n} most frequent tokens:"]
    report_lines.extend(
        f"{token}: {frequency}"
        for token, frequency in tf_counter.most_common(top_n)
    )
    # One print of the joined lines emits the same text as printing each line.
    print("\n".join(report_lines))

def main():
    """Prompt for a text file path and print its most frequent tokens.

    Reads the path from standard input, loads the file, tokenizes it and
    displays the top tokens. If the file cannot be loaded, the loader has
    already printed an error and this function simply returns. If the
    file loads but contains no tokens, a message says so instead of
    exiting silently.
    """
    # Typed or pasted paths often carry stray leading/trailing whitespace.
    file_path = input("Enter the path to the text file: ").strip()
    text_content = load_text_file(file_path)

    # None means the load failed; an empty string is a valid (empty) file
    # and must not be confused with a failure.
    if text_content is None:
        return

    tokens = tokenize_text(text_content)
    if not tokens:
        print("No tokens found in the file.")
        return

    tf_counter = calculate_term_frequency(tokens)
    display_top_tokens(tf_counter)

# Run the interactive prompt only when this module is executed directly,
# not when it is imported by other code.
if __name__ == "__main__":
    main()


Enter the path to the text file: C:\\Users\\ruchi\\OneDrive\\Documents\\sample.txt

Top 5 most frequent tokens:
the: 10
fox: 4
dog: 4
quick: 1
brown: 1
