In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import string

# Fetch the Punkt tokenizer data used by word_tokenize before any
# tokenizing happens (NLTK reports when the package is already up to date).
nltk.download("punkt")

def get_most_common_words(file_path, num_words=10):
    """
    Tokenizes the text in the given file and returns the most common words.

    Tokens are lowercased before counting, and any token that contains a
    non-alphanumeric character (e.g. punctuation marks) is discarded.

    Parameters:
    file_path (str): Path to the text file. The file is decoded as UTF-8;
        a leading byte-order mark, if present, is ignored.
    num_words (int): Number of most common words to return.

    Returns:
    list: A list of (word, count) tuples containing the most common words
        and their counts. An empty list is returned (and a message is
        printed) when the file does not exist.
    """
    # Only the file access is guarded: a FileNotFoundError raised later
    # (e.g. inside the tokenizer) must not be reported as a missing input.
    try:
        # Decode explicitly instead of relying on the platform's locale
        # encoding; 'utf-8-sig' also strips a BOM so it cannot stick to the
        # first token and cause that word to be filtered out below.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            text = file.read()
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return []

    # Tokenize the text
    tokens = word_tokenize(text)

    # Convert tokens to lowercase and remove punctuation
    words = [word.lower() for word in tokens if word.isalnum()]

    # Calculate word frequencies and pick the top entries
    freq_dist = FreqDist(words)
    return freq_dist.most_common(num_words)

# Demo run: count the words of a sample document
file_path = 'sample.txt'  # Replace with the path to your text file
most_common_words = get_most_common_words(file_path, num_words=10)

# Print a header, then one "word: count" line per result
print("10 Most Common Words:")
for word, count in most_common_words:
    print(f"{word}: {count}")


File not found: sample.txt
10 Most Common Words:


[nltk_data] Downloading package punkt to C:\Users\SHAIK
[nltk_data]     TOUFIQSAHEB\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
