In [3]:
from collections import defaultdict
from typing import List 
from nltk.tokenize import word_tokenize
def add_one_smoothing(tokens: List[str], n: int) -> dict:
    """
    Estimate add-one (Laplace) smoothed probabilities for the n-grams in tokens.

    Every contiguous window of ``n`` tokens is counted, and each distinct
    n-gram ``g`` is assigned

        P(g) = (count(g) + 1) / (N + V)

    where ``N`` is the total number of n-gram occurrences in ``tokens`` and
    ``V`` is the number of distinct n-grams observed. Only n-grams that
    actually occur in ``tokens`` appear in the result, so the returned
    probabilities sum to 1 (up to floating-point rounding).

    Params:
        tokens: Tokenized list of a sentence
        n: Size of each n-gram (number of tokens per window); must be >= 1

    Returns:
        Dict mapping each observed n-gram (a tuple of ``n`` tokens) to its
        smoothed probability. Empty when ``tokens`` has fewer than ``n``
        items.

    Raises:
        ValueError: if ``n`` is less than 1.
    """
    # A zero or negative window size has no meaningful n-grams; slicing with
    # it would silently yield empty or misaligned tuples.
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n}")

    # Count the occurrences of each n-gram (sliding window of width n).
    counts = defaultdict(int)
    for start in range(len(tokens) - n + 1):
        counts[tuple(tokens[start:start + n])] += 1

    # N: total n-gram occurrences; V: number of distinct n-grams.
    total_ngrams = sum(counts.values())
    num_unique_ngrams = len(counts)

    # Add one to every count and enlarge the denominator by V so the
    # distribution over the observed n-grams stays normalised.
    denominator = total_ngrams + num_unique_ngrams
    return {ngram: (count + 1) / denominator for ngram, count in counts.items()}

In [5]:
# Example usage: interactively read a sentence and an n-gram size, then
# print the add-one smoothed probability of every n-gram in that sentence.
sentence = input("Please Enter a sentence: ")
# Split the raw sentence into a list of tokens with NLTK's word_tokenize.
# NOTE(review): presumably requires the NLTK tokenizer data to be downloaded
# beforehand — confirm for a fresh environment.
tokens = word_tokenize(sentence)
# The prompt asks for the n-gram size (e.g. 2 for bigrams); int() raises
# ValueError if the reply is not a valid integer.
n = int(input("Enter number of N-grams: "))
probs = add_one_smoothing(tokens, n)
# probs maps each n-gram (tuple of tokens) to its smoothed probability.
print(probs)

Please Enter a sentence: My name is pranchal batra and I am from ambala cantt.
Enter number of N-grams: 2
{('My', 'name'): 0.09090909090909091, ('name', 'is'): 0.09090909090909091, ('is', 'pranchal'): 0.09090909090909091, ('pranchal', 'batra'): 0.09090909090909091, ('batra', 'and'): 0.09090909090909091, ('and', 'I'): 0.09090909090909091, ('I', 'am'): 0.09090909090909091, ('am', 'from'): 0.09090909090909091, ('from', 'ambala'): 0.09090909090909091, ('ambala', 'cantt'): 0.09090909090909091, ('cantt', '.'): 0.09090909090909091}
