In [3]:
import nltk
from nltk.util import ngrams
from nltk.probability import FreqDist, ConditionalFreqDist
from nltk.tokenize import word_tokenize

# nltk.download('punkt_tab')

# Toy training corpus used to build the n-gram count tables below.
text = """
I love natural language processing.
I love machine learning.
Language models learn patterns.
I love learning new things.
"""

# The whole text is lowercased and tokenized as a single stream, so the
# bigrams and trigrams below are also formed across line breaks.
tokens = word_tokenize(text.lower())

# Token counts.
unigrams = FreqDist(tokens)

# Conditional counts keyed by the preceding token.
bigrams = ConditionalFreqDist(
    (prev, nxt) for prev, nxt in ngrams(tokens, 2)
)

# Conditional counts keyed by the preceding pair of tokens.
trigrams = ConditionalFreqDist(
    (gram[:2], gram[2]) for gram in ngrams(tokens, 3)
)

def unigram_prob(w):
    """Return the relative frequency of token ``w`` in ``unigrams``.

    Args:
        w: Token to look up.

    Returns:
        count(w) / total token count. ``FreqDist.freq`` returns 0 when the
        distribution is empty, instead of raising ``ZeroDivisionError`` as
        a manual division by ``unigrams.N()`` would.
    """
    return unigrams.freq(w)

def bigram_prob(w1, w2):
    """Return the maximum-likelihood estimate of P(w2 | w1) from ``bigrams``.

    Args:
        w1: Context (preceding) token.
        w2: Token whose conditional probability is wanted.

    Returns:
        count(w1, w2) divided by the total count of tokens seen after
        ``w1``, or the integer 0 when the pair was never observed.
    """
    # ConditionalFreqDist is a defaultdict: indexing an unseen context would
    # insert an empty distribution, so check membership first to keep a
    # lookup from mutating the model.
    if w1 not in bigrams:
        return 0
    followers = bigrams[w1]
    count = followers[w2]
    return count / followers.N() if count else 0

def trigram_prob(w1, w2, w3):
    """Return the maximum-likelihood estimate of P(w3 | w1, w2) from ``trigrams``.

    Args:
        w1: First context token.
        w2: Second context token (immediately before ``w3``).
        w3: Token whose conditional probability is wanted.

    Returns:
        count(w1, w2, w3) divided by the total count of tokens seen after
        the pair ``(w1, w2)``, or the integer 0 when the triple was never
        observed.
    """
    context = (w1, w2)
    # ConditionalFreqDist is a defaultdict: indexing an unseen context would
    # insert an empty distribution, so check membership first to keep a
    # lookup from mutating the model.
    if context not in trigrams:
        return 0
    followers = trigrams[context]
    count = followers[w3]
    return count / followers.N() if count else 0

def sentence_probability(sentence):
    """Score ``sentence`` under the unigram, bigram and trigram tables.

    The sentence is lowercased and tokenized, then each score is the product
    of the per-n-gram probabilities over the sentence. A sentence with no
    n-grams of a given order keeps the initial value 1 for that order.

    Args:
        sentence: Raw sentence string.

    Returns:
        A ``(unigram, bigram, trigram)`` tuple of products.
    """
    def _product(factors):
        # Left-to-right running product, starting from 1.
        running = 1
        for factor in factors:
            running *= factor
        return running

    toks = word_tokenize(sentence.lower())

    uni_score = _product(unigram_prob(tok) for tok in toks)
    bi_score = _product(bigram_prob(*pair) for pair in ngrams(toks, 2))
    tri_score = _product(trigram_prob(*triple) for triple in ngrams(toks, 3))

    return uni_score, bi_score, tri_score

# Example sentences to score against the three tables.
sentences = [
    "I love learning",
    "language models learn",
    "learning models love",
]

for s in sentences:
    up, bp, tp = sentence_probability(s)
    print(f"\nSentence: '{s}'")
    # One output line per model order, in unigram, bigram, trigram order.
    report = (
        ("Unigram Probability: ", up),
        ("Bigram Probability : ", bp),
        ("Trigram Probability:", tp),
    )
    for label, value in report:
        print(label, value)



Sentence: 'I love learning'
Unigram Probability:  0.0016904583020285497
Bigram Probability :  0.3333333333333333
Trigram Probability: 0.3333333333333333

Sentence: 'language models learn'
Unigram Probability:  0.00018782870022539445
Bigram Probability :  0.5
Trigram Probability: 1.0

Sentence: 'learning models love'
Unigram Probability:  0.0005634861006761833
Bigram Probability :  0
Trigram Probability: 0
