# Find the frequencies of distinct n-grams in a sentence

In [1]:
from collections import Counter
from nltk.util import ngrams

def get_ngram_frequencies(sentence, n):
    """Count how often each n-gram occurs in a sentence.

    The sentence is split on whitespace and the resulting tokens are fed to
    ``nltk.util.ngrams`` to form the n-grams that get tallied.

    Args:
        sentence: Text to analyse.
        n: Number of tokens per n-gram (2 for bigrams, 3 for trigrams, ...).

    Returns:
        A ``Counter`` mapping each n-gram tuple to its number of occurrences.
    """
    tokens = sentence.split()
    # Counter consumes the n-gram iterator directly, so no temporary list
    # is required to tally the occurrences.
    return Counter(ngrams(tokens, n))

# Example usage: bigram counts for a sentence that repeats some phrases
n = 2  # Size of each n-gram; change it to generate different n-grams
sentence = "this is a test sentence and this is another test sentence"

frequencies = get_ngram_frequencies(sentence, n)

# Show every distinct n-gram next to the number of times it occurred
for ngram, freq in frequencies.items():
    print(f"{ngram}: {freq}")

('this', 'is'): 2
('is', 'a'): 1
('a', 'test'): 1
('test', 'sentence'): 2
('sentence', 'and'): 1
('and', 'this'): 1
('is', 'another'): 1
('another', 'test'): 1


# Calculate the probabilities of n-grams in the sentence

In [6]:
from collections import Counter
from nltk.util import ngrams

def get_ngram_probabilities(sentence, n):
    """Compute the relative frequency of each n-gram in a sentence.

    The sentence is split on whitespace, its n-grams are generated with
    ``nltk.util.ngrams``, and each distinct n-gram's count is divided by the
    total number of n-grams produced.

    Args:
        sentence: Text to analyse.
        n: Number of tokens per n-gram (2 for bigrams, 3 for trigrams, ...).

    Returns:
        A dict mapping each n-gram tuple to its share of all n-grams in the
        sentence. The dict is empty when no n-gram can be formed.
    """
    grams = list(ngrams(sentence.split(), n))
    # Each generated n-gram counts once toward the denominator, so the list
    # length equals the sum of all per-n-gram counts.
    total = len(grams)
    return {gram: count / total for gram, count in Counter(grams).items()}

# Example usage: bigram probabilities for a short sample sentence
n = 2  # Size of each n-gram; change it to generate different n-grams
sentence = "this is a test sentence and he has a test now"

probabilities = get_ngram_probabilities(sentence, n)

# Show every distinct n-gram with its probability to four decimal places
for ngram, prob in probabilities.items():
    print(f"{ngram}: {prob:.4f}")

('this', 'is'): 0.1000
('is', 'a'): 0.1000
('a', 'test'): 0.2000
('test', 'sentence'): 0.1000
('sentence', 'and'): 0.1000
('and', 'he'): 0.1000
('he', 'has'): 0.1000
('has', 'a'): 0.1000
('test', 'now'): 0.1000


# Generate n-grams over the words of the sentence taken in reverse order

In [7]:
from collections import Counter
from nltk.util import ngrams

def get_reverse_ngrams(sentence, n):
    """Build n-grams over the words of a sentence taken in reverse order.

    The sentence is split on whitespace, the word list is reversed, and the
    reversed list is passed to ``nltk.util.ngrams``.

    Args:
        sentence: Text to analyse.
        n: Number of tokens per n-gram (2 for bigrams, 3 for trigrams, ...).

    Returns:
        A list of n-gram tuples, starting from the end of the sentence.
    """
    tokens = sentence.split()
    tokens.reverse()  # Last word first, so n-grams run back to front
    return list(ngrams(tokens, n))

# Example usage: bigrams of a sample sentence read from its last word
n = 2  # Size of each n-gram; change it to generate different n-grams
sentence = "this is a test sentence and this is another test sentence"

reverse_ngrams = get_reverse_ngrams(sentence, n)

# Print one n-gram per line, in the order they were generated
for ngram in reverse_ngrams:
    print(ngram)

('sentence', 'test')
('test', 'another')
('another', 'is')
('is', 'this')
('this', 'and')
('and', 'sentence')
('sentence', 'test')
('test', 'a')
('a', 'is')
('is', 'this')
