# Experiment 3 :
<b>Implement decision rule-based Naïve Bayes disambiguation method to find the sense of an
ambiguous word with the given training set.</b>

In [58]:
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
import string
import math

#PRE-PROCESS THE SENTENCE by tokenizing and removing stopwords
def process(sentence) :
    sentence = sentence.translate(str.maketrans('', '', string.punctuation))
    word_tokens = word_tokenize(sentence)
    stops = set(stopwords.words('english'))
    return [word for word in word_tokens if word.lower() not in stops]

with open('lab3.txt', 'r') as f :
    text = f.read()
dataset = {}
for data in text.split('\n') :
    sentence, sense = data.split(':')
    if sense in dataset :
        dataset[sense].append(sentence)
    else :
        dataset[sense] = [sentence]



In [59]:
formatted_dataset = {sense : [] for sense in dataset}
for sense, sentences in dataset.items() :
    for sentence in sentences :
        formatted_dataset[sense] += process(sentence)

In [60]:
vocabulary = {}
for words in formatted_dataset.values() :
    for word in words :
        if word in vocabulary :
            vocabulary[word] += 1
        else :
            vocabulary[word] = 1
                
priors = {sense : len(dataset[sense])/len(dataset) for sense in dataset}

conditionals  = {}
for word in vocabulary :
    conditionals[word] = dict()
    for sense in dataset :
        count_wc = formatted_dataset[sense].count(word)
        count_c = len(dataset[sense])
        prop = (count_wc + 1) / (count_c + len(vocabulary))
        conditionals[word][sense] = prop
    

In [61]:
def naive_bayes(sentence, priors, conditionals) :
    words = process(sentence)
    scores = {}
    for sense in priors :
        value = math.log(priors[sense])
        for word in words :
            if word in conditionals :
                value += math.log(conditionals[word][sense])
            else :
                value += math.log(1e-10)
        scores[sense] = value

    sense_predicted = max(scores, key = scores.get)
    print(f"{sentence}\nSENSE : {sense_predicted}\nSCORE : {scores[sense_predicted]}")
    


In [62]:
test_data = ['The light breeze was refreshing on a warm summer day.',
 'She preferred light exercise, like walking and yoga.',
 'This box is so light I can carry it with one hand.',
 'The light from the lamp was too bright for my eyes.',
 'The light jacket was just perfect for the cool evening air.',
 'The light color of the walls made the room look bigger.',
 'He gave a light chuckle when he heard the funny joke.',
 'I ordered a light meal because I wasn’t very hungry.',
 'She wore a light dress for the summer party.',
 'The light from the fireworks illuminated the night sky.']

for data in test_data :
    naive_bayes(data, priors, conditionals)

The light breeze was refreshing on a warm summer day.
SENSE :  Light (not intense)
SCORE : -25.59000306801089
She preferred light exercise, like walking and yoga.
SENSE :  Light (not intense)
SCORE : -26.113251211775438
This box is so light I can carry it with one hand.
SENSE :  Light (not heavy)
SCORE : -20.61710810189748
The light from the lamp was too bright for my eyes.
SENSE :  Light (brightness)
SCORE : -16.37372796051489
The light jacket was just perfect for the cool evening air.
SENSE :  Light (not heavy)
SCORE : -26.40093328422722
The light color of the walls made the room look bigger.
SENSE :  Light (color/appearance)
SCORE : -29.476708265454747
He gave a light chuckle when he heard the funny joke.
SENSE :  Light (mood/atmosphere)
SCORE : -27.078332107819023
I ordered a light meal because I wasn’t very hungry.
SENSE :  Light (not intense)
SCORE : -39.65089331873625
She wore a light dress for the summer party.
SENSE :  Light (color/appearance)
SCORE : -20.61710810189748
The li