In [1]:
import numpy as np 
import pandas as pd 	
import matplotlib.pyplot as plt 
import math

# Read the tab-separated weather table; its last column is taken as the label.
df = pd.read_csv('weather_data.txt', sep="\t")
label_col = df.columns[-1]
X = df.drop(columns=[label_col])  # every column except the last -> features
y = df[label_col]                 # last column -> target


In [4]:
# Feature names, in the same order as the columns of X.
features = X.columns.tolist()
print(features)

['Outlook', 'Temp', 'Humidity', 'Windy']


In [5]:
# No hold-out split is made here: the whole table is used for fitting.
X_train, y_train = X, y
# Row count and number of feature columns.
train_size, num_feats = X.shape

In [6]:
# Lookup tables filled in by the cells below:
#   likelihoods[feature]['<value>_<class>'], class_priors[class],
#   pred_priors[feature][value]
likelihoods, class_priors, pred_priors = {}, {}, {}

In [10]:
# Create a zero placeholder for every table entry so that (value, class)
# combinations never observed in the training data still have a key.
outcomes = np.unique(y_train)  # class labels; computed once instead of per feature value

for feature in features:
    feat_vals = np.unique(X_train[feature])
    pred_priors[feature] = {feat_val: 0 for feat_val in feat_vals}
    # Keys have the form '<feature value>_<class label>'.
    likelihoods[feature] = {
        feat_val + '_' + outcome: 0
        for feat_val in feat_vals
        for outcome in outcomes
    }

# Class priors do not depend on the feature, so they are reset once here
# rather than on every pass through the nested loops.
for outcome in outcomes:
    class_priors[outcome] = 0


In [11]:
# Inspect class_priors right after initialisation: every class maps to the placeholder 0.
print(class_priors)

{'no': 0, 'yes': 0}


In [12]:
# Inspect pred_priors right after initialisation: one zero placeholder per feature value.
print(pred_priors)

{'Outlook': {'Overcast': 0, 'Rainy': 0, 'Sunny': 0}, 'Temp': {'Cool': 0, 'Hot': 0, 'Mild': 0}, 'Humidity': {'High': 0, 'Normal': 0}, 'Windy': {'f': 0, 't': 0}}


In [9]:
# Inspect the likelihood table; keys have the form '<feature value>_<class label>'.
print(likelihoods)

{'Outlook': {'Overcast_no': 0, 'Overcast_yes': 0, 'Rainy_no': 0, 'Rainy_yes': 0, 'Sunny_no': 0, 'Sunny_yes': 0}, 'Temp': {'Cool_no': 0, 'Cool_yes': 0, 'Hot_no': 0, 'Hot_yes': 0, 'Mild_no': 0, 'Mild_yes': 0}, 'Humidity': {'High_no': 0, 'High_yes': 0, 'Normal_no': 0, 'Normal_yes': 0}, 'Windy': {'f_no': 0, 'f_yes': 0, 't_no': 0, 't_yes': 0}}


In [13]:
""" P(c) - Prior Class Probability """
# Prior of a class = share of training rows labelled with that class.
for outcome in np.unique(y_train):
    outcome_count = (y_train == outcome).sum()
    class_priors[outcome] = outcome_count / train_size

In [14]:
# Inspect the class priors computed in the previous cell.
print(class_priors)

{'no': 0.35714285714285715, 'yes': 0.6428571428571429}


In [17]:
""" P(x|c) - Likelihood """
# For each (feature, class) pair, count how often every feature value occurs
# among the rows of that class and divide by the class size.
for feature in features:
    for outcome in np.unique(y_train):
        in_class = y_train == outcome  # boolean mask over the training rows
        outcome_count = in_class.sum()
        feat_likelihood = X_train.loc[in_class, feature].value_counts().to_dict()
        print(feat_likelihood)
        # Values absent from this class keep the placeholder they already have.
        likelihoods[feature].update({
            feat_val + '_' + outcome: count/outcome_count
            for feat_val, count in feat_likelihood.items()
        })

{'Rainy': 3, 'Sunny': 2}
{'Overcast': 4, 'Sunny': 3, 'Rainy': 2}
{'Mild': 2, 'Hot': 2, 'Cool': 1}
{'Mild': 4, 'Cool': 3, 'Hot': 2}
{'High': 4, 'Normal': 1}
{'Normal': 6, 'High': 3}
{'t': 3, 'f': 2}
{'f': 6, 't': 3}


In [18]:
# Inspect the filled likelihood table; entries that were never assigned keep the placeholder 0.
print(likelihoods)

{'Outlook': {'Overcast_no': 0, 'Overcast_yes': 0.4444444444444444, 'Rainy_no': 0.6, 'Rainy_yes': 0.2222222222222222, 'Sunny_no': 0.4, 'Sunny_yes': 0.3333333333333333}, 'Temp': {'Cool_no': 0.2, 'Cool_yes': 0.3333333333333333, 'Hot_no': 0.4, 'Hot_yes': 0.2222222222222222, 'Mild_no': 0.4, 'Mild_yes': 0.4444444444444444}, 'Humidity': {'High_no': 0.8, 'High_yes': 0.3333333333333333, 'Normal_no': 0.2, 'Normal_yes': 0.6666666666666666}, 'Windy': {'f_no': 0.4, 'f_yes': 0.6666666666666666, 't_no': 0.6, 't_yes': 0.3333333333333333}}


In [21]:
""" P(x) - Evidence """
# Marginal frequency of every feature value over the whole training set.
for feature in features:
    feat_vals = X_train[feature].value_counts().to_dict()
    print(feat_vals)
    pred_priors[feature].update({
        feat_val: count/train_size
        for feat_val, count in feat_vals.items()
    })

{'Rainy': 5, 'Sunny': 5, 'Overcast': 4}
{'Mild': 6, 'Cool': 4, 'Hot': 4}
{'Normal': 7, 'High': 7}
{'f': 8, 't': 6}


In [20]:
# Inspect the per-feature value frequencies stored in pred_priors.
print(pred_priors)

{'Outlook': {'Overcast': 0.2857142857142857, 'Rainy': 0.35714285714285715, 'Sunny': 0.35714285714285715}, 'Temp': {'Cool': 0.2857142857142857, 'Hot': 0.2857142857142857, 'Mild': 0.42857142857142855}, 'Humidity': {'High': 0.5, 'Normal': 0.5}, 'Windy': {'f': 0.5714285714285714, 't': 0.42857142857142855}}


In [30]:
""" Calculates Posterior probability P(c|x) """
# Rows to classify: one row per sample, values ordered like `features`.
# For a real test set, assign it here instead (qu = x_test).
qu = np.array([['Rainy', 'Mild', 'Normal', 't']])

# Smoothing terms. Both are 0 in this example, so the formula below reduces to
# likelihood * prior / evidence; they would normally be chosen per task
# (e.g. m = 2, p = 1/m).
# NOTE(review): m-estimate smoothing is usually applied to the per-feature
# counts; here m*p is added to the whole product -- confirm before using m != 0.
m = 0
p = 0

results = []
for query in qu:
    probs_outcome = {}
    for outcome in np.unique(y_train):
        likelihood = 1
        evidence = 1
        for feat, feat_val in zip(features, query):
            likelihood *= likelihoods[feat][feat_val + '_' + outcome]
            evidence *= pred_priors[feat][feat_val]

        prior = class_priors[outcome]
        # NOTE(review): evidence is the product of per-feature marginals, so the
        # scores across classes are not normalised and need not sum to 1.
        posterior = (likelihood * prior + m*p) / (evidence + m)
        probs_outcome[outcome] = posterior

    print(probs_outcome)
    # Predicted class = the label with the highest score.
    results.append(max(probs_outcome, key=probs_outcome.get))
print(results)

{'no': 0.3136, 'yes': 0.43017832647462273}
['yes']
