In [1]:
import pandas as pd
import numpy as np
from operator import itemgetter

In [2]:
# Load the toy dataset (semicolon-separated) and, to keep the maths simple,
# recode every column as a 0/1 indicator: 1 for the label named below,
# 0 for any other value.
go_out_df = (
    pd.read_csv('../data/go-out.csv', sep=';')
    .assign(
        Weather=lambda frame: np.where(frame.Weather == 'sunny', 1, 0),
        Car=lambda frame: np.where(frame.Car == 'working', 1, 0),
        Class=lambda frame: np.where(frame.Class == 'go-out', 1, 0),
    )
)

go_out_df

Unnamed: 0,Weather,Car,Class
0,1,1,1
1,0,0,1
2,1,1,1
3,1,1,1
4,1,1,1
5,0,0,0
6,0,0,0
7,1,1,0
8,1,0,0
9,0,0,0


## Calculate the Prior Probabilities

* P(Class=1) = count(Class==1) / (count(Class==1) + count(Class==0))
* P(Class=0) = count(Class==0) / (count(Class==0) + count(Class==1))



## Calculate the Likelihood

### Weather

* P( Weather = 1 | Class = 1) = count( Weather = 1 and Class = 1) / count(Class = 1)
* P( Weather = 0 | Class = 1) = count( Weather = 0 and Class = 1) / count(Class = 1)
* P( Weather = 1 | Class = 0) = count( Weather = 1 and Class = 0) / count(Class = 0)
* P( Weather = 0 | Class = 0) = count( Weather = 0 and Class = 0) / count(Class = 0)

### Car

* P( Car = 1 | Class = 1) = count( Car = 1 and Class = 1) / count(Class = 1)
* P( Car = 0 | Class = 1) = count( Car = 0 and Class = 1) / count(Class = 1)
* P( Car = 1 | Class = 0) = count( Car = 1 and Class = 0) / count(Class = 0)
* P( Car = 0 | Class = 0) = count( Car = 0 and Class = 0) / count(Class = 0)


In [3]:
# Feature matrix and target vector
X = go_out_df[['Weather', 'Car']]
y = go_out_df.Class

# prior_dict:      {class: {"prior": P(Class=class), "count": rows in that class}}
# likelyhood_dict: {feature name: {(feature value, class): P(feature=value | Class=class)}}
prior_dict = {}
likelyhood_dict = {}

# For each class in y
for c in y.unique():
    class_mask = (y == c)

    # Prior = fraction of rows belonging to this class
    prior_dict[c] = {"prior": np.mean(class_mask), "count": np.count_nonzero(class_mask)}

    # For each feature column, restricted to the rows of this class.
    # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
    for col_name, feature_series in X.loc[class_mask].items():
        col_likelihoods = likelyhood_dict.setdefault(col_name, {})

        # Likelihood of every value observed within this class, e.g.
        # P(Weather=1|Class=1) = 0.8
        # P(Weather=0|Class=1) = 0.2
        for feature_c in feature_series.unique():
            col_likelihoods[(feature_c, c)] = (
                np.count_nonzero(feature_series == feature_c) / len(feature_series)
            )

        # A value that exists in the dataset but never occurs together with
        # this class has an empirical likelihood of 0. Store it explicitly so
        # that later lookups do not fail with a KeyError.
        for feature_c in X[col_name].unique():
            col_likelihoods.setdefault((feature_c, c), 0.0)

print(prior_dict)
print(likelyhood_dict)

{1: {'prior': 0.5, 'count': 5}, 0: {'prior': 0.5, 'count': 5}}
{'Weather': {(1, 1): 0.8, (0, 1): 0.2, (0, 0): 0.6, (1, 0): 0.4}, 'Car': {(1, 1): 0.8, (0, 1): 0.2, (0, 0): 0.8, (1, 0): 0.2}}


In [4]:
predictions = []  # NOTE(review): not referenced in the visible cells
def func(x):
    """Score one sample against every class using the Naive Bayes rule.

    Parameters
    ----------
    x : pandas.Series
        A single row of features; the index holds the feature names and the
        values hold the observed feature values.

    Returns
    -------
    list of (class, score) tuples
        One entry per class, in ``y.unique()`` order. The score is the prior
        multiplied by the per-feature likelihoods, i.e. the posterior without
        the normalising evidence term.

    Notes
    -----
    Reads the notebook-level ``y``, ``prior_dict`` and ``likelyhood_dict``.
    A (feature value, class) pair that is missing from ``likelyhood_dict`` is
    treated as having likelihood 0.0 instead of raising ``KeyError``.
    """
    class_scores = []
    for c in y.unique():
        # Initialize posterior as prior
        posterior = prior_dict[c]['prior']
        for col_name in x.index:
            # Naive assumption (independence):
            # P(x1,x2,x3|Y) = P(x1|Y)*P(x2|Y)*P(x3|Y)
            # Posterior is product of prior and likelihoods (ignoring scaling factor)
            posterior *= likelyhood_dict[col_name].get((x[col_name], c), 0.0)
        class_scores.append((c, posterior))

    return class_scores

# Score every row of the feature table; each entry of `results` is the
# list of (class, score) pairs returned by func for that row.
results = [func(feature_row) for _, feature_row in X.iterrows()]
results

[[(1, 0.32000000000000006), (0, 0.04000000000000001)],
 [(1, 0.020000000000000004), (0, 0.24)],
 [(1, 0.32000000000000006), (0, 0.04000000000000001)],
 [(1, 0.32000000000000006), (0, 0.04000000000000001)],
 [(1, 0.32000000000000006), (0, 0.04000000000000001)],
 [(1, 0.020000000000000004), (0, 0.24)],
 [(1, 0.020000000000000004), (0, 0.24)],
 [(1, 0.32000000000000006), (0, 0.04000000000000001)],
 [(1, 0.08000000000000002), (0, 0.16000000000000003)],
 [(1, 0.020000000000000004), (0, 0.24)]]

In [5]:
# For every row keep the class label whose score is the largest
# (on a tie, max() keeps the first pair in the list).
prediction = [
    max(class_scores, key=lambda pair: pair[1])[0]
    for class_scores in results
]
prediction

[1, 0, 1, 1, 1, 0, 0, 1, 0, 0]

In [6]:
# Attach the predicted class as a new column so it can be compared
# row by row with the true `Class` column. This modifies go_out_df in place.
go_out_df['prediction'] = prediction
go_out_df

Unnamed: 0,Weather,Car,Class,prediction
0,1,1,1,1
1,0,0,1,0
2,1,1,1,1
3,1,1,1,1
4,1,1,1,1
5,0,0,0,0
6,0,0,0,0
7,1,1,0,1
8,1,0,0,0
9,0,0,0,0
