In [1]:
import numpy as np
# Eight training samples (rows), each with four integer-coded features (columns).
training = np.array([
    [1, 0, 1, 1],
    [1, 1, 0, 0],
    [1, 0, 2, 1],
    [0, 1, 1, 1],
    [0, 0, 0, 0],
    [0, 1, 2, 1],
    [0, 1, 2, 0],
    [1, 1, 1, 1],
])
# Class label (0 or 1) of each training row, in the same order as the rows above.
outcome = np.array([0, 1, 1, 1, 0, 1, 0, 1])

## We begin by calculating the prior probabilities of the two classes and store them in a dictionary keyed by class label.

In [2]:
from collections import Counter,defaultdict
def occurence(outcome, alpha=0.0, categories=None):
    """Return the relative frequency of each distinct value in ``outcome``.

    With the default arguments this is the plain empirical distribution
    ``count(value) / len(outcome)``. Passing ``alpha > 0`` applies additive
    (Laplace) smoothing, ``(count + alpha) / (n + alpha * k)`` with ``k`` the
    number of distinct categories, so that no category ends up with
    probability exactly 0.

    (The misspelled name is kept so existing callers continue to work.)

    Parameters
    ----------
    outcome : iterable of hashable
        Observed values, e.g. class labels or the values of one feature.
    alpha : float, optional
        Smoothing pseudo-count added to every category. ``0`` (the default)
        disables smoothing.
    categories : iterable of hashable, optional
        Values that must appear in the result even if they were never
        observed. Only useful together with ``alpha > 0``; with ``alpha == 0``
        unobserved categories simply get probability 0.

    Returns
    -------
    dict
        Maps each value to its (optionally smoothed) probability. Observed
        values come first, in first-seen order, followed by any extra
        ``categories``. An empty ``outcome`` with no ``categories`` gives ``{}``.

    Raises
    ------
    ValueError
        If ``alpha`` is negative, or if the distribution is undefined (there
        are categories but no observations and no smoothing).
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative")

    counts = Counter(outcome)
    if categories is not None:
        # Register never-observed categories with a zero count so that they
        # receive smoothed probability mass.
        for value in categories:
            counts.setdefault(value, 0)

    total = float(sum(counts.values())) + alpha * len(counts)
    if counts and total == 0:
        raise ValueError("cannot estimate probabilities from zero observations without smoothing")

    return {value: (count + alpha) / total for value, count in counts.items()}

In [3]:
# Prior P(class): the relative frequency of each label in `outcome`.
prior_prob = occurence(outcome)

In [4]:
prior_prob

{0: 0.375, 1: 0.625}

## Next we collect the observed feature values class by class. We again use a dictionary: for each class in turn, we take the subset of training rows with that label and record the values seen for every feature.

In [5]:
classes = np.unique(outcome)
rows, cols = training.shape

# likelihoods[cls][j] collects every value that feature j takes among the
# training rows labelled cls.
likelihoods = {}
for cls in classes:
    # Restrict the training set to the samples of this one class.
    class_samples = training[outcome == cls]
    per_feature = defaultdict(list)
    for j in range(class_samples.shape[1]):
        per_feature[j].extend(class_samples[:, j])
    likelihoods[cls] = per_feature

In [6]:
likelihoods

{0: defaultdict(list,
             {0: [1, 0, 0], 1: [0, 0, 1], 2: [1, 0, 2], 3: [1, 0, 0]}),
 1: defaultdict(list,
             {0: [1, 1, 0, 0, 1],
              1: [1, 0, 1, 1, 1],
              2: [0, 2, 1, 2, 1],
              3: [0, 1, 1, 1, 1]})}

For each of the two classes (0 and 1) and each of the 4 features, `likelihoods` now holds the list of values observed in the training data. For example, for class 0 (no flu) and feature 1 (i.e., runny nose), the observed values are 0, 0 and 1. What remains is to turn these lists into probabilities, per class and per feature.

In [7]:
# Replace each list of observed values with a {value: probability} dict.
# NOTE: no Laplace (additive) smoothing is applied here, so a value that was
# never observed for a class has no entry in its dict, and the prediction step
# treats that as probability 0.
for cls in classes:
    for j in range(cols):
        observed = likelihoods[cls][j]
        if isinstance(observed, dict):
            # Already converted by an earlier run of this cell. Converting again
            # would feed the probabilities back into the counter and silently
            # corrupt them, so leave the entry untouched.
            continue
        likelihoods[cls][j] = occurence(observed)

In [8]:
likelihoods

{0: defaultdict(list,
             {0: {0: 0.6666666666666666, 1: 0.3333333333333333},
              1: {0: 0.6666666666666666, 1: 0.3333333333333333},
              2: {0: 0.3333333333333333,
               1: 0.3333333333333333,
               2: 0.3333333333333333},
              3: {0: 0.6666666666666666, 1: 0.3333333333333333}}),
 1: defaultdict(list,
             {0: {0: 0.4, 1: 0.6},
              1: {0: 0.2, 1: 0.8},
              2: {0: 0.2, 1: 0.4, 2: 0.4},
              3: {0: 0.2, 1: 0.8}})}

In [9]:
new_sample = np.asarray((1,0,1,0))

# Unnormalised score per class: prior times the product of the per-feature
# conditional probabilities of the sample's values.
result = {}
for cls in classes:
    score = prior_prob[cls]
    for feature_idx, value in enumerate(new_sample):
        value_probs = likelihoods[cls][feature_idx]
        if value not in value_probs:
            # This value was never observed for this class in the training data.
            score = 0
            continue
        score *= value_probs[value]
    result[cls] = score
            
        

In [10]:
result

{0: 0.018518518518518517, 1: 0.006000000000000002}