In [6]:
import numpy as np
import pandas as pd 
import math

In [7]:
def split_data(dataset, train_per = 0.67 , test_per = 0.33):
    """Randomly split the rows of a 2-D dataset into train and test parts.

    The rows are shuffled with NumPy's global random state (seed it with
    ``np.random.seed`` beforehand for a reproducible split). The shuffle is
    done on a copy, so the caller's ``dataset`` keeps its row order.

    Parameters
    ----------
    dataset : array-like, 2-D
        One sample per row. Converted to a new ``numpy.ndarray``.
    train_per : float
        Fraction of the rows placed in the train part (rounded down).
    test_per : float
        Fraction of the rows placed in the test part. When
        ``train_per + test_per`` is (numerically) at least 1, the test part
        is simply every row not used for training, so no row is dropped by
        rounding. When the fractions sum to less than 1, the test part holds
        ``int(n * test_per)`` rows and the leftover rows are discarded.

    Returns
    -------
    (train, test) : tuple of numpy.ndarray
        Disjoint row subsets of the shuffled copy.
    """
    # Shuffle a copy: np.random.shuffle works in place and would otherwise
    # silently reorder the array owned by the caller.
    shuffled = np.array(dataset)
    np.random.shuffle(shuffled)

    n = len(shuffled)
    train_n = int(n*train_per)
    if train_per + test_per < 1 - 1e-9:
        # Caller asked for less than the whole dataset: honour test_per.
        test_n = int(n*test_per)
    else:
        # Fractions cover everything: test is the remainder after train.
        test_n = n - train_n

    train = shuffled[:train_n,:]
    test = shuffled[train_n:train_n + test_n,:]
    return train,test

In [8]:
def pre_process(train):
    """Estimate per-class Gaussian statistics and class priors.

    The last column of ``train`` is read as the class label; classes are
    taken to be the integers ``0 .. int(max(label))``.

    Returns
    -------
    avg : ndarray, shape (n_columns, n_classes)
        ``avg[c, k]`` is the mean of column ``c`` over the rows of class ``k``.
    var : ndarray, shape (n_columns, n_classes)
        ``var[c, k]`` is the (population) variance of column ``c`` over the
        rows of class ``k``.
    class_prob : ndarray, shape (n_classes,)
        Fraction of the rows that belong to each class.

    Note that the statistics cover every column, including the label column
    itself (the last row of ``avg`` / ``var``).
    """
    _, n_cols = train.shape
    n_classes = int(max(train[:, -1])) + 1

    avg = np.zeros((n_cols, n_classes))
    var = np.zeros((n_cols, n_classes))
    counts = np.zeros((n_classes))

    for k in range(n_classes):
        # Rows whose label equals the current class index.
        members = train[train[:, -1] == k]
        counts[k] = len(members)
        avg[:, k] = np.mean(members, axis=0)
        var[:, k] = np.var(members, axis=0)

    # Turn the per-class row counts into relative frequencies.
    class_prob = counts / len(train)

    return avg, var, class_prob

In [9]:
def gauss(x,mu,var):
    """Return the normal probability density N(x; mu, var).

    Computes ``exp(-(x - mu)**2 / (2 * var)) / sqrt(2 * pi * var)``.

    Parameters
    ----------
    x : float
        Point at which the density is evaluated.
    mu : float
        Mean of the distribution.
    var : float
        Variance of the distribution; must be positive (a zero variance
        raises ``ZeroDivisionError``).
    """
    # The deviation is (x - mu); the earlier (-x - mu) form squared to
    # (x + mu)**2 and placed the peak at x = -mu.
    return math.exp(-(x-mu)**2/(2*var))/math.sqrt(2*var*math.pi)

In [11]:
# Gaussian naive Bayes on diabetes.csv: the last column is used as the class
# label, every other column as a feature.

# Seed NumPy's global RNG so the shuffle inside split_data, and therefore the
# accuracy printed below, is the same on every Restart & Run All.
np.random.seed(0)

dataset = np.array(pd.read_csv('diabetes.csv'))
train,test = split_data(dataset)
avg,var,class_prob = pre_process(train)
pred = []
print(train)
print(test[:,-1])

(nr,nc) = test.shape
(_,ncls) = var.shape

for r in range(nr):
    prob_list = []
    for cls in range(ncls):
        # Score = class prior * product of per-feature likelihoods.
        # Starting from 1 would ignore how frequent each class is.
        prob = class_prob[cls]
        # nc-1: skip the last column, which holds the label itself.
        for c in range(nc-1):
            prob*=gauss(test[r,c] , avg[c,cls] , var[c,cls])
        prob_list.append(prob)
    # Predict the class with the highest score.
    prediction = prob_list.index(max(prob_list))
    pred.append(prediction)
print(len(pred),pred)
# Accuracy: fraction of test rows whose predicted class matches the label.
print(np.mean(np.array(pred) == test[:,-1]))
        


[[  3.00000000e+00   1.13000000e+02   4.40000000e+01 ...,   1.40000000e-01
    2.20000000e+01   0.00000000e+00]
 [  3.00000000e+00   1.26000000e+02   8.80000000e+01 ...,   7.04000000e-01
    2.70000000e+01   0.00000000e+00]
 [  1.10000000e+01   1.38000000e+02   7.40000000e+01 ...,   5.57000000e-01
    5.00000000e+01   1.00000000e+00]
 ..., 
 [  1.00000000e+00   1.22000000e+02   9.00000000e+01 ...,   3.25000000e-01
    3.10000000e+01   1.00000000e+00]
 [  0.00000000e+00   1.73000000e+02   7.80000000e+01 ...,   1.15900000e+00
    5.80000000e+01   0.00000000e+00]
 [  4.00000000e+00   1.20000000e+02   6.80000000e+01 ...,   7.09000000e-01
    3.40000000e+01   0.00000000e+00]]
[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  1.  1.  0.  0.  0.  1.  0.
  0.  0.  0.  1.  1.  0.  0.  0.  1.  1.  0.  0.  1.  0.  1.  1.  1.  0.
  1.  1.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  1.
  0.  0.  0.  0.  0.  1.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.
  0.  1.  0.  0.  0.  0.  0.