In [13]:
import pandas as pd
# Read the dataset and coerce every column to float in one step.
# NOTE(review): read_csv treats the first row as a header by default --
# confirm the CSV actually has a header row, otherwise one record is lost.
fname = 'pima-indians-diabetes.csv'
df = pd.read_csv(fname).astype('float')

# Reproducible 80/20 split: sample 80% of the rows with a fixed seed for
# training and keep the remaining rows (by index label) for testing.
train = df.sample(frac=0.8, random_state=105)
test = df.drop(index=train.index)

In [14]:
# Group the training rows by the last column (the class label) and record,
# for each class, one [mean, std_dev] pair per attribute column.
outcome_group = train.groupby(df.columns[-1])
num_attr = len(df.columns) - 1  # every column except the trailing label
summaries = {}
for classvalue, instances in outcome_group:
    mean = instances.mean(axis=0).values
    std_dev = instances.std(axis=0).values
    # Only the first num_attr entries are kept, so any statistic computed
    # for the label column itself is ignored.
    summaries[classvalue] = [[mean[i], std_dev[i]] for i in range(num_attr)]
    

In [15]:
import math
def calcProb(x, mean, std_dev):
    """Evaluate the Gaussian (normal) probability density at ``x``.

    Parameters
    ----------
    x : float
        Value at which the density is evaluated.
    mean : float
        Mean of the distribution.
    std_dev : float
        Standard deviation of the distribution.

    Returns
    -------
    float
        ``exp(-(x - mean)**2 / (2 * std_dev**2)) / sqrt(2 * pi * std_dev**2)``.
        If the variance is zero, the distribution is treated as a point
        mass: ``1.0`` is returned when ``x == mean`` and ``0.0`` otherwise.
    """
    variance = math.pow(std_dev, 2)
    if variance == 0:
        # A zero variance would otherwise raise ZeroDivisionError below.
        # Treat the attribute as constant: an exact match does not change
        # the running product, any other value rules the class out.
        return 1.0 if x == mean else 0.0
    exponent = math.exp(-math.pow(x-mean, 2)/(2*variance))
    return 1/math.sqrt(2*math.pi*variance)*exponent

In [16]:
def calcClassProbablities(summaries, X):
    """Score the sample ``X`` against every class in ``summaries``.

    Parameters
    ----------
    summaries : dict
        Maps each class label to a sequence of ``(mean, std_dev)`` pairs,
        one pair per attribute.
    X : sequence
        Attribute values, indexed by position; only the first
        ``len(pairs)`` entries are read for each class.

    Returns
    -------
    dict
        Maps each class label to the product of ``calcProb`` over its
        attributes. The product starts at 1, so no class prior is applied.
    """
    likelihoods = {}
    for label, attr_stats in summaries.items():
        score = 1
        for idx, (mu, sigma) in enumerate(attr_stats):
            score *= calcProb(X[idx], mu, sigma)
        likelihoods[label] = score
    return likelihoods
        

In [17]:
def predict(summaries, X):
    """Return the class label with the highest score for the sample ``X``.

    Scores come from ``calcClassProbablities(summaries, X)``. When several
    labels share the top score, the first one in iteration order is returned.
    Returns ``None`` if there are no classes to choose from.
    """
    scores = calcClassProbablities(summaries, X)
    # max() only replaces its current pick on a strictly greater score, so
    # the earliest label wins ties.
    return max(scores, key=scores.get, default=None)

In [18]:
# Classify every held-out row. Rows are passed whole, exactly as they come
# out of the test frame (all columns, in order).
test_ls = test.values.tolist()
predictions = [predict(summaries, row) for row in test_ls]

In [27]:
# Count the held-out rows whose last-column value equals the prediction made
# for that row, then report the share of matches as a percentage.
count = sum(
    1 for i in range(len(test)) if test.iloc[i, -1] == predictions[i]
)
print(f'accuracy is -> {(count/float(len(test))) * 100.0}')

accuracy is -> 75.16339869281046
