In [91]:
import pandas as pd
import numpy as np

dataframe = pd.read_csv("datasets/diabetes.csv")
dataframe = dataframe.astype(float)
trainData = dataframe.sample(frac=0.8, random_state=100)
testData = dataframe.drop(trainData.index)

print(trainData)
print(testData)

     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
173          1.0     79.0           60.0           42.0     48.0  43.5   
253          0.0     86.0           68.0           32.0      0.0  35.8   
207          5.0    162.0          104.0            0.0      0.0  37.7   
737          8.0     65.0           72.0           23.0      0.0  32.0   
191          9.0    123.0           70.0           44.0     94.0  33.1   
..           ...      ...            ...            ...      ...   ...   
401          6.0    137.0           61.0            0.0      0.0  24.2   
82           7.0     83.0           78.0           26.0     71.0  29.3   
650          1.0     91.0           54.0           25.0    100.0  25.2   
721          1.0    114.0           66.0           36.0    200.0  38.1   
74           1.0     79.0           75.0           30.0      0.0  32.0   

     DiabetesPedigreeFunction   Age  Outcome  
173                     0.678  23.0      0.0  
253              

In [92]:
outcome = trainData.groupby(dataframe.columns[-1]) # get the last column
attrCount = len(dataframe.columns)-1

summaries = {}
for classValue, instances in outcome:
    attrMeanStd = []
    mean = list(instances.mean(axis=0).values)
    std = list(instances.std(axis=0).values)
    for itr in range(attrCount):
        attrMeanStd.append({mean[itr],std[itr]})
    summaries[classValue] = attrMeanStd

In [93]:
import math
def calculateProb(data,mean,std):
    exponent = math.exp(-math.pow(data-mean,2)/2*math.pow(std,2))
    return (1/(math.sqrt(2*math.pi*math.pow(std,2))))*exponent

In [94]:
def calculateClassProb(summaries,testData):
    probabilites={}
    for classValue,classSummaries in summaries.items():
        probabilites[classValue]=1
        for itr in range(len(classSummaries)):
            mean,std = classSummaries[itr]
            data = testData[itr]
            probabilites[classValue]*=calculateProb(data,mean,std)
    return probabilites      

In [102]:
def predict(summaries,testData):
    probabilities = calculateClassProb(summaries,testData)
    bestLabel, bestProbability = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability>bestProbability:
            bestLabel = classValue
            bestProbability = probability
    return bestProbability

predictions = []
testingData = testData.values.tolist()
for itr in range(len(testingData)):
    result = predict(summaries,testingData[itr])
    predictions.append(result)

In [105]:
def getAccuracy(testData,predictions):
    correct = 0
    for itr in range(len(testData)):
        if testData[itr][attrCount] == predictions[itr]:
            correct+=1
    return (correct/float(len(testData)))*100

accuracy = getAccuracy(testingData,predictions)
print("Accuracy : ",accuracy)

Accuracy :  65.5844155844156
