In [56]:
import csv
import math 
import random
# Read the whole CSV file into memory as a list of rows; csv.reader yields
# each row as a list of strings, so every field is still text at this point.
# newline='' is the form the csv module documentation recommends for files
# handed to csv.reader.
with open('diabetes.csv', newline='') as f:
    reader = csv.reader(f)
    data = list(reader)
# Peek at the first five rows; the first of them is the column-header row.
print(data[:5])

[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'], ['6', '148', '72', '35', '0', '33.6', '0.627', '50', '1'], ['1', '85', '66', '29', '0', '26.6', '0.351', '31', '0'], ['8', '183', '64', '0', '0', '23.3', '0.672', '32', '1'], ['1', '89', '66', '23', '94', '28.1', '0.167', '21', '0']]


In [57]:
# Remove the CSV header row so that only the records remain.
# The pop is guarded: an unconditional data.pop(0) would discard one more
# real record every time this cell is re-executed. Once the header is gone
# (or the rows have been converted to floats) the check is simply False.
if data and data[0] and data[0][0] == 'Pregnancies':
    data.pop(0)
print(data[:5])

[['6', '148', '72', '35', '0', '33.6', '0.627', '50', '1'], ['1', '85', '66', '29', '0', '26.6', '0.351', '31', '0'], ['8', '183', '64', '0', '0', '23.3', '0.672', '32', '1'], ['1', '89', '66', '23', '94', '28.1', '0.167', '21', '0'], ['0', '137', '40', '35', '168', '43.1', '2.288', '33', '1']]


In [58]:
# Convert every field of every record from its CSV string form to a float.
# Each row is replaced inside the existing `data` list, one row at a time.
for row_index, row in enumerate(data):
    data[row_index] = list(map(float, row))
data[:5]

[[6.0, 148.0, 72.0, 35.0, 0.0, 33.6, 0.627, 50.0, 1.0],
 [1.0, 85.0, 66.0, 29.0, 0.0, 26.6, 0.351, 31.0, 0.0],
 [8.0, 183.0, 64.0, 0.0, 0.0, 23.3, 0.672, 32.0, 1.0],
 [1.0, 89.0, 66.0, 23.0, 94.0, 28.1, 0.167, 21.0, 0.0],
 [0.0, 137.0, 40.0, 35.0, 168.0, 43.1, 2.288, 33.0, 1.0]]

In [59]:
def splitData(data,splitRatio):
    """Randomly partition ``data`` into a training part and a remainder.

    ``int(len(data) * splitRatio)`` rows are drawn without replacement, one at
    a time, with ``random.randrange``. The rows that were not drawn keep their
    original relative order and form the second part. The draws are made from
    a shallow copy, so the caller's list is left untouched.

    Args:
        data: List of rows to split.
        splitRatio: Fraction of the rows to place in the training part.

    Returns:
        A two-element list ``[trainSet, remainder]``.
    """
    target_size = int(len(data) * splitRatio)
    remaining = list(data)
    picked = []
    for _ in range(target_size):
        # Pick a random position among the rows still available and move it.
        picked.append(remaining.pop(random.randrange(len(remaining))))
    return [picked, remaining]

In [60]:
##separating the data by class value
def separateByClass(data):
    """Group rows by the value of their last element.

    Args:
        data: List of rows; the last element of each row is used as the
            grouping key.

    Returns:
        A dict mapping each distinct last-element value to the list of rows
        that carry it. Keys appear in order of first occurrence and rows keep
        their input order; the row objects themselves are not copied.
    """
    groups = {}
    for row in data:
        # setdefault creates the bucket the first time a key is seen.
        groups.setdefault(row[-1], []).append(row)
    return groups
        

In [61]:
#calculating mean
def mean(numbers):
    """Return the arithmetic mean of ``numbers``.

    The sum is divided by the number of values. (Dividing by ``len - 1``
    does not give the mean, and fails outright for a single value.)

    Args:
        numbers: Non-empty sized collection of numeric values.

    Returns:
        The mean as a float.

    Raises:
        ValueError: If ``numbers`` is empty.
    """
    count = len(numbers)
    if count == 0:
        raise ValueError("mean() requires at least one value")
    return sum(numbers) / float(count)

In [62]:
#calculating standard deviation
def stdev(numbers):
    """Return the spread of ``numbers`` around the value given by ``mean``.

    The squared deviations from ``mean(numbers)`` are summed, divided by the
    number of values (not by ``n - 1``), and the square root is returned.

    Args:
        numbers: Sized collection of numeric values.

    Returns:
        The square root of the averaged squared deviation, as a float.
    """
    centre = mean(numbers)
    squared_total = sum((value - centre) ** 2 for value in numbers)
    return math.sqrt(squared_total / float(len(numbers)))

In [63]:
def summarize(data):
    """Compute a ``(mean, stdev)`` pair for every column except the last.

    Rows are transposed into columns with ``zip(*data)``. Statistics are
    computed for every column, and the entry for the final column is then
    dropped (in this notebook the last element of a row is the class label).

    Args:
        data: List of equal-length numeric rows.

    Returns:
        A list of ``(mean, stdev)`` tuples, one per remaining column, in
        column order.
    """
    stats = []
    for column in zip(*data):
        stats.append((mean(column), stdev(column)))
    # Discard the statistics of the last column.
    stats.pop()
    return stats

In [80]:
def summarizeByClass(data):
    """Build per-class column statistics.

    Rows are grouped with ``separateByClass`` and each group is passed to
    ``summarize``.

    Args:
        data: List of rows whose last element is the class value.

    Returns:
        A dict mapping each class value to the list returned by
        ``summarize`` for the rows of that class.
    """
    return {
        label: summarize(rows)
        for label, rows in separateByClass(data).items()
    }

In [81]:
def calProbability(x,mean,stdev):
    """Evaluate the Gaussian probability density at ``x``.

    Computes ``1 / (sqrt(2*pi) * stdev) * exp(-(x - mean)^2 / (2 * stdev^2))``.

    Args:
        x: Point at which to evaluate the density.
        mean: Centre of the distribution.
        stdev: Standard deviation of the distribution; a value of zero makes
            the divisions fail with ZeroDivisionError.

    Returns:
        The density value as a float.
    """
    squared_deviation = math.pow(x - mean, 2)
    twice_variance = 2 * math.pow(stdev, 2)
    decay = math.exp(-(squared_deviation / twice_variance))
    normaliser = math.sqrt(2 * math.pi) * stdev
    return (1 / normaliser) * decay

In [82]:
def calClassProbability(summaries,inputVector):
    """Score ``inputVector`` against every class.

    For each class, the values returned by ``calProbability`` for each
    summarized attribute are multiplied together, starting from 1. No class
    prior is applied, so the results are unnormalized scores.

    Args:
        summaries: Dict mapping class value to a list of ``(mean, stdev)``
            pairs, one per attribute.
        inputVector: Sequence of attribute values; element ``i`` is matched
            with the ``i``-th pair. Extra trailing elements are ignored.

    Returns:
        A dict mapping each class value to its product of densities.
    """
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        likelihood = 1
        for position, (mu, sigma) in enumerate(classSummaries):
            likelihood *= calProbability(inputVector[position], mu, sigma)
        probabilities[classValue] = likelihood
    return probabilities

In [83]:
def predict(summaries,inputVector):
    """Return the class value with the highest score for ``inputVector``.

    Scores come from ``calClassProbability``. When several classes tie, the
    one encountered first wins, because only a strictly greater score
    replaces the current best.

    Args:
        summaries: Per-class attribute statistics.
        inputVector: Sequence of attribute values to classify.

    Returns:
        The winning class value, or ``None`` when ``summaries`` has no classes.
    """
    winner, top_score = None, -1
    for label, score in calClassProbability(summaries, inputVector).items():
        # Skip candidates that do not beat the current best.
        if winner is not None and not score > top_score:
            continue
        winner, top_score = label, score
    return winner

In [84]:
def getPredictions(summaries,testSet):
    """Classify every row of ``testSet`` with ``predict``.

    Args:
        summaries: Per-class attribute statistics.
        testSet: List of rows to classify.

    Returns:
        A list with one predicted class value per row, in row order.
    """
    return [predict(summaries, row) for row in testSet]

In [85]:
def getAccuracy(testSet,predictions):
    """Return the percentage of rows whose prediction matches the true class.

    The true class of a row is its last element. The number of correct
    predictions is printed before the percentage is returned.

    Args:
        testSet: List of rows; the last element of each row is the true class.
        predictions: Predicted class values, aligned by index with ``testSet``.

    Returns:
        Accuracy as a float in the range 0-100. An empty ``testSet`` yields
        0.0 rather than raising ZeroDivisionError (this happens, for example,
        when every row was placed in the training split).
    """
    correct = 0
    for position, row in enumerate(testSet):
        if predictions[position] == row[-1]:
            correct += 1
    print("correct value",correct)
    if len(testSet) == 0:
        # Nothing to evaluate; avoid dividing by zero.
        return 0.0
    return (correct/float(len(testSet)))*100
   


In [88]:
# All helper functions are defined above.
# main() calls them in order: split the data, build the model, evaluate it.
def main(splitRatio=.67, seed=None):
    """Run the whole pipeline: split, summarize, predict and report accuracy.

    Reads the notebook-level ``data`` list, splits it at random, builds the
    per-class summaries from the training part, predicts the remaining rows
    and prints the resulting accuracy.

    Args:
        splitRatio: Fraction of the rows used for training. Defaults to 0.67.
        seed: Optional seed passed to ``random.seed`` before splitting so the
            run can be reproduced. With the default ``None`` the generator is
            left as it is and each run may use a different split.
    """
    if seed is not None:
        random.seed(seed)

    #prepare data
    trainingSet,testSet=splitData(data,splitRatio)
    #prepare model
    summaries=summarizeByClass(trainingSet)
    #test model
    predictions= getPredictions(summaries,testSet)
    accuracy=getAccuracy(testSet,predictions)
    print("accuracy of the model:",accuracy)
    
    
  
    
   

In [89]:
# Run the full pipeline. The train/test split is drawn at random and no seed
# is set here, so the printed numbers can differ between runs.
main()

correct value 191
accuracy of the model: 75.19685039370079
