In [24]:
import csv
import math 
import random

def loadcsv(filename):
    """Load a CSV file of numeric values into a list of float rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per non-empty CSV row; each entry is a list
        holding that row's fields converted to ``float``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
    """
    # The context manager closes the handle even when a field fails to
    # parse; newline="" is the mode the csv module asks for on file objects.
    with open(filename, "r", newline="") as handle:
        # csv.reader yields [] for blank lines; skip those so no empty row
        # ends up in the dataset.
        return [[float(x) for x in row] for row in csv.reader(handle) if row]


def splitdataset(dataset,splitRatio):
    """Split a dataset into a leading training part and a trailing test part.

    Row order is preserved: no shuffling is performed.

    Args:
        dataset: Sliceable sequence of rows.
        splitRatio: Fraction of the rows to place in the training part; the
            training size is ``int(len(dataset) * splitRatio)``.

    Returns:
        A two-element list ``[trainSet, testSet]``.
    """
    trainSize = int(len(dataset) * splitRatio)
    return [dataset[:trainSize], dataset[trainSize:]]

def mean(numbers):
    """Return the arithmetic mean of ``numbers``.

    Args:
        numbers: Sized collection of numeric values.

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty.
    """
    total = sum(numbers)
    count = len(numbers)
    return total / count

def stdev(numbers):
    """Return the sample standard deviation of ``numbers``.

    The sum of squared deviations from the mean is divided by
    ``len(numbers) - 1`` before the square root is taken.

    Args:
        numbers: Sized collection of numeric values.

    Returns:
        The standard deviation as a float.

    Raises:
        ZeroDivisionError: If ``numbers`` has fewer than two elements.
    """
    centre = mean(numbers)
    squared_deviations = [(value - centre) ** 2 for value in numbers]
    # Accumulate left to right from 0, one term at a time.
    total = 0
    for term in squared_deviations:
        total += term
    degrees_of_freedom = len(numbers) - 1
    return math.sqrt(total / degrees_of_freedom)

def summarizeByClass(dataset):
    """Compute per-class (mean, stdev) summaries for every attribute column.

    Rows are grouped by their last element, which is treated as the class
    label. For each group, every column except the last is summarised.

    Args:
        dataset: Iterable of rows; each row is a sequence whose last element
            is the class label.

    Returns:
        A dict mapping each class label to a list of ``(mean, stdev)``
        tuples, one per attribute column, in column order.
    """
    separated = {}
    for vector in dataset:
        separated.setdefault(vector[-1], []).append(vector)

    summaries = {}
    for classValue, instances in separated.items():
        # zip(*instances) transposes rows into columns. The last column is
        # the class label, so drop it before computing any statistics.
        attributeColumns = list(zip(*instances))[:-1]
        summaries[classValue] = [
            (mean(column), stdev(column)) for column in attributeColumns
        ]
    return summaries

def calProb(x,mean,stdev):
    """Evaluate the Gaussian probability density at ``x``.

    Args:
        x: Point at which to evaluate the density.
        mean: Mean of the distribution.
        stdev: Standard deviation of the distribution.

    Returns:
        ``exp(-(x - mean)**2 / (2 * stdev**2)) / (sqrt(2 * pi) * stdev)``.

    Raises:
        ZeroDivisionError: If ``stdev`` is zero.
    """
    variance = stdev ** 2
    deviation = x - mean
    exponent = math.exp(-(deviation ** 2) / (2 * variance))
    normaliser = (2 * math.pi) ** 0.5 * stdev
    return (1 / normaliser) * exponent


def predict(summaries,inputVector):
    """Return the class label with the highest score for ``inputVector``.

    A class's score is the product of ``calProb`` over its attribute
    summaries; no class prior is applied. When several classes share the top
    score, the one that comes first in ``summaries`` is returned.

    Args:
        summaries: Dict mapping class label to a list of ``(mean, stdev)``
            tuples, one per attribute.
        inputVector: Sequence of attribute values, indexed in the same order
            as the summaries. Elements past the summarised attributes are
            ignored.

    Returns:
        The best-scoring class label, or ``None`` when ``summaries`` is
        empty.
    """
    probs = {}
    for classValue, classSummaries in summaries.items():
        likelihood = 1
        for i, (mu, sigma) in enumerate(classSummaries):
            likelihood *= calProb(inputVector[i], mu, sigma)
        probs[classValue] = likelihood

    # Choose the winner once, after every class has been scored.
    if not probs:
        return None
    # max() keeps the first maximal key, the same as a strict '<' scan.
    return max(probs, key=probs.get)

def getPredictions(summaries,testset):
    """Predict a class label for every row of ``testset``.

    Args:
        summaries: Per-class attribute summaries, passed through to
            ``predict``.
        testset: Sequence of rows to classify.

    Returns:
        A list of predicted labels, in the same order as ``testset``.
    """
    return [predict(summaries, row) for row in testset]

def getAccuracy(testset,prediction):
    """Return the percentage of rows whose label matches its prediction.

    Args:
        testset: Sequence of rows; the last element of each row is the true
            class label.
        prediction: Sequence of predicted labels, index-aligned with
            ``testset``.

    Returns:
        The accuracy as a percentage between 0 and 100. An empty ``testset``
        yields ``0.0`` instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``prediction`` is shorter than ``testset``.
    """
    total = len(testset)
    if total == 0:
        return 0.0
    correct = sum(
        1 for i, row in enumerate(testset) if row[-1] == prediction[i]
    )
    return (correct / total) * 100

# Driver: load the CSV, fit per-class summaries on the leading 70% of the
# rows, classify the remaining rows and report the results.
# The last column of every row is used as the class label.
filename='diabetes2.csv'
splitratio=0.70  # fraction of rows used for training
dataset=loadcsv(filename)
# The split is positional (no shuffling): first rows train, last rows test.
trainset,testset=splitdataset(dataset,splitratio)
summaries=summarizeByClass(trainset)
prediction=getPredictions(summaries,testset)
# Accuracy is a percentage in the range 0-100.
accuracy=getAccuracy(testset,prediction)
print("Prediction\n",prediction)
print("acuracy",accuracy)
        
     
        

    

Prediction
 [0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.