# In [None]:
import csv
import random
import math

def loadcsv(filename):
    """Load a purely numeric CSV file into a list of rows of floats.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV row; each entry is a list holding
        that row's fields converted with ``float``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
    """
    # The context manager closes the file even when a conversion fails;
    # newline="" is what the csv module asks for when it reads a file.
    with open(filename, "r", newline="") as handle:
        return [[float(field) for field in row] for row in csv.reader(handle)]

def splitDataset(dataset, splitratio):
    """Randomly partition ``dataset`` into a training part and a remainder.

    ``int(len(dataset) * splitratio)`` rows are drawn without replacement
    (using the ``random`` module) from a shallow copy of ``dataset``, so
    the caller's list is left untouched.

    Returns:
        ``[trainset, rest]`` where ``trainset`` holds the drawn rows in
        draw order and ``rest`` holds the rows that were not drawn, in
        their original relative order.
    """
    remaining = list(dataset)
    wanted = int(len(dataset) * splitratio)
    drawn = []
    for _ in range(wanted):
        # Pick a random position among the rows that are still available.
        position = random.randrange(len(remaining))
        drawn.append(remaining.pop(position))
    return [drawn, remaining]

def seperatedbyclass(dataset):
    """Group the rows of ``dataset`` by their last element (the class label).

    Returns:
        A dict mapping each distinct last element to the list of rows
        that end with it, in the order the rows appear in ``dataset``.
    """
    groups = {}
    for row in dataset:
        groups.setdefault(row[-1], []).append(row)
    return groups

def mean(numbers):
    """Return the arithmetic mean of a sized collection of numbers as a float.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty.
    """
    total = sum(numbers)
    count = float(len(numbers))
    return total / count

def stdev(num):
    """Return the sample standard deviation of ``num`` (n - 1 denominator).

    Raises:
        ZeroDivisionError: If ``num`` has fewer than two elements.
    """
    centre = mean(num)
    squared_deviations = ((value - centre) ** 2 for value in num)
    # Dividing by len - 1 gives the sample (not population) variance.
    sample_variance = sum(squared_deviations) / float(len(num) - 1)
    return math.sqrt(sample_variance)

def summarize(dataset):
    """Compute ``(mean, stdev)`` for every column of ``dataset`` but the last.

    The rows are transposed into columns; a ``(mean, stdev)`` pair is
    computed for each column and the pair for the final column (the class
    label) is then discarded.

    Returns:
        A list of ``(mean, stdev)`` tuples, one per attribute column.
    """
    stats = []
    for column in zip(*dataset):
        stats.append((mean(column), stdev(column)))
    # The last column is the class label, not an attribute.
    del stats[-1]
    return stats

def summarisebyclass(dataset):
    """Build per-class attribute summaries for ``dataset``.

    Returns:
        A dict mapping each class label to the list returned by
        ``summarize`` for the rows carrying that label.
    """
    grouped = seperatedbyclass(dataset)
    return {label: summarize(rows) for label, rows in grouped.items()}

def calculateprobability(x,mean,stdev):
    """Evaluate the Gaussian (normal) probability density at ``x``.

    Args:
        x: Value at which the density is evaluated.
        mean: Mean of the distribution.
        stdev: Standard deviation of the distribution; must be non-zero.

    Returns:
        ``exp(-(x - mean)**2 / (2 * stdev**2)) / (sqrt(2 * pi) * stdev)``.

    Raises:
        ZeroDivisionError: If ``stdev`` is zero.
    """
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    # The normalising constant is 1 / (sqrt(2*pi) * stdev): stdev belongs in
    # the denominator. Writing 1/sqrt(2*pi)*stdev multiplies by it instead.
    return exponent / (math.sqrt(2 * math.pi) * stdev)

def calcculateclassprobabilities(summaries,inputvector):
    """Score ``inputvector`` against every class in ``summaries``.

    For each class the score is the product, over that class's attribute
    summaries, of ``calculateprobability(inputvector[i], mean_i, stdev_i)``.

    Args:
        summaries: Dict mapping a class label to a list of ``(mean, stdev)``
            pairs, one per attribute.
        inputvector: Sequence of attribute values; it must have at least as
            many entries as each class has attribute summaries.

    Returns:
        A dict mapping every class label to its score. A class with no
        attribute summaries keeps the initial score of 1, and an empty
        ``summaries`` yields an empty dict.
    """
    probabilites = {}
    for classvalue, classsummaries in summaries.items():
        probabilites[classvalue] = 1
        # This loop must run once per class; when it sits outside the class
        # loop only the last class is ever scored.
        for i, (mu, sigma) in enumerate(classsummaries):
            probabilites[classvalue] *= calculateprobability(
                inputvector[i], mu, sigma
            )
    return probabilites
        
       
def predict(summaries, inputvector):
    """Return the class label whose score for ``inputvector`` is highest.

    Scores come from ``calcculateclassprobabilities``. On a tie the label
    encountered first wins; ``None`` is returned when there are no classes.
    """
    scores = calcculateclassprobabilities(summaries, inputvector)
    winner, top_score = None, 1
    for label, score in scores.items():
        # Skip any candidate that does not strictly beat the current best.
        if winner is not None and not score > top_score:
            continue
        winner, top_score = label, score
    return winner

def getprediction(summ,testset):
    """Return the list of ``predict(summ, row)`` results for each row of ``testset``."""
    return [predict(summ, row) for row in testset]

def getaccuracy(testset,pred):
    """Return the percentage of rows whose label matches its prediction.

    Args:
        testset: Sequence of rows; the last element of each row is the
            true class label.
        pred: Sequence of predicted labels, one per row of ``testset``.

    Returns:
        A float between 0.0 and 100.0. An empty ``testset`` gives 0.0
        rather than dividing by zero.
    """
    if not testset:
        return 0.0
    # Count every match; assigning 1 instead of adding 1 caps the count at 1.
    correct = sum(1 for i, row in enumerate(testset) if row[-1] == pred[i])
    return (correct / float(len(testset))) * 100.0
    
def main():
    """Run the full pipeline on ``weatherid3.csv`` and print the results.

    Loads the CSV, splits it 67/33 into train and test sets at random,
    builds per-class attribute summaries from the training rows, predicts
    a label for every test row and prints the resulting accuracy.
    """
    filename = "weatherid3.csv"
    splitratio = 0.67
    dataset = loadcsv(filename)
    print("\nthe length of dataset", len(dataset))
    print("\nspliting dataset")
    train, test = splitDataset(dataset, splitratio)
    print("\n no of rows in train: {0}".format(len(train)))
    # Report the test-set size here; the train-set size is printed above.
    print("\n no of rows in test: {0}".format(len(test)))
    summaries = summarisebyclass(train)
    print("\n model summaries:\n", summaries)
    predictions = getprediction(summaries, test)
    print("\n Prections\n", predictions)
    accuracy = getaccuracy(test, predictions)
    print("\n Accuracy:{0}%".format(accuracy))

# Script entry point: runs the whole pipeline as soon as this line executes
# (there is no __name__ guard, so importing this file runs it as well).
main()
    


    