In [44]:
import csv
import random
import math

In [61]:
def loadcsv(filename):
    """Read a purely numeric CSV file into a list of rows of floats.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per non-empty CSV row; each entry is a list
        containing every field of that row converted with ``float``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If any field cannot be converted to ``float``.
    """
    # The context manager closes the handle even when a conversion fails;
    # newline='' is what the csv module asks for on files given to csv.reader.
    with open(filename, 'r', newline='') as handle:
        # csv.reader yields [] for blank lines; skip them so callers that
        # index row[-1] never receive an empty row.
        return [[float(field) for field in row]
                for row in csv.reader(handle) if row]

def splitdataset(dataset, splitratio):
    """Randomly partition ``dataset`` into two lists.

    ``int(len(dataset) * splitratio)`` rows are drawn without replacement,
    using ``random.randrange``, from a shallow copy of ``dataset``; the
    input list itself is not modified.

    Args:
        dataset: List of rows.
        splitratio: Fraction of the rows to draw into the first list.

    Returns:
        ``[drawn_rows, remaining_rows]``; the remaining rows keep their
        original relative order.
    """
    remaining = list(dataset)
    target = int(len(dataset) * splitratio)
    drawn = []
    for _ in range(target):
        # One randrange call per drawn row, always over what is still left.
        drawn.append(remaining.pop(random.randrange(len(remaining))))
    return [drawn, remaining]

def separatebyclass(dataset):
    """Group rows by the value stored in their last position.

    Args:
        dataset: List of rows; the last element of each row is used as
            the grouping key.

    Returns:
        A dict mapping each distinct last-element value to the list of
        rows carrying it, in the order the rows were encountered.
    """
    groups = {}
    for row in dataset:
        groups.setdefault(row[-1], []).append(row)
    return groups

def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty.
    """
    count = float(len(numbers))
    return sum(numbers) / count

def stdev(numbers):
    """Return the sample standard deviation of ``numbers``.

    The sum of squared deviations from the mean is divided by
    ``len(numbers) - 1`` before the square root is taken.

    Raises:
        ZeroDivisionError: If ``numbers`` has fewer than two elements.
    """
    center = mean(numbers)
    squared_gaps = 0
    for value in numbers:
        squared_gaps += pow(value - center, 2)
    return math.sqrt(squared_gaps / float(len(numbers) - 1))
 
def summarize(dataset):
    """Compute ``(mean, stdev)`` for every column except the last.

    Statistics are computed for all columns of ``dataset`` (rows are
    transposed with ``zip``), and the entry for the final column is then
    discarded.

    Args:
        dataset: List of equally long rows.

    Returns:
        A list of ``(mean, stdev)`` tuples, one per column, excluding the
        last column.
    """
    stats = []
    for column in zip(*dataset):
        stats.append((mean(column), stdev(column)))
    # Drop the summary of the last column.
    del stats[-1]
    return stats

def summarizebyclass(dataset):
    """Build per-class column summaries.

    Rows are grouped with ``separatebyclass`` and each group is passed to
    ``summarize``.

    Args:
        dataset: List of rows.

    Returns:
        A dict mapping each class value to the result of ``summarize`` on
        the rows of that class.
    """
    return {
        label: summarize(rows)
        for label, rows in separatebyclass(dataset).items()
    }

def calculateprobability(x, mean, stdev):
    """Evaluate the Gaussian density with the given mean and stdev at ``x``.

    Computes ``exp(-(x - mean)^2 / (2 * stdev^2)) / (sqrt(2 * pi) * stdev)``.

    Args:
        x: Point at which to evaluate the density.
        mean: Mean of the distribution.
        stdev: Standard deviation of the distribution; it is used as a
            divisor, so it must be non-zero.

    Raises:
        ZeroDivisionError: If ``stdev`` is zero.
    """
    squared_distance = math.pow(x - mean, 2)
    spread = 2 * math.pow(stdev, 2)
    decay = math.exp(-(squared_distance / spread))
    scale = 1 / (math.sqrt(2 * math.pi) * stdev)
    return scale * decay

def calculateclassprobability(summaries, inputvector):
    """Compute the product of per-feature likelihoods for every class.

    For each class, ``calculateprobability`` is evaluated for every
    summarized feature against the value at the same index of
    ``inputvector``, and the results are multiplied together.

    Args:
        summaries: Dict mapping class value to a list of ``(mean, stdev)``
            tuples, one per feature.
        inputvector: Sequence of values; element ``i`` is matched with the
            ``i``-th summary. Trailing extra elements are ignored.

    Returns:
        A dict mapping every class value in ``summaries`` to its likelihood
        product (``1`` for a class with no feature summaries). An empty
        ``summaries`` yields an empty dict.
    """
    probabilities = {}
    for classvalue, classsummaries in summaries.items():
        likelihood = 1
        # The feature loop must run once per class; running it after the
        # class loop would update only the last class.
        for i, (feature_mean, feature_stdev) in enumerate(classsummaries):
            likelihood *= calculateprobability(
                inputvector[i], feature_mean, feature_stdev)
        probabilities[classvalue] = likelihood
    return probabilities


def predict(summaries, inputvector):
    """Return the class whose computed probability is the largest.

    Probabilities come from ``calculateclassprobability``. A later class
    replaces the current best only when its probability is strictly
    greater, so ties keep the class encountered first.

    Args:
        summaries: Per-class summaries passed to
            ``calculateclassprobability``.
        inputvector: Row to classify.

    Returns:
        The best class value, or ``None`` when there are no classes.
    """
    scores = calculateclassprobability(summaries, inputvector)
    winner = None
    winning_score = -1
    for label in scores:
        score = scores[label]
        if winner is not None and not (score > winning_score):
            continue
        winner, winning_score = label, score
    return winner


def getpredictions(summaries, testset):
    """Run ``predict`` on every row of ``testset``.

    Args:
        summaries: Per-class summaries forwarded to ``predict``.
        testset: List of rows to classify.

    Returns:
        A list with one prediction per row, in row order.
    """
    return [predict(summaries, row) for row in testset]

def getaccuracy(testset, predictions):
    """Return the percentage of rows whose last element equals its prediction.

    Args:
        testset: List of rows; the last element of each row is compared.
        predictions: Sequence of predictions, indexed in parallel with
            ``testset``.

    Returns:
        A float between 0.0 and 100.0.

    Raises:
        ZeroDivisionError: If ``testset`` is empty.
    """
    hits = sum(
        1
        for position, row in enumerate(testset)
        if row[-1] == predictions[position]
    )
    return (hits / float(len(testset))) * 100.0

def main(filename='6-dataset.csv', splitratio=0.67, seed=None):
    """Load a CSV, split it, fit per-class summaries and report accuracy.

    Prints the split sizes, up to five rows of each split, the per-class
    summaries, the predictions for the test split and the accuracy.

    Args:
        filename: CSV file handed to ``loadcsv``.
        splitratio: Fraction of rows placed in the training split.
        seed: Optional value passed to ``random.seed`` before splitting so
            that a run can be repeated; ``None`` leaves the generator
            untouched.
    """
    if seed is not None:
        random.seed(seed)

    dataset = loadcsv(filename)

    trainingset, testset = splitdataset(dataset, splitratio)
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingset), len(testset)))

    # Slicing instead of indexing 0..4 keeps small splits from raising
    # IndexError.
    print("\n First Five Rows of Training Set:\n")
    for row in trainingset[:5]:
        print(row, "\n")

    print("\n First Five Rows of Testing Set:\n")
    for row in testset[:5]:
        print(row, "\n")

    # Summaries are built from the training split only.
    summaries = summarizebyclass(trainingset)
    print("\n Model Summaries:\n", summaries)

    # Predictions are computed once and reused for the accuracy figure.
    predictions = getpredictions(summaries, testset)
    print("\nPredictions:\n", predictions)

    accuracy = getaccuracy(testset, predictions)
    print('\nAccuracy of the classifier is : {0}%'.format(accuracy))

In [62]:
# Run the pipeline defined in main(): load the CSV, split it, summarize the
# training rows per class, predict the test rows and print the accuracy.
main()

Split 767 rows into train=513 and test=254 rows

 First Five Rows of Training Set:

[3.0, 176.0, 86.0, 27.0, 156.0, 33.3, 1.154, 52.0, 1.0] 

[7.0, 114.0, 66.0, 0.0, 0.0, 32.8, 0.258, 42.0, 1.0] 

[4.0, 123.0, 62.0, 0.0, 0.0, 32.0, 0.226, 35.0, 1.0] 

[2.0, 96.0, 68.0, 13.0, 49.0, 21.1, 0.647, 26.0, 0.0] 

[11.0, 127.0, 106.0, 0.0, 0.0, 39.0, 0.19, 51.0, 0.0] 


 First Five Rows of Testing Set:

[5.0, 116.0, 74.0, 0.0, 0.0, 25.6, 0.201, 30.0, 0.0] 

[10.0, 139.0, 80.0, 0.0, 0.0, 27.1, 1.441, 57.0, 0.0] 

[1.0, 189.0, 60.0, 23.0, 846.0, 30.1, 0.398, 59.0, 1.0] 

[5.0, 166.0, 72.0, 19.0, 175.0, 25.8, 0.587, 51.0, 1.0] 

[7.0, 100.0, 0.0, 0.0, 0.0, 30.0, 0.484, 32.0, 1.0] 


 Model Summaries:
 {1.0: [(4.870056497175141, 3.705042095389488), (141.47457627118644, 31.547595319073437), (71.01694915254237, 20.71628547354143), (21.576271186440678, 18.044329935886523), (97.43502824858757, 131.6943668728862), (34.94858757062148, 7.740816460230857), (0.5506045197740113, 0.3821767671419756), (37.338