In [105]:
# Import dependencies
import pandas as pd
import math

In [106]:
# Split the dataset by class values, returns a dictionary
def seperateClasses(dataset):
	classes = dict()
	for i in range(len(dataset)):
		vector = dataset[i]
		classValue = vector[-1]
		if (classValue not in classes):
			classes[classValue] = list()
		classes[classValue].append(vector)
	return classes



In [107]:
# Calculate mean, standard deviation
def mean(numericalValues):
    return sum(numericalValues)/float(len(numericalValues))

def standardDeviation(numericalValues):
    average = mean(numericalValues)
    variance = sum(pow(x-average) for x in numericalValues)/float(len(numericalValues)-1)
    return math.sqrt(variance)


In [108]:
# Summarizing data
def summarizingByClasses(dataset):
    seperated = seperatedByClass(dataset)
    summary = dict()
    for classValue,instances in seperated.items():
        summary["{}".format(classValue)] = summarizingByClasses(instances)
    return summary

In [109]:
# Calculating probabilities
def calculateProbability(x,mean,standardDeviation):
    expo = math.exp(-(math.pow(x-mean,2)))/(2*math.pow(standardDeviation,2))
    return (1/((math.sqrt(2*math.pi))*standardDeviation))*expo


In [110]:
# Calculate class probabilities
def calculateClassProbability(sumarry, inputVector):
    probabilities = dict()
    for classValue, classSummary in sumarry.items():
        probabilities[classValue] = 1
        for i in range(len(classSummary)):
            mean,standardDeviation = classSummary[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x,mean,standardDeviation)
        return probabilities

In [111]:
# Predict
def predict(summary, inputVector):
    probabilities = calculateClassProbability(summary,inputVector)
    bestLabel, bestProbability = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability>bestProbability:
            bestLabel = classValue
            bestProbability = probability
    return bestLabel

In [112]:
# Getting predictions
def getPredictions(summary,testingData):
    predictions = []
    for i in range(len(testingData)):
        result = predict(summary,testingData[i])
        predictions.append(result)
    return predictions

In [113]:
# Calculate accuracy
def getAccuracy(testingData,predictions):
    correct = 0
    for i in range(len(testingData)):
        if testingData[i][-1] == predictions[i]:
            correct += 1
    return (correct/float(len(testingData)))*100.0

In [114]:
# Running the model
trainingData = pd.read_csv("../../Data/TrainingData.csv")
metadata = trainingData.describe().to_dict()
seperatedByClass = seperateClasses((trainingData.values))
testingData = pd.read_csv("../../Data/TestingData.csv")
summaries = summarizingByClasses(trainingData)
predictions = getPredictions(summaries,testingData)
accuracy = getAccuracy(testingData,predictions)
print("Accuracy: ",accuracy)

TypeError: 'dict' object is not callable