In [45]:
import pandas as pd
import numpy as np
import math
import random
import operator

In [46]:
#Calculate  mean
def mean(n):
    return sum(n)/float(len(n))

In [47]:
#Calculate Standard Deviation
def standDeviation(n):
    avg = mean(n)
    variance = sum([(x-avg)**2 for x in n]) / float(len(n)-1)
    return np.sqrt(variance)

In [48]:
#Normalization the column
def normalize(x):
    return (x-mean(x))/ standDeviation(x)

In [49]:
#Calculate the Euclidean distance
def euclidean_distance(x1, x2):
    distance = 0.0
    for x in range(len(x1)-1):
        distance += np.square(x1[x] - x2[x])
    return np.sqrt(distance)


In [50]:
#Locating the similar neighbors
def get_neighbors(train, test_row, num_neighbors):
    distances = list()
    for x in train:
        dist = euclidean_distance(test_row, x)
        distances.append((x, dist))
    distances.sort(key=lambda tup: tup[1])
    neighbors = list()
    for i in range(num_neighbors):
        neighbors.append(distances[i][0])
    return neighbors


In [51]:
#Classification prediction with neighbors
def predictClassification(train, test_row, num_neighbors):
    neighbors = get_neighbors(train, test_row, num_neighbors)
    output_values = [row[-1] for row in neighbors]
    prediction = min(set(output_values), key=output_values.count)
    return prediction


In [52]:
#kNN Algorithm
def k_nearest_neighbors(train, test, num_neighbors):
    predictions = list()
    for row in test:
        output = predictClassification(train, row, num_neighbors)
        predictions.append(output)
    return(predictions)

In [53]:
#Calculate confusion matrix
def confusion_matrix(actual, predicted):
    classes       = np.unique(np.concatenate((actual,predicted)))
    confusion_mtx = np.empty((len(classes),len(classes)),dtype=np.int)
    for i,a in enumerate(classes):
        for j,p in enumerate(classes):
            confusion_mtx[i,j] = np.where((actual==a)*(predicted==p))[0].shape[0]
    return confusion_mtx

In [54]:
#Calculate accuracy percentage
def accuractCalculation(actual, predicted):
    correct = 0
    for x in range(len(actual)):
        if actual[x] == predicted[x]:
            correct += 1
    return (correct/float(len(actual)))

In [55]:
#Main code
df = pd.read_csv('heart.csv')
data = df.to_numpy()
for i in range(len(data)):
    data[i] = [float(x) for x in data[i]]    

#Normalize the data
new_data1 = df.to_numpy()
rows = [0,3,4,7,9]
for i in rows:
    new_data1[i] = normalize(data[i])
    
#Spilting the data
m = len(new_data1)
train_end = int(0.6 * m)
validate_end = int(0.2 * m) + train_end
train = data[:train_end]
validate = data[train_end:validate_end]
test = data[validate_end+1:]

#Calculate the value of K
t = 0.0
for x in range(1,10):
    pd = k_nearest_neighbors(train,validate,x)
    act = [row[-1] for row in validate]
    accuracy = get_accuracy(act, pd)
    if (accuracy > t):
        t = accuracy
        k = x
print("Number of clusters:",k)

#Calculate the prediction
predicted = k_nearest_neighbors(train,test,k)
actual = [row[-1] for row in test]
results = confusion_matrix(actual, predicted)

tp=results[0][0]
fp=results[0][1]
fn=results[1][0]
tn=results[1][1]
accuracy = accuractCalculation(actual, predicted)
precision = tp / float(fp+tp)
recall = tn / float(tn+fn)
fscore = 2*precision*recall / float(precision + recall)

#Display
print('\nTraining Set(60%) = {0} \nTest Set(20%) = {1}\nValidation Set(20%) = {2}\n\n'.format(len(train), len(test), len(validate)))
print('Confusion Matrix:\n {0}\n'.format(results))
print('True Positive: {0}'.format(tp))
print('False Positive: {0}'.format(fp))
print('False Negative: {0}'.format(fn))
print('True Negative: {0}'.format(tn))
print('Accuracy: %.3f' % accuracy)
print('Precision: %.3f' % precision)
print('Recall: %.3f' % recall)
print('Fscore: %.3f' % fscore)

Number of clusters: 9

Training Set(60%) = 181 
Test Set(20%) = 61
Validation Set(20%) = 60


Confusion Matrix:
 [[50 11]
 [ 0  0]]

True Positive: 50
False Positive: 11
False Negative: 0
True Negative: 0
Accuracy: 0.820
Precision: 0.820
Recall: nan
Fscore: nan


  recall = tn / float(tn+fn)
