In [3]:
from matplotlib import pyplot as plt
from sklearn.preprocessing import normalize
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, recall_score
from sklearn import svm
from pandas import ExcelWriter
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
import numpy as np
import re


def xy_sets(line, features=None, labels=None):
    """Select the feature rows and labels whose indices are listed in *line*.

    Every run of decimal digits found in *line* is interpreted as a row index.

    Args:
        line: Text containing the row indices (for example one line of a
            fold-definition file).
        features: 2-D array the rows are taken from. When omitted, the
            module-level ``scaled`` array is used.
        labels: 1-D array the labels are taken from. When omitted, the
            module-level ``target`` array is used.

    Returns:
        A tuple ``(X, y, indexes)``. ``X`` holds the selected feature rows
        (always 2-D, one row per index), ``y`` the matching labels (always
        1-D) and ``indexes`` the parsed indices as a list of ints. When
        *line* contains no index, ``X`` and ``y`` are ``None`` and
        ``indexes`` is an empty list.
    """
    # Materialize the indices: a lazy ``map`` object would already be
    # exhausted by the time it is handed back to the caller.
    indexes = [int(token) for token in re.findall(r'\d+', line)]
    if not indexes:
        return None, None, indexes

    # Fall back to the notebook globals only when no arrays were passed in.
    if features is None:
        features = scaled
    if labels is None:
        labels = target

    rows = np.asarray(indexes, dtype=np.intp)
    # One fancy-indexing step replaces the repeated vstack/hstack growth and
    # keeps X two-dimensional even when a single index is selected.
    X = np.asarray(features)[rows, :]
    y = np.asarray(labels)[rows]
    return X, y, indexes


# Experiment grid: for every dataset, every multiplier variant of its CSV and
# every k, a KNN classifier is evaluated over the predefined folds and the
# per-fold scores plus pooled metrics are written to '<name>_knn.xlsx'.
multiplier = ['0,05', '0,01', '0,001']
kneighbors = np.arange(10, 16)
dataset_name = ['57EDG', '54BND', 'combined']
N_FOLDS = 20  # number of (train line, test line) pairs read from '<name>_kf.txt'

for name in dataset_name:
    df = DataFrame()

    # The fold definitions depend only on the dataset, so read them once
    # instead of re-opening the file for every (multiplier, k) combination.
    # Each fold is two consecutive lines: train indices, then test indices.
    with open(name+'_kf.txt', 'r') as fold_file:
        fold_lines = [fold_file.readline() for _ in range(2 * N_FOLDS)]

    for m in multiplier:
        # load dataset (independent of k, so done once per multiplier)
        dataset = read_csv(name+'_'+m+'.csv', header=0, index_col=(0,1))
        values = dataset.values

        # split data set into data(input) and target(output)
        data = values[:, 1:]
        target = values[:, 0]

        # normalize features: the first three columns are scaled by their
        # column-wise maximum absolute value
        scaled = normalize(data[:, :3], norm='max', axis=0)
        # NOTE(review): this appends the first four *raw* columns, so columns
        # 0-2 end up present both normalized and unnormalized. `data[:, 3:]`
        # may have been intended - confirm before changing, because every
        # reported result depends on this feature layout.
        scaled = np.concatenate((scaled, data[:, :4]), axis = 1)

        for k in kneighbors:
            scores = np.zeros(N_FOLDS)
            true_parts = []
            pred_parts = []

            for fold in range(N_FOLDS):
                train_line = fold_lines[2 * fold]
                test_line = fold_lines[2 * fold + 1]

                # xy_sets reads the module-level `scaled` and `target` arrays
                X_train, y_train, train_indexes = xy_sets(train_line)
                X_test, y_test, test_indexes = xy_sets(test_line)

                classifier = KNeighborsClassifier(n_neighbors=k)  # classifier
                classifier.fit(X_train, y_train)
                y_pred = classifier.predict(X_test)
                # Mean accuracy on the fold; same value classifier.score()
                # returns, without predicting the test set a second time.
                scores[fold] = accuracy_score(y_test, y_pred)

                true_parts.append(y_test)
                pred_parts.append(y_pred)

            # Pool the predictions of all folds before computing the metrics.
            y_test_full = np.hstack(true_parts)
            y_cont_full = np.hstack(pred_parts)

            print(confusion_matrix(y_test_full, y_cont_full))

            acc = accuracy_score(y_test_full, y_cont_full)
            # Per-class recall, ordered by sorted label value.
            # NOTE(review): recall[0] is reported as sensitivity and recall[1]
            # as specificity, which assumes the smaller label is the positive
            # class - confirm against the dataset's label encoding.
            recall = recall_score(y_test_full, y_cont_full, average=None)
            sns = recall[0]
            spf = recall[1]

            # Column layout: N_FOLDS per-fold scores, then accuracy,
            # sensitivity and specificity.
            col = np.hstack((scores, acc, sns, spf))

            # New columns go to the front and are labelled by k only, so the
            # same k appears once per multiplier (duplicates allowed).
            df.insert(0, k, col, True)
            # Name the three trailing summary rows.
            # NOTE(review): 'sensibility' is kept as-is because it is part of
            # the written file; the printed output calls it 'Sensitivity'.
            df = df.rename(index=dict(zip(
                df.index[-3:], ('accuracy', 'sensibility', 'specificity'))))

            print(scores.mean())
            print("Accuracy: %0.2f" % (acc))
            print("Sensitivity: %0.2f" % (sns))
            print("Specificity: %0.2f" % (spf))

    # The context manager saves and closes the workbook; ExcelWriter.save()
    # no longer exists in pandas >= 2.0.
    with ExcelWriter(name+'_knn.xlsx', engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name=name)
    

[[27 10]
 [ 7 13]]
0.7166666666666666
Accuracy: 0.70
Sensitivity: 0.73
Specificity: 0.65
[[35  2]
 [12  8]]
0.7583333333333333
Accuracy: 0.75
Sensitivity: 0.95
Specificity: 0.40
[[34  3]
 [ 8 12]]
0.8166666666666667
Accuracy: 0.81
Sensitivity: 0.92
Specificity: 0.60
[[36  1]
 [13  7]]
0.7583333333333333
Accuracy: 0.75
Sensitivity: 0.97
Specificity: 0.35
[[35  2]
 [13  7]]
0.7416666666666667
Accuracy: 0.74
Sensitivity: 0.95
Specificity: 0.35
[[36  1]
 [13  7]]
0.7583333333333333
Accuracy: 0.75
Sensitivity: 0.97
Specificity: 0.35
[[36  1]
 [13  7]]
0.7583333333333333
Accuracy: 0.75
Sensitivity: 0.97
Specificity: 0.35
[[36  1]
 [16  4]]
0.7083333333333333
Accuracy: 0.70
Sensitivity: 0.97
Specificity: 0.20
[[36  1]
 [16  4]]
0.7083333333333333
Accuracy: 0.70
Sensitivity: 0.97
Specificity: 0.20
[[37  0]
 [17  3]]
0.7083333333333333
Accuracy: 0.70
Sensitivity: 1.00
Specificity: 0.15
[[33  4]
 [ 3 17]]
0.8833333333333332
Accuracy: 0.88
Sensitivity: 0.89
Specificity: 0.85
[[36  1]
 [ 5 15]]
0.