In [1]:
import numpy as np
import csv

from Classifiers.Bayes_Gaussian import *
from Classifiers.Bayes_Parzen import *
from Classifiers.k_means_clustering import *
from Classifiers.k_nearest_neighbour import *
from Classifiers.logistic import *
from Classifiers.Naive_Bayes import *
from Classifiers.solver import *
from Classifiers.compute_performance import *

In [2]:
data = np.array(list(csv.reader(open("DataSets/Statlog/heart.csv"), delimiter=',')))
N = data.shape[0]
X, y = data[:, :-1].astype(np.float), data[:, -1].astype(np.int)
for i in range(X.shape[1]):
    X[:, i] = (X[:, i] - np.min(X[:, i])) / (np.max(X[:, i]) - np.min(X[:, i]))
y = y - 1
data = {}
random_perm = np.random.permutation(N)
data['X_train'] = X[random_perm[:220]]
data['y_train'] = y[random_perm[:220]]
data['X_test'] = X[random_perm[220:]]
data['y_test'] = y[random_perm[220:]]
for k, v in list(data.items()):
    print(('%s: ' % k, v.shape))
D, num_classes = data['X_train'].shape[1], np.max(y) + 1
print(D, num_classes)

('X_train: ', (220, 13))
('y_train: ', (220,))
('X_test: ', (50, 13))
('y_test: ', (50,))
13 2


In [3]:
naive_bayes = Naive_Bayes(D, num_classes)
naive_bayes.train(data['X_train'], data['y_train'])
y_pred1 = naive_bayes.test(data['X_test'])
performance = compute_performance(y_pred1, data['y_test'], num_classes)
for k, v in list(performance.items()):
    print(('%s: ' % k, v))

('accuracy: ', 0.94)
('precision: ', array([0.96875   , 0.88888889]))
('recall: ', array([0.93939394, 0.94117647]))
('F1: ', 0.9340659340659341)


In [4]:
bayes = Bayes_Gaussian(D, num_classes)
bayes.train(data['X_train'], data['y_train'])
y_pred2 = bayes.test(data['X_test'])
performance = compute_performance(y_pred2, data['y_test'], num_classes)
for k, v in list(performance.items()):
    print(('%s: ' % k, v))

('accuracy: ', 0.86)
('precision: ', array([0.88235294, 0.8125    ]))
('recall: ', array([0.90909091, 0.76470588]))
('F1: ', 0.8417005879692447)


In [5]:
bayes = Bayes_Parzen(num_classes)
bayes.train(data['X_train'], data['y_train'])
y_pred2 = bayes.test(data['X_test'], h=0.8)
performance = compute_performance(y_pred2, data['y_test'], num_classes)
for k, v in list(performance.items()):
    print(('%s: ' % k, v))

('accuracy: ', 0.88)
('precision: ', array([0.86486486, 0.92307692]))
('recall: ', array([0.96969697, 0.70588235]))
('F1: ', 0.8571428571428572)


In [6]:
data['X_train'] = X[random_perm[:180]]
data['y_train'] = y[random_perm[:180]]
data['X_val'] = X[random_perm[180:220]]
data['y_val'] = y[random_perm[180:220]]

In [7]:
knn = KNearestNeighbor()
knn.train(data['X_train'], data['y_train'])
k_choices = [3, 5, 7, 9, 11]
best_k, best_f1 = 1, 0.0
for k in k_choices:
    y_pred3 = knn.test(data['X_val'], k=k)
    per = compute_performance(y_pred3, data['y_val'], num_classes)
    if per['F1'] > best_f1:
        best_k = k
        best_f1 = per['F1']
y_pred3 = knn.test(data['X_test'], k=best_k)
performance = compute_performance(y_pred3, data['y_test'], num_classes)
for k, v in list(performance.items()):
    print(('%s: ' % k, v))

('accuracy: ', 0.86)
('precision: ', array([0.86111111, 0.85714286]))
('recall: ', array([0.93939394, 0.70588235]))
('F1: ', 0.8363721365123891)


In [8]:
K_choices = [2, 3, 5, 6, 7, 8, 9]
best_k_means, best_f1 = None, 0.0
for K in K_choices:
    k_means = K_means(D, K=K)
    k_means.train(data['X_train'], data['y_train'], num_iters=20)
    y_pred4 = k_means.test(data['X_val'])
    per = compute_performance(y_pred4, data['y_val'], num_classes)
    if per['F1'] > best_f1:
        best_k_means = k_means
        best_f1 = per['F1']
y_pred4 = best_k_means.test(data['X_test'])
performance = compute_performance(y_pred4, data['y_test'], num_classes)
for k, v in list(performance.items()):
    print(('%s: ' % k, v))

('accuracy: ', 0.74)
('precision: ', array([0.79411765, 0.625     ]))
('recall: ', array([0.81818182, 0.58823529]))
('F1: ', 0.7060153776571687)


In [9]:
logistic = Logistic(input_dim=D, num_classes=num_classes, weight_scale=1e-1)
solver = Solver(logistic, data, lr=1e-1, num_epochs=200, verbose=0)
solver.train()
y_pred5 = logistic.test(data['X_test'])
performance = compute_performance(y_pred5, data['y_test'], num_classes)
for k, v in list(performance.items()):
    print(('%s: ' % k, v))

('accuracy: ', 0.84)
('precision: ', array([0.87878788, 0.76470588]))
('recall: ', array([0.87878788, 0.76470588]))
('F1: ', 0.82174688057041)
