In [1]:
import numpy as np
import csv

from Classifiers.Bayes_Parzen import *
from Classifiers.k_means_clustering import *
from Classifiers.k_nearest_neighbour import *
from Classifiers.logistic import *
from Classifiers.Naive_Bayes_discrete import *
from Classifiers.solver import *
from Classifiers.compute_performance import *

In [2]:
# Load the Zoo CSV. The context manager closes the file as soon as the rows
# have been read (the bare open() call used before never closed the handle).
with open("DataSets/Zoo/zoo.csv", newline='') as csv_file:
    raw = np.array(list(csv.reader(csv_file, delimiter=',')))
N = raw.shape[0]

# Column 0 is skipped, the last column is the label, everything in between is
# a feature. The builtin `int` replaces `np.int`: that alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X, y = raw[:, 1:-1].astype(int), raw[:, -1].astype(int)
# Shift labels down by one; num_classes below is computed as max(label) + 1.
y = y - 1

# Shuffle the row indices, then use the first 80 rows for training and the
# remainder for testing.
# NOTE(review): the permutation is unseeded, so the split (and every metric
# computed from it) changes from run to run.
data = {}
random_perm = np.random.permutation(N)
data['X_train'] = X[random_perm[:80]]
data['y_train'] = y[random_perm[:80]]
data['X_test'] = X[random_perm[80:]]
data['y_test'] = y[random_perm[80:]]
for k, v in list(data.items()):
    print(('%s: ' % k, v.shape))

# D: number of feature columns; num_classes: largest shifted label plus one.
D, num_classes = data['X_train'].shape[1], np.max(y) + 1
print(D, num_classes)

('X_train: ', (80, 16))
('y_train: ', (80,))
('X_test: ', (21, 16))
('y_test: ', (21,))
16 7


In [3]:
# Naive_Bayes classifier: fit on the training split, predict the test split,
# then compute the performance metrics for those predictions.
naive_bayes = Naive_Bayes(D, num_classes)
naive_bayes.train(
    data['X_train'],
    data['y_train'],
)
y_pred1 = naive_bayes.test(data['X_test'])
performance = compute_performance(y_pred1, data['y_test'], num_classes)

# Each metric is printed as a (label, value) tuple, one per line.
for k, v in performance.items():
    print(('%s: ' % k, v))

('accuracy: ', 0.8571428571428571)
('precision: ', array([0.8, 1. , 1. , 1. , 1. , 1. , 1. ]))
('recall: ', array([1.        , 1.        , 1.        , 0.5       , 1.        ,
       0.66666667, 0.5       ]))
('F1: ', 0.8603174603174603)


In [4]:
# Bayes_Parzen classifier: fit on the training split, then predict the test
# split. `h` is passed at prediction time, not at training time.
bayes = Bayes_Parzen(num_classes)
bayes.train(
    data['X_train'],
    data['y_train'],
)
y_pred2 = bayes.test(data['X_test'], h=0.1)
performance = compute_performance(y_pred2, data['y_test'], num_classes)

# Each metric is printed as a (label, value) tuple, one per line.
for k, v in performance.items():
    print(('%s: ' % k, v))

('accuracy: ', 1.0)
('precision: ', array([1., 1., 1., 1., 1., 1., 1.]))
('recall: ', array([1., 1., 1., 1., 1., 1., 1.]))
('F1: ', 1.0)


In [5]:
# Re-split the first 80 shuffled rows: positions 0-59 become the training set
# and positions 60-79 the validation set. This overwrites the 80-row training
# entries already stored in `data`; the test entries are left untouched.
data.update({
    'X_train': X[random_perm[:60]],
    'y_train': y[random_perm[:60]],
    'X_val': X[random_perm[60:80]],
    'y_val': y[random_perm[60:80]],
})

In [6]:
# k-nearest-neighbour: choose k by validation F1, then report test metrics
# for the chosen k.
knn = KNearestNeighbor()
knn.train(data['X_train'], data['y_train'])

k_choices = [3, 5, 7, 9, 11]
best_k = 1      # used only if no candidate scores a validation F1 above 0.0
best_f1 = 0.0
for k in k_choices:
    y_pred3 = knn.test(data['X_val'], k=k)
    per = compute_performance(y_pred3, data['y_val'], num_classes)
    # Strict comparison: on a tie the earlier candidate is kept.
    if not per['F1'] > best_f1:
        continue
    best_k, best_f1 = k, per['F1']

# Final evaluation on the test split with the selected k.
y_pred3 = knn.test(data['X_test'], k=best_k)
performance = compute_performance(y_pred3, data['y_test'], num_classes)
for k, v in performance.items():
    print(('%s: ' % k, v))

('accuracy: ', 0.9523809523809523)
('precision: ', array([1.        , 0.66666667, 1.        , 1.        , 1.        ,
       1.        , 1.        ]))
('recall: ', array([0.91666667, 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        ]))
('F1: ', 0.9652173913043478)


In [7]:
# K_means model selection: train one model per candidate K, keep the model
# with the best validation F1, then report that model's test metrics.
K_choices = [6, 7, 8, 9, 10, 11]
best_k_means, best_f1 = None, 0.0
for K in K_choices:
    k_means = K_means(D, K=K)
    k_means.train(data['X_train'], data['y_train'], num_iters=20)
    y_pred4 = k_means.test(data['X_val'])
    per = compute_performance(y_pred4, data['y_val'], num_classes)
    # The `is None` check guarantees that a model is always retained. With the
    # strict `>` alone, a run in which no candidate scored above 0.0 left
    # best_k_means as None and the test call below raised AttributeError.
    if best_k_means is None or per['F1'] > best_f1:
        best_k_means = k_means
        # Raise the bar only on a genuine improvement, so a zero or NaN score
        # from the fallback candidate cannot block later, better candidates.
        if per['F1'] > best_f1:
            best_f1 = per['F1']

# Final evaluation on the test split with the selected model.
y_pred4 = best_k_means.test(data['X_test'])
performance = compute_performance(y_pred4, data['y_test'], num_classes)
for k, v in list(performance.items()):
    print(('%s: ' % k, v))

('accuracy: ', 0.9047619047619048)
('precision: ', array([1.        , 0.66666667, 1.        , 0.66666667, 1.        ,
       1.        , 1.        ]))
('recall: ', array([0.91666667, 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.5       ]))
('F1: ', 0.8890269151138718)


In [8]:
# Logistic model: the Solver is handed the model and the whole `data` dict,
# trains it, and the trained model is then scored on the test split.
logistic = Logistic(
    input_dim=D,
    num_classes=num_classes,
    weight_scale=1e-1,
)
solver = Solver(
    logistic,
    data,
    lr=1.0,
    num_epochs=100,
    verbose=0,
)
solver.train()

y_pred5 = logistic.test(data['X_test'])
performance = compute_performance(y_pred5, data['y_test'], num_classes)

# Each metric is printed as a (label, value) tuple, one per line.
for k, v in performance.items():
    print(('%s: ' % k, v))

('accuracy: ', 0.9523809523809523)
('precision: ', array([1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.66666667]))
('recall: ', array([1.        , 1.        , 1.        , 1.        , 1.        ,
       0.66666667, 1.        ]))
('F1: ', 0.9428571428571428)
