## LAB 4 - classifiers

In [1]:
import numpy as np

from models import MLClassifier, ParzenClassifer, KNNClassifier
from utils import confusion_matrix, accuracy_score, error_score

## 2 classes - 2 dimensions 

In [2]:
# The loaded object unpacks into two items: X (used below for training) and
# Y (used below for testing); each is indexed per class as X[0], X[1].
# NOTE(review): allow_pickle=True makes np.load unpickle the file, and
# unpickling can execute arbitrary code -- only use a trusted 'data/lab4.p'.
X, Y = np.load('data/lab4.p', allow_pickle=True)

We first convert the data into a canonical data structure better suited for classification. This allows us to implement [classification models](models.py) that follow the style of scikit-learn's intuitive API and can be applied to any number of classes and dimensions. For each classification task, we can "fit" the model to the training data and "predict" the class of unseen samples, even though "fit" and "predict" do different things for each model.

In [3]:
# Stack the two per-class sample sets into one design matrix per split.
# Each class array is transposed before stacking -- presumably it is stored
# as (n_features, n_samples); TODO confirm against the data file.
x_train = np.vstack([X[0].T, X[1].T])
x_test = np.vstack([Y[0].T, Y[1].T])

# Label vectors: class 1 for every row taken from index 0, class 2 for every
# row taken from index 1, in the same order as the stacked rows above.
y_train = np.concatenate(
    [[label] * len(samples.T) for label, samples in ((1, X[0]), (2, X[1]))]
)
y_test = np.concatenate(
    [[label] * len(samples.T) for label, samples in ((1, Y[0]), (2, Y[1]))]
)

In [4]:
def train_test_evaluate(desc, model):
    """Fit ``model`` on the training split and print a short report.

    The model is fitted on the notebook-level ``x_train`` / ``y_train`` and
    then asked to predict both ``x_train`` ("Reclassification") and
    ``x_test`` ("Testing"). For each of the two, the value returned by
    ``error_score`` and the diagonal of ``confusion_matrix`` are printed.

    Parameters
    ----------
    desc : str
        Heading printed above the report.
    model : object
        Anything exposing ``fit(x, y)`` and ``predict(x)``.

    Returns
    -------
    None
        Results are only printed.

    Notes
    -----
    The diagonal entries are labelled ``P( correct | w_i )``, so
    ``confusion_matrix`` is presumably normalised per class -- confirm in
    ``utils``.
    """
    model.fit(x_train, y_train)

    # All predictions and scores are computed before anything is printed,
    # so a failure in any of them leaves no partial report behind.
    predictions = [model.predict(x_train), model.predict(x_test)]
    targets = [y_train, y_test]
    errors = [error_score(pred, true) for pred, true in zip(predictions, targets)]
    matrices = [confusion_matrix(pred, true) for pred, true in zip(predictions, targets)]

    print(desc)
    for idx, title in enumerate(('Reclassification', 'Testing')):
        print(f"\t{title}")
        print(f"\t\tP( error ) = {errors[idx]:.2f}")
        # Classes are reported 1-based (w_1, w_2, ...).
        for class_no, p_correct in enumerate(matrices[idx].diagonal(), start=1):
            print(f'\t\tP( correct | w_{class_no} ) = {p_correct:.2f} ')

In [9]:
# Registry of the hand-written classifiers from models.py, keyed by the
# heading printed above each report. Insertion order is evaluation order.
models = {}
models["Maximum likelihood"] = MLClassifier()
models["Parzen h1 = 0.1"] = ParzenClassifer(h=0.1)
models["Parzen h1 = 5.0"] = ParzenClassifer(h=5.0)
models["Nearest neighbours k = 1"] = KNNClassifier(k=1)
models["Nearest neighbours k = 5"] = KNNClassifier(k=5)

# Report every model in turn, with a 50-dash rule after each report.
for desc in models:
    model = models[desc]
    train_test_evaluate(desc, model)
    print('-' * 50)

Maximum likelihood
	Reclassification
		P( error ) = 0.05
		P( correct | w_1 ) = 0.97 
		P( correct | w_2 ) = 0.94 
	Testing
		P( error ) = 0.11
		P( correct | w_1 ) = 0.93 
		P( correct | w_2 ) = 0.85 
--------------------------------------------------
Parzen h1 = 0.1
	Reclassification
		P( error ) = 0.01
		P( correct | w_1 ) = 0.98 
		P( correct | w_2 ) = 1.00 
	Testing
		P( error ) = 0.12
		P( correct | w_1 ) = 0.91 
		P( correct | w_2 ) = 0.85 
--------------------------------------------------
Parzen h1 = 5.0
	Reclassification
		P( error ) = 0.04
		P( correct | w_1 ) = 0.92 
		P( correct | w_2 ) = 1.00 
	Testing
		P( error ) = 0.10
		P( correct | w_1 ) = 0.88 
		P( correct | w_2 ) = 0.91 
--------------------------------------------------
Nearest neighbours k = 1
	Reclassification
		P( error ) = 0.00
		P( correct | w_1 ) = 1.00 
		P( correct | w_2 ) = 1.00 
	Testing
		P( error ) = 0.12
		P( correct | w_1 ) = 0.93 
		P( correct | w_2 ) = 0.84 
---------------------------------------

We can check our results against scikit-learn's implementations.

In [10]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# scikit-learn reference models, run through the same report for comparison.
# `models` is rebound here, replacing the registry from the previous cell.
models = {}
# var_smoothing=0 adds nothing to the estimated variances.
# NOTE(review): GaussianNB treats features as independent, so it is only an
# exact counterpart of MLClassifier if that model does the same -- confirm.
models["skl ML"] = GaussianNB(var_smoothing=0)
models["skl KNN k = 1"] = KNeighborsClassifier(n_neighbors=1)
models["skl KNN k = 5"] = KNeighborsClassifier(n_neighbors=5)

# Report every model in turn, with a 50-dash rule after each report.
for desc in models:
    model = models[desc]
    train_test_evaluate(desc, model)
    print('-' * 50)

skl ML
	Reclassification
		P( error ) = 0.05
		P( correct | w_1 ) = 0.97 
		P( correct | w_2 ) = 0.93 
	Testing
		P( error ) = 0.10
		P( correct | w_1 ) = 0.93 
		P( correct | w_2 ) = 0.88 
--------------------------------------------------
skl KNN k = 1
	Reclassification
		P( error ) = 0.00
		P( correct | w_1 ) = 1.00 
		P( correct | w_2 ) = 1.00 
	Testing
		P( error ) = 0.12
		P( correct | w_1 ) = 0.93 
		P( correct | w_2 ) = 0.84 
--------------------------------------------------
skl KNN k = 5
	Reclassification
		P( error ) = 0.03
		P( correct | w_1 ) = 0.97 
		P( correct | w_2 ) = 0.97 
	Testing
		P( error ) = 0.11
		P( correct | w_1 ) = 0.93 
		P( correct | w_2 ) = 0.86 
--------------------------------------------------
