# In [13]:

import numpy as np
import pandas as pd

from sklearn.metrics import recall_score
from sklearn.metrics import fbeta_score

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

from rich import print as pprint

#----------------------------------------------------------
# Data Handling
#

# Extract
url = (
    'https://raw.githubusercontent.com/'
    'andersonara/datasets/master/'
    'wall-robot-navigation.csv'
)

# The file is semicolon-separated.
df = pd.read_csv(url, sep=';')

# Features as a 2-D numpy matrix, target flattened to a 1-D vector.
X = df[['X1', 'X2']].to_numpy()
y = df[['Y']].to_numpy().ravel()

# Shuffled 70/30 hold-out split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    shuffle=True,
    random_state=2002,
)

#----------------------------------------------------------
# Model evaluation
#

def get_best_knn_by_metric(metric,
                           maxiter: int = 10,
                           **kwargs):
    """Pick the KNN neighbour count that maximises ``metric``.

    For every ``k`` in ``1..maxiter`` a ``KNeighborsClassifier`` is fitted
    on the module-level ``X_train``/``y_train`` and scored on the
    module-level ``X_test``/``y_test``. The winner is the ``k`` with the
    highest score after rounding to 5 decimals; ties go to the smallest
    ``k``.

    NOTE(review): ``k`` is selected on the same test split whose score is
    reported, so the reported value is optimistically biased. Consider a
    separate validation split or cross-validation.

    Args:
        metric: Callable invoked as ``metric(y_test, y_pred, **kwargs)``;
            larger return values are treated as better.
        maxiter: Largest ``k`` to try (inclusive). Must be at least 1.
        **kwargs: Extra keyword arguments forwarded unchanged to ``metric``.

    Returns:
        A dict with the keys ``'k'`` (best neighbour count), ``'model'``
        (the classifier fitted with that ``k``), ``'metric_value'`` (its
        score rounded to 5 decimals) and ``'metric'`` (the callable passed
        in).

    Raises:
        ValueError: If ``maxiter`` is smaller than 1, i.e. there is no
            candidate ``k`` to evaluate.
    """
    if maxiter < 1:
        raise ValueError(f"maxiter must be >= 1, got {maxiter!r}")

    def train_knn(k):
        """Fit a ``k``-neighbour classifier; return ``(model, rounded score)``."""
        model = KNeighborsClassifier(n_neighbors=k)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        score = metric(y_test, y_pred, **kwargs)

        return model, round(score, 5)

    best_k = best_model = best_score = None

    for k in range(1, maxiter + 1):
        model, score = train_knn(k)

        # Strict ">" keeps the first (smallest) k among equal rounded scores.
        # Keeping the fitted model here avoids retraining the winner later.
        if best_model is None or score > best_score:
            best_k, best_model, best_score = k, model, score

    return {
        'k': best_k,
        'model': best_model,
        'metric_value': best_score,
        'metric': metric,
    }

# Each entry holds the keyword arguments for get_best_knn_by_metric:
# the scoring function plus the extra options forwarded to it.
metricas = {
    'Recall Micro': {'metric': recall_score, 'average': 'micro'},
    'Recall Macro': {'metric': recall_score, 'average': 'macro'},
    'F-Beta Micro': {'metric': fbeta_score, 'average': 'micro', 'beta': 2},
    'F-Beta Macro': {'metric': fbeta_score, 'average': 'macro', 'beta': 2},
}

# Keep only the best score achieved for each metric configuration.
resultados = {}
for nome, p in metricas.items():
    melhor = get_best_knn_by_metric(**p)
    resultados[nome] = melhor['metric_value']

#----------------------------------------------------------
# Show results
#

pprint(resultados)
        