# Imports

In [1]:
import os, errno

import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Function Definitions

In [2]:
def makedirs(path):
    """Create the directory ``path`` (and any missing parents) if needed.

    Calling this on a directory that already exists is a no-op.

    Parameters
    ----------
    path : str
        Directory to create.

    Raises
    ------
    OSError
        If the directory cannot be created. This includes the case where
        ``path`` already exists but is not a directory (``FileExistsError``),
        which the previous errno-based check silently ignored.
    """
    # exist_ok=True tolerates only an existing *directory*; an existing
    # regular file at `path` still raises, so callers fail early.
    os.makedirs(path, exist_ok=True)

In [3]:
def download_wine(basename, dirname):
    """Write scikit-learn's wine dataset to ``dirname/basename`` as CSV.

    The frame returned by ``datasets.load_wine(as_frame=True)`` is used as
    is, except that every non-feature column has its values replaced by the
    corresponding entries of ``data.target_names``. The directory is created
    if it does not exist, and the frame's index is written as the first CSV
    column.

    Parameters
    ----------
    basename : str
        File name of the CSV to write.
    dirname : str
        Directory in which to place the file.
    """
    print("downloading wine dataset")
    data = datasets.load_wine(as_frame=True)
    df = data.frame

    # Every column that is not a listed feature is treated as a label column.
    # Index.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
    targets = df.columns[~df.columns.isin(data.feature_names)]

    # Use the stored label values as positions into target_names.
    df[targets] = data.target_names[df[targets].to_numpy()]

    makedirs(dirname)
    # Hand pandas the path rather than a text-mode handle: pandas then opens
    # the file itself with the newline handling its CSV writer expects
    # (a handle opened without newline="" can double line endings on Windows).
    df.to_csv(os.path.join(dirname, basename))

In [4]:
def load_wine(basename="data.csv", dirname="./datasets/wine"):
    """Return the wine dataset as a DataFrame, creating the CSV on first use.

    If ``dirname/basename`` does not exist yet, ``download_wine`` is called
    to produce it. The file is then read with its first column as the index.

    Parameters
    ----------
    basename : str
        File name of the cached CSV.
    dirname : str
        Directory holding the cached CSV.

    Returns
    -------
    pandas.DataFrame
        Contents of the CSV file.
    """
    csv_path = os.path.join(dirname, basename)

    # Populate the on-disk cache only when it is missing.
    if not os.path.exists(csv_path):
        download_wine(basename, dirname)

    with open(csv_path) as handle:
        return pd.read_csv(handle, index_col=0)

# K Nearest Neighbor

In [5]:
def train_knn(n_neighbors=5, weights="uniform", test_size=0.25, random_state=0):
    """Fit a k-nearest-neighbours classifier on the wine data and report scores.

    The dataset comes from ``load_wine()``; every column except the last is
    used as a feature and the last column as the label. The data is split
    into stratified, shuffled train/test parts, the classifier is fitted on
    the training part, and accuracy plus macro-averaged precision, recall and
    F-score on the test part are printed.

    Parameters
    ----------
    n_neighbors : int
        Passed to ``KNeighborsClassifier(n_neighbors=...)``.
    weights : str
        Passed to ``KNeighborsClassifier(weights=...)``.
    test_size : float
        Passed to ``train_test_split(test_size=...)``. The default keeps the
        previously hard-coded value.
    random_state : int
        Passed to ``train_test_split(random_state=...)``. The default keeps
        the previously hard-coded value.

    Returns
    -------
    dict
        The printed scores under the keys ``"accuracy"``, ``"precision"``,
        ``"recall"`` and ``"fscore"``, so callers can compare runs without
        parsing the printed output.
    """
    # load dataset
    dataset_df = load_wine()
    X = dataset_df[dataset_df.columns[:-1]]
    y = dataset_df[dataset_df.columns[-1]]

    # create train/test splits (stratified on the label column)
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        shuffle=True,
        stratify=y,
    )

    # build classifier
    knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)
    knn.fit(X_train, y_train)

    # evaluate performance
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    # The fourth element (support) is not reported, so it is discarded.
    precision, recall, fscore, _ = precision_recall_fscore_support(
        y_test, y_pred, average="macro"
    )

    report = (
        ("Accuracy", accuracy),
        ("Precision", precision),
        ("Recall", recall),
        ("Fscore", fscore),
    )
    for label, value in report:
        print("{1:10.4f} {0:}".format(label, value))

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "fscore": fscore,
    }

# Run Experiments

In [6]:
# Each entry pairs the heading printed before a run with the keyword
# arguments handed to train_knn for that run.
experiments = (
    ("KNN, n_neighbors=1", {"n_neighbors": 1}),
    ("KNN, n_neighbors=5, weights=uniform", {"n_neighbors": 5, "weights": "uniform"}),
    ("KNN, n_neighbors=5, weights=distance", {"n_neighbors": 5, "weights": "distance"}),
)

for heading, knn_kwargs in experiments:
    print(heading)
    train_knn(**knn_kwargs)
    # blank line separating consecutive reports
    print()

KNN, n_neighbors=1
    0.7111 Accuracy
    0.7324 Precision
    0.7000 Recall
    0.7033 Fscore

KNN, n_neighbors=5, weights=uniform
    0.6667 Accuracy
    0.6416 Precision
    0.6537 Recall
    0.6417 Fscore

KNN, n_neighbors=5, weights=distance
    0.6444 Accuracy
    0.6772 Precision
    0.6444 Recall
    0.6467 Fscore

