In [36]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import ShuffleSplit, GridSearchCV
import pickle

In [2]:
# Read the pre-split feature matrices and label vectors from the Data/ folder,
# training pair first and test pair second.
X_train, y_train = np.load('Data/X_train.npy'), np.load('Data/y_train.npy')
X_test, y_test = np.load('Data/X_test.npy'), np.load('Data/y_test.npy')

In [34]:
# Candidate hyper-parameters for the k-NN search; every combination of the
# values listed here is evaluated.
# NOTE(review): with n_neighbors fixed at 1, the 'uniform' and 'distance'
# weightings presumably give identical predictions, so the 'weights' axis
# may be redundant -- confirm before widening the grid.
param_grid = [
    {'n_neighbors': [1], 'weights': ['uniform', 'distance'], 'p': [1, 2]},
]

# Base estimator with default settings; the search sets the grid keys on it.
knn = KNeighborsClassifier()

# Three random splits, each fitting on 2% of the rows and scoring on 1%.
# random_state is pinned so the splits are repeatable.
cv = ShuffleSplit(n_splits=3, train_size=0.02, test_size=0.01, random_state=0)

# Rank the candidates by macro-averaged F1 across the splits.
clf = GridSearchCV(estimator=knn, param_grid=param_grid, scoring='f1_macro', cv=cv)
clf.fit(X_train, y_train)

# Show the winning combination, then its cross-validated score.
print(clf.best_params_, clf.best_score_, sep='\n')

{'n_neighbors': 1, 'p': 2, 'weights': 'uniform'}
0.44479144757032985


In [35]:
# GridSearchCV was built with its default refit=True, so after clf.fit() it
# already holds the winning configuration refitted on the whole of
# (X_train, y_train). Reuse that estimator instead of constructing and
# fitting an identical KNeighborsClassifier a second time.
best_model = clf.best_estimator_

# Score the selected model on the held-out test split: macro-averaged F1
# first, then the per-class precision/recall/F1 breakdown.
y_pred = best_model.predict(X_test)
print('f1: ', f1_score(y_test, y_pred, average="macro"))
print(classification_report(y_test, y_pred))

f1:  0.594073398547211
              precision    recall  f1-score   support

          -2       0.45      0.75      0.57       473
          -1       0.61      0.74      0.67      2746
           0       0.81      0.65      0.72      4174
           1       0.51      0.50      0.51       522
           2       0.58      0.45      0.51       105

    accuracy                           0.67      8020
   macro avg       0.59      0.62      0.59      8020
weighted avg       0.70      0.67      0.68      8020



In [37]:
# Write the fitted classifier to disk as a pickle file.
with open("Trained_models/KNN.pkl", mode="wb") as model_file:
    pickle.dump(best_model, file=model_file)