# kNN

The purpose of this notebook is to run hyperparameter tuning to find the best settings for the chosen model (kNN). The resulting settings are then transferred to the main `models.ipynb` notebook.

In [23]:
# IMPORTS
from utils import *

### Data

In [24]:
# LOAD, SPLIT AND ENCODE DATA
# The processed dataset is stored as a pickle (only unpickle files from a trusted source).
df = pd.read_pickle("../../datasets/pickle/processed_action_movie_data.pkl")

# Features are every column except "rating"; "rating" is the target.
X = df.drop("rating", axis=1)
y = df['rating']

# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Encode the splits with the project's catboost_encoding helper, which hands back
# all four pieces (presumably CatBoost-style target encoding -- confirm in utils).
X_train, X_test, y_train, y_test = catboost_encoding(X_train, X_test, y_train, y_test)

In [25]:
# SCALING
# Learn the scaling statistics from the training split only, then apply them to
# both splits. with_mean=False skips centring, so values are only divided by the
# per-feature standard deviation.
scaler = StandardScaler(with_mean=False)
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

### Tuning

In [26]:
# Hyperparameter search space for the kNN classifier:
#   n_neighbors -- every k from 1 to 119 inclusive
#   weights     -- equal votes ('uniform') or inverse-distance-weighted votes ('distance')
#   p           -- Minkowski power: 1 = Manhattan distance, 2 = Euclidean distance
param_grid = dict(
    n_neighbors=range(1, 120),
    weights=['uniform', 'distance'],
    p=[1, 2],
)

In [None]:
# Base estimator: a kNN classifier with default settings; the search below
# overrides the hyperparameters listed in param_grid.
knn = KNeighborsClassifier()

# Exhaustive search over param_grid, scored by macro-averaged F1 under
# 5-fold cross-validation. n_jobs=-1 runs the fits on all available cores.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=5,
    n_jobs=-1,
)

# Run the search on the training split prepared in the earlier cells.
grid_search.fit(X_train, y_train)

In [28]:
# Report the winning hyperparameter combination and its mean cross-validated
# score (macro F1, as configured on the search object).
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print("Best parameters: ", best_params)
print("Best score: ", best_score)

Best parameters:  {'n_neighbors': 12, 'p': 1, 'weights': 'uniform'}
Best score:  0.6026109569679533
