In [3]:
%%capture
%run ./code.ipynb

### K-Nearest Neighbors (KNN)

In [21]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score, GridSearchCV

In [33]:
import sklearn.metrics as metrics


def regression_results(y_true, y_pred):
    """Print a summary of regression metrics for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.

    Returns
    -------
    None
        The metrics are printed, not returned. Call the function directly
        rather than wrapping it in ``print``, which would emit ``None``.
    """
    # Imported locally so the helper does not depend on ``np`` having been
    # injected into the namespace by another notebook.
    import numpy as np

    explained_variance = metrics.explained_variance_score(y_true, y_pred)
    mean_absolute_error = metrics.mean_absolute_error(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)
    r2 = metrics.r2_score(y_true, y_pred)

    # MSLE is rejected by scikit-learn (ValueError) for targets/predictions
    # outside its valid domain, e.g. negative values. Shape or NaN problems
    # would already have raised in the metrics above, so a ValueError here
    # means "not defined for this data"; report that instead of aborting
    # the whole summary.
    try:
        msle_report = round(metrics.mean_squared_log_error(y_true, y_pred), 4)
    except ValueError as err:
        msle_report = 'n/a ({})'.format(err)

    print('explained_variance: ', round(explained_variance, 4))
    print('mean_squared_log_error: ', msle_report)
    print('r2: ', round(r2, 4))
    print('MAE: ', round(mean_absolute_error, 4))
    print('MSE: ', round(mse, 4))
    print('RMSE: ', round(np.sqrt(mse), 4))


In [None]:
# Baseline KNN regressor: 5 neighbors, neighbor search on all CPU cores.
knn_rgs = KNeighborsRegressor(
    n_jobs=-1,
    n_neighbors=5,
)

In [4]:
# Fit the baseline regressor on X_train / y_train (defined outside this
# section -- presumably by the notebook loaded via %run; confirm).
knn_rgs.fit(X=X_train, y=y_train)

KNeighborsRegressor(n_jobs=-1)

In [5]:
# Held-out score of the baseline model (for scikit-learn regressors,
# score() is the R^2 coefficient of determination).
knn_rgs.score(X=X_test, y=y_test)

0.9501148748813034

In [22]:
# Baseline predictions on the test features, kept for the metric comparison.
y_predict = knn_rgs.predict(X=X_test)

Apply hyperparameter tuning (grid search) to find the best score

In [13]:
# Search space: 2 weightings x 4 neighbor counts x 2 distance metrics
# = 16 candidate configurations.
grid_params = [{
    'weights': ['uniform', 'distance'],
    'n_neighbors': list(range(3, 11, 2)),  # odd k only: 3, 5, 7, 9
    'metric': ['euclidean', 'manhattan'],
}]

# Exhaustive 5-fold cross-validated search, run in parallel on all cores.
gs = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=grid_params,
    cv=5,
    n_jobs=-1,
    verbose=1,
)
# NOTE(review): scikit-learn's fit() conventionally returns the estimator
# itself, so gs_results presumably refers to the same object as gs.
gs_results = gs.fit(X_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits


In [14]:
# Report the winning configuration from the grid search, one item per line.
print(
    'Best score: {}'.format(gs_results.best_score_),
    'Best params: {}'.format(gs_results.best_params_),
    'Best estimator: {}'.format(gs_results.best_estimator_),
    sep='\n',
)


Best score: 0.9495476390677879
Best params: {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'distance'}
Best estimator: KNeighborsRegressor(metric='manhattan', n_neighbors=7, weights='distance')


In [24]:
# Test-set predictions from the fitted grid search object.
gs_predict = gs.predict(X=X_test)

In [35]:
# regression_results prints its metrics itself and returns None, so it is
# called directly; wrapping it in print() emitted a stray "None" after
# each report.
print('Before apply hyperparameter: \n')
regression_results(y_test, y_predict)

print('After apply hyperparameter: \n')
regression_results(y_test, gs_predict)

Before apply hyperparameter: 

explained_variance:  0.9502
mean_squared_log_error:  0.0078
r2:  0.9501
MAE:  1053.9888
MSE:  2419726.3832
RMSE:  1555.547
None
After apply hyperparameter: 

explained_variance:  0.9511
mean_squared_log_error:  0.0076
r2:  0.951
MAE:  1029.4971
MSE:  2376493.5799
RMSE:  1541.588
None


In [36]:
# 10-fold cross-validation of the baseline regressor on the training data.
cv_score = cross_val_score(estimator=knn_rgs, X=X_train, y=y_train, cv=10)
print(
    'Cross validation score: {}'.format(cv_score),
    'Mean cross validation score: {}'.format(cv_score.mean()),
    'Standard deviation cross validation score: {}'.format(cv_score.std()),
    sep='\n',
)

Cross validation score: [0.94526955 0.94974448 0.95055283 0.9485583  0.95491301 0.94688214
 0.93944819 0.9459707  0.94039415 0.95714854]
Mean cross validation score: 0.9478881886860604
Standard deviation cross validation score: 0.005338892618583622


In [50]:
# 10-fold cross-validation of the tuned regressor chosen by the grid search.
# NOTE(review): the hyperparameters were selected on this same X_train, so
# these scores are likely somewhat optimistic.
cv_score = cross_val_score(
    estimator=gs.best_estimator_,
    X=X_train,
    y=y_train,
    cv=10,
)
print(
    'Cross validation score: {}'.format(cv_score),
    'Mean cross validation score: {}'.format(cv_score.mean()),
    'Standard deviation cross validation score: {}'.format(cv_score.std()),
    sep='\n',
)

Cross validation score: [0.9492486  0.95391607 0.95188232 0.95218969 0.95787629 0.94804287
 0.940333   0.94661866 0.942444   0.96108939]
Mean cross validation score: 0.9503640882762037
Standard deviation cross validation score: 0.006113353775348264
