In [4]:
import pandas as pd
import numpy as np
# import warnings
# warnings.filterwarnings("ignore")

In [5]:
df = pd.read_csv('https://static.bc-edx.com/mbc/ai/m5/datasets/numeric_bank.csv')
df.head()

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous,y
0,30,1787,19,79,1,-1,0,0
1,33,4789,11,220,1,339,4,0
2,35,1350,16,185,1,330,1,0
3,30,1476,3,199,4,-1,0,0
4,59,0,5,226,1,-1,0,0


In [6]:
target = df["y"]
target_names = ["negative", "positive"]

In [7]:
data = df.drop("y", axis=1)
feature_names = data.columns
data.head()

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous
0,30,1787,19,79,1,-1,0
1,33,4789,11,220,1,339,4
2,35,1350,16,185,1,330,1
3,30,1476,3,199,4,-1,0
4,59,0,5,226,1,-1,0


In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

In [9]:
# Create three KNN classifiers
from sklearn.neighbors import KNeighborsClassifier
untuned_model = KNeighborsClassifier()
grid_tuned_model = KNeighborsClassifier()
random_tuned_model = KNeighborsClassifier()

In [11]:
from sklearn.metrics import classification_report
## Train a model without tuning and print the classification report
untuned_model.fit(X_train, y_train)
y_pred_untuned = untuned_model.predict(X_test)

print(classification_report(y_pred_untuned, y_test, target_names=target_names))

              precision    recall  f1-score   support

    negative       0.96      0.91      0.93      1060
    positive       0.22      0.38      0.28        71

    accuracy                           0.87      1131
   macro avg       0.59      0.64      0.60      1131
weighted avg       0.91      0.87      0.89      1131



In [15]:
# Create the grid search estimator along with a parameter object containing the values to adjust.
# Try adjusting n_neighbors with values of 1 through 19. Adjust leaf_size by using 10, 50, 100, and 500.
# Include both uniform and distance options for weights.
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_neighbors':range(1, 20, 2), # [1, 3,5,7,9, 11, 13, 15, 17, 19] 
    'weights': ['uniform', 'distance'],
    'leaf_size':[10, 50, 100, 500], 
}

grid_clf = GridSearchCV(grid_tuned_model, param_grid, verbose=3)

In [16]:
# Fit the model by using the grid search estimator.
# This will take the KNN model and try each combination of parameters.
grid_clf.fit(X_train, y_train)

Fitting 5 folds for each of 80 candidates, totalling 400 fits
[CV 1/5] END leaf_size=10, n_neighbors=1, weights=uniform;, score=0.842 total time=   0.0s
[CV 2/5] END leaf_size=10, n_neighbors=1, weights=uniform;, score=0.867 total time=   0.0s
[CV 3/5] END leaf_size=10, n_neighbors=1, weights=uniform;, score=0.833 total time=   0.0s
[CV 4/5] END leaf_size=10, n_neighbors=1, weights=uniform;, score=0.817 total time=   0.0s
[CV 5/5] END leaf_size=10, n_neighbors=1, weights=uniform;, score=0.848 total time=   0.0s
[CV 1/5] END leaf_size=10, n_neighbors=1, weights=distance;, score=0.842 total time=   0.0s
[CV 2/5] END leaf_size=10, n_neighbors=1, weights=distance;, score=0.867 total time=   0.0s
[CV 3/5] END leaf_size=10, n_neighbors=1, weights=distance;, score=0.833 total time=   0.0s
[CV 4/5] END leaf_size=10, n_neighbors=1, weights=distance;, score=0.817 total time=   0.0s
[CV 5/5] END leaf_size=10, n_neighbors=1, weights=distance;, score=0.848 total time=   0.0s
[CV 1/5] END leaf_size=

[CV 1/5] END leaf_size=10, n_neighbors=19, weights=uniform;, score=0.882 total time=   0.0s
[CV 2/5] END leaf_size=10, n_neighbors=19, weights=uniform;, score=0.888 total time=   0.0s
[CV 3/5] END leaf_size=10, n_neighbors=19, weights=uniform;, score=0.891 total time=   0.0s
[CV 4/5] END leaf_size=10, n_neighbors=19, weights=uniform;, score=0.888 total time=   0.0s
[CV 5/5] END leaf_size=10, n_neighbors=19, weights=uniform;, score=0.886 total time=   0.0s
[CV 1/5] END leaf_size=10, n_neighbors=19, weights=distance;, score=0.882 total time=   0.0s
[CV 2/5] END leaf_size=10, n_neighbors=19, weights=distance;, score=0.891 total time=   0.0s
[CV 3/5] END leaf_size=10, n_neighbors=19, weights=distance;, score=0.889 total time=   0.0s
[CV 4/5] END leaf_size=10, n_neighbors=19, weights=distance;, score=0.888 total time=   0.0s
[CV 5/5] END leaf_size=10, n_neighbors=19, weights=distance;, score=0.883 total time=   0.0s
[CV 1/5] END leaf_size=50, n_neighbors=1, weights=uniform;, score=0.842 tot

[CV 1/5] END leaf_size=50, n_neighbors=17, weights=distance;, score=0.883 total time=   0.0s
[CV 2/5] END leaf_size=50, n_neighbors=17, weights=distance;, score=0.898 total time=   0.0s
[CV 3/5] END leaf_size=50, n_neighbors=17, weights=distance;, score=0.889 total time=   0.0s
[CV 4/5] END leaf_size=50, n_neighbors=17, weights=distance;, score=0.883 total time=   0.0s
[CV 5/5] END leaf_size=50, n_neighbors=17, weights=distance;, score=0.882 total time=   0.0s
[CV 1/5] END leaf_size=50, n_neighbors=19, weights=uniform;, score=0.882 total time=   0.0s
[CV 2/5] END leaf_size=50, n_neighbors=19, weights=uniform;, score=0.888 total time=   0.0s
[CV 3/5] END leaf_size=50, n_neighbors=19, weights=uniform;, score=0.891 total time=   0.0s
[CV 4/5] END leaf_size=50, n_neighbors=19, weights=uniform;, score=0.888 total time=   0.0s
[CV 5/5] END leaf_size=50, n_neighbors=19, weights=uniform;, score=0.886 total time=   0.0s
[CV 1/5] END leaf_size=50, n_neighbors=19, weights=distance;, score=0.882 t

[CV 5/5] END leaf_size=100, n_neighbors=15, weights=distance;, score=0.882 total time=   0.0s
[CV 1/5] END leaf_size=100, n_neighbors=17, weights=uniform;, score=0.885 total time=   0.0s
[CV 2/5] END leaf_size=100, n_neighbors=17, weights=uniform;, score=0.892 total time=   0.0s
[CV 3/5] END leaf_size=100, n_neighbors=17, weights=uniform;, score=0.891 total time=   0.0s
[CV 4/5] END leaf_size=100, n_neighbors=17, weights=uniform;, score=0.883 total time=   0.0s
[CV 5/5] END leaf_size=100, n_neighbors=17, weights=uniform;, score=0.882 total time=   0.0s
[CV 1/5] END leaf_size=100, n_neighbors=17, weights=distance;, score=0.883 total time=   0.0s
[CV 2/5] END leaf_size=100, n_neighbors=17, weights=distance;, score=0.898 total time=   0.0s
[CV 3/5] END leaf_size=100, n_neighbors=17, weights=distance;, score=0.889 total time=   0.0s
[CV 4/5] END leaf_size=100, n_neighbors=17, weights=distance;, score=0.883 total time=   0.0s
[CV 5/5] END leaf_size=100, n_neighbors=17, weights=distance;, sc

[CV 2/5] END leaf_size=500, n_neighbors=15, weights=distance;, score=0.892 total time=   0.0s
[CV 3/5] END leaf_size=500, n_neighbors=15, weights=distance;, score=0.889 total time=   0.0s
[CV 4/5] END leaf_size=500, n_neighbors=15, weights=distance;, score=0.882 total time=   0.0s
[CV 5/5] END leaf_size=500, n_neighbors=15, weights=distance;, score=0.882 total time=   0.0s
[CV 1/5] END leaf_size=500, n_neighbors=17, weights=uniform;, score=0.885 total time=   0.0s
[CV 2/5] END leaf_size=500, n_neighbors=17, weights=uniform;, score=0.892 total time=   0.0s
[CV 3/5] END leaf_size=500, n_neighbors=17, weights=uniform;, score=0.891 total time=   0.0s
[CV 4/5] END leaf_size=500, n_neighbors=17, weights=uniform;, score=0.883 total time=   0.0s
[CV 5/5] END leaf_size=500, n_neighbors=17, weights=uniform;, score=0.882 total time=   0.0s
[CV 1/5] END leaf_size=500, n_neighbors=17, weights=distance;, score=0.883 total time=   0.0s
[CV 2/5] END leaf_size=500, n_neighbors=17, weights=distance;, sc

In [17]:
# List the best parameters for this dataset
grid_clf.best_params_

{'leaf_size': 10, 'n_neighbors': 17, 'weights': 'distance'}

In [18]:
# Print the classification report for the best model
grid_y_pred = grid_clf.predict(X_test)

print(classification_report(grid_y_pred, y_test, target_names=target_names))

              precision    recall  f1-score   support

    negative       0.98      0.91      0.94      1085
    positive       0.18      0.48      0.26        46

    accuracy                           0.89      1131
   macro avg       0.58      0.69      0.60      1131
weighted avg       0.94      0.89      0.91      1131



In [19]:
# Create the parameter object for the randomized search estimator.
# Try adjusting n_neighbors with values of 1 through 19. 
# Adjust leaf_size by using a range from 1 to 500.
# Include both uniform and distance options for weights.

param_grid = {
    'n_neighbors':range(1, 20, 2), # [1, 3,5,7,9, 11, 13, 15, 17, 19] 
    'weights': ['uniform', 'distance'],
    'leaf_size':range(1, 500), 
}

param_grid

{'n_neighbors': range(1, 20, 2),
 'weights': ['uniform', 'distance'],
 'leaf_size': range(1, 500)}

In [21]:
# Create the randomized search estimator
from sklearn.model_selection import RandomizedSearchCV

random_clf = RandomizedSearchCV(random_tuned_model, param_grid, random_state=0, verbose=3)

In [22]:
# Fit the model by using the randomized search estimator.
random_clf.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END leaf_size=137, n_neighbors=13, weights=uniform;, score=0.883 total time=   0.0s
[CV 2/5] END leaf_size=137, n_neighbors=13, weights=uniform;, score=0.891 total time=   0.0s
[CV 3/5] END leaf_size=137, n_neighbors=13, weights=uniform;, score=0.885 total time=   0.0s
[CV 4/5] END leaf_size=137, n_neighbors=13, weights=uniform;, score=0.886 total time=   0.0s
[CV 5/5] END leaf_size=137, n_neighbors=13, weights=uniform;, score=0.881 total time=   0.0s
[CV 1/5] END leaf_size=493, n_neighbors=5, weights=distance;, score=0.867 total time=   0.0s
[CV 2/5] END leaf_size=493, n_neighbors=5, weights=distance;, score=0.878 total time=   0.0s
[CV 3/5] END leaf_size=493, n_neighbors=5, weights=distance;, score=0.867 total time=   0.0s
[CV 4/5] END leaf_size=493, n_neighbors=5, weights=distance;, score=0.853 total time=   0.0s
[CV 5/5] END leaf_size=493, n_neighbors=5, weights=distance;, score=0.864 total time=   0.0s
[CV 1/5] 

In [23]:
# List the best parameters for this dataset
random_clf.best_params_

{'weights': 'distance', 'n_neighbors': 19, 'leaf_size': 243}

In [24]:
# Make predictions with the hypertuned model
random_tuned_preds = random_clf.predict(X_test)

In [27]:
print(classification_report(y_pred_untuned, y_test, target_names=target_names))

              precision    recall  f1-score   support

    negative       0.96      0.91      0.93      1060
    positive       0.22      0.38      0.28        71

    accuracy                           0.87      1131
   macro avg       0.59      0.64      0.60      1131
weighted avg       0.91      0.87      0.89      1131



In [26]:
print(classification_report(grid_y_pred, y_test, target_names=target_names))

              precision    recall  f1-score   support

    negative       0.98      0.91      0.94      1085
    positive       0.18      0.48      0.26        46

    accuracy                           0.89      1131
   macro avg       0.58      0.69      0.60      1131
weighted avg       0.94      0.89      0.91      1131



In [25]:
# Calculate the classification report

print(classification_report(random_tuned_preds, y_test, target_names=target_names))

              precision    recall  f1-score   support

    negative       0.98      0.91      0.94      1088
    positive       0.18      0.51      0.26        43

    accuracy                           0.89      1131
   macro avg       0.58      0.71      0.60      1131
weighted avg       0.95      0.89      0.91      1131



## Interpretations
What were the best settings for the hyperparameters that were tested? How much improvement was made by tuning those hyperparameters?