In [1]:
# Import modules
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Import data
df = pd.read_csv('https://static.bc-edx.com/mbc/ai/m5/datasets/numeric_bank.csv')
df.head()


Unnamed: 0,age,balance,day,duration,campaign,pdays,previous,y
0,30,1787,19,79,1,-1,0,0
1,33,4789,11,220,1,339,4,0
2,35,1350,16,185,1,330,1,0
3,30,1476,3,199,4,-1,0,0
4,59,0,5,226,1,-1,0,0


In [2]:
y = df['y']

In [3]:
y

0       0
1       0
2       0
3       0
4       0
       ..
4516    0
4517    0
4518    0
4519    0
4520    0
Name: y, Length: 4521, dtype: int64

In [4]:
X = df.drop(columns='y')
X

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous
0,30,1787,19,79,1,-1,0
1,33,4789,11,220,1,339,4
2,35,1350,16,185,1,330,1
3,30,1476,3,199,4,-1,0
4,59,0,5,226,1,-1,0
...,...,...,...,...,...,...,...
4516,33,-333,30,329,5,-1,0
4517,57,-3313,9,153,1,-1,0
4518,57,295,19,151,11,-1,0
4519,28,1137,6,129,4,211,3


In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [6]:
# Create the SVC model
from sklearn.svm import SVC
untuned_model = SVC(kernel='linear')
untuned_model

In [7]:
from sklearn.metrics import classification_report
target_names = ["negative", "positive"]
untuned_model.fit(X_train, y_train)
untuned_y_pred = untuned_model.predict(X_test)
print(classification_report(y_test, untuned_y_pred,
                            target_names=target_names))

              precision    recall  f1-score   support

    negative       0.91      0.97      0.94      1006
    positive       0.45      0.20      0.28       125

    accuracy                           0.88      1131
   macro avg       0.68      0.58      0.61      1131
weighted avg       0.86      0.88      0.86      1131



In [8]:
param_grid = {'C': [1, 5, 10, 50],
              'gamma': [0.0001, 0.0005, 0.001, 0.005]}
param_grid

{'C': [1, 5, 10, 50], 'gamma': [0.0001, 0.0005, 0.001, 0.005]}

In [9]:
from sklearn.model_selection import GridSearchCV
grid_clf = GridSearchCV(untuned_model, param_grid, verbose=3)

In [11]:
# Fit the model by using the grid search classifier.
# This will take the SVC model and try each combination of parameters.
grid_clf.fit(X_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END .................C=1, gamma=0.0001;, score=0.873 total time= 2.9min
[CV 2/5] END .................C=1, gamma=0.0001;, score=0.882 total time= 4.8min
[CV 3/5] END .................C=1, gamma=0.0001;, score=0.881 total time= 3.0min
[CV 4/5] END .................C=1, gamma=0.0001;, score=0.872 total time= 2.7min
[CV 5/5] END .................C=1, gamma=0.0001;, score=0.869 total time= 3.9min
[CV 1/5] END .................C=1, gamma=0.0005;, score=0.873 total time= 2.7min
[CV 2/5] END .................C=1, gamma=0.0005;, score=0.882 total time= 4.6min
[CV 3/5] END .................C=1, gamma=0.0005;, score=0.881 total time= 3.0min
[CV 4/5] END .................C=1, gamma=0.0005;, score=0.872 total time= 2.7min
[CV 5/5] END .................C=1, gamma=0.0005;, score=0.869 total time= 3.9min
[CV 1/5] END ..................C=1, gamma=0.001;, score=0.873 total time= 2.8min
[CV 2/5] END ..................C=1, gamma=0.001;

KeyboardInterrupt: 

In [None]:
# List the best parameters for this dataset
print(grid_clf.best_params_)

# List the best score
print(grid_clf.best_score_)

In [12]:
big_param_grid = {
    'C' : np.arange(0, 100, 1),
    'gamma': np.arange(0, 0.01, .0001),
}
big_param_grid

{'C': array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
        51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
        68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
        85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]),
 'gamma': array([0.    , 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007,
        0.0008, 0.0009, 0.001 , 0.0011, 0.0012, 0.0013, 0.0014, 0.0015,
        0.0016, 0.0017, 0.0018, 0.0019, 0.002 , 0.0021, 0.0022, 0.0023,
        0.0024, 0.0025, 0.0026, 0.0027, 0.0028, 0.0029, 0.003 , 0.0031,
        0.0032, 0.0033, 0.0034, 0.0035, 0.0036, 0.0037, 0.0038, 0.0039,
        0.004 , 0.0041, 0.0042, 0.0043, 0.0044, 0.0045, 0.0046, 0.0047,
        0.0048, 0.0049, 0.005 , 0.0051, 0.0052, 0.0053, 0.0054, 0.0055,
        0.0056, 0.0057, 0.0058, 

In [13]:
# Create the randomized search estimator along with a parameter object containing the values to adjust
from sklearn.model_selection import RandomizedSearchCV
random_clf = RandomizedSearchCV(untuned_model, big_param_grid, n_iter=100, random_state=1, verbose=3)
random_clf

# Fit the model by using the randomized search estimator.
# This will take the logistic regression model and try a random sample of combinations of parameters.
random_clf.fit(X_train, y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV 1/5] END .................C=2, gamma=0.0035;, score=0.866 total time= 5.1min
[CV 2/5] END .................C=2, gamma=0.0035;, score=0.876 total time= 3.1min
[CV 3/5] END .................C=2, gamma=0.0035;, score=0.861 total time= 2.6min
[CV 4/5] END .................C=2, gamma=0.0035;, score=0.863 total time= 3.6min
[CV 5/5] END .................C=2, gamma=0.0035;, score=0.864 total time= 2.4min
[CV 1/5] END ................C=51, gamma=0.0092;, score=0.855 total time= 5.3min
[CV 2/5] END ................C=51, gamma=0.0092;, score=0.866 total time= 4.5min
[CV 3/5] END ................C=51, gamma=0.0092;, score=0.855 total time= 3.6min
[CV 4/5] END ................C=51, gamma=0.0092;, score=0.850 total time= 3.8min
[CV 5/5] END ................C=51, gamma=0.0092;, score=0.850 total time= 2.7min
[CV 1/5] END .................C=9, gamma=0.0005;, score=0.853 total time=16.1min
[CV 2/5] END .................C=9, gamma=0.000

KeyboardInterrupt: 