In [1]:
import time
import pandas as pd
import numpy as np
from bayes_opt import BayesianOptimization
from sklearn import preprocessing
from sklearn.neural_network import MLPRegressor
from sklearn import model_selection

Config parameters:

In [2]:
# Paths to the processed train/test CSV files (relative to the working directory)
train_dataset_name = './data/processed/credit_train.csv'
test_dataset_name = './data/processed/credit_test.csv'

# Target directory and file name for saving the predictions
# NOTE(review): unlike the dataset paths above, this path does not start with './' - confirm it is intended
target_dir = 'PSDA/data/predictions/'
name_of_predictions_file = 'credit_test.csv'

# Switch for the hyperparameter optimization runs: every `optimizer.maximize(...)` call below is
# guarded by this flag (with a given random state it is unnecessary to compute each time)
hyper_opt = True

## Load Data and make temporary train-test-split

In [3]:
# Read the processed train and test CSV files from the paths set in the config cell
df_train = pd.read_csv(train_dataset_name)
df_test = pd.read_csv(test_dataset_name)

# Show the column index of the train frame, then of the test frame
print(df_train.columns, df_test.columns, sep='\n')

Index(['Unnamed: 0', 'Income', 'Limit', 'Rating', 'Cards', 'Balance',
       'Student_Yes'],
      dtype='object')
Index(['Unnamed: 0', 'Income', 'Limit', 'Rating', 'Cards', 'Student_Yes'], dtype='object')


In [4]:
# Train set: float matrix, ids (first column), target ('Balance') and the remaining feature columns
train = df_train.values.astype(float)
train_ids = train[:, 0]
train_balance = train[:, df_train.columns.get_loc('Balance')]
train_data = train[:, ~df_train.columns.isin(['Balance', 'Unnamed: 0'])]

# Test set: same layout, but without a target column
test = df_test.values.astype(float)
test_ids = test[:, 0]
test_data = test[:, df_test.columns != 'Unnamed: 0']

# Print the shape of every array, one per line
print(
    *(array.shape for array in (train, train_ids, train_balance, train_data, test, test_ids, test_data)),
    sep='\n',
)

(350, 7)
(350,)
(350,)
(350, 5)
(50, 6)
(50,)
(50, 5)


In [5]:
# Average of the target values in the training set
train_balance.mean()

521.7085714285714

In [6]:
# Former static hold-out split, kept for reference only (replaced by K-Fold cross-validation):
# temp_train_ids, temp_test_ids, temp_train_data, temp_test_data, temp_train_balance, temp_test_balance = model_selection.train_test_split(train_ids, train_data, train_balance, test_size=50, random_state=4)

# Seeded row permutation, applied identically to the features and the target,
# so that the K-Fold cross-validation below works on shuffled data
np.random.seed(6)
indicis = np.arange(len(train_data))
np.random.shuffle(indicis)
random_train_data, random_train_balance = train_data[indicis], train_balance[indicis]

Findings: A score obtained from a single validation split is not a reliable estimate of the performance on the test data, which is why I use K-Fold cross-validation below. 

## Preprocess data

In [7]:
# The former static-validation-set preprocessing (temp_train_data / temp_test_data) was removed
# together with that split.

# Standardize the features. The scaler is fitted on the training data ONLY; the test data is then
# transformed with the training mean/variance. Re-fitting the scaler on the test set (as this cell
# did before) would map train and test into different feature spaces, so a model trained on
# `prep_train_data` would receive inconsistently scaled inputs at prediction time.
# NOTE(review): the feature matrices have only 5 columns, so `[:, :6]` selects all of them
# (including the binary 'Student_Yes' dummy) and `[:, 6:]` is empty. If only the numerical columns
# are meant to be scaled, the split index has to be adjusted - left unchanged here so that the
# training features stay identical to the ones used in the recorded optimization runs.
snv = preprocessing.StandardScaler()
prep_train_data = np.c_[snv.fit_transform(random_train_data[:, :6]), random_train_data[:, 6:]]
prep_test_data = np.c_[snv.transform(test_data[:, :6]), test_data[:, 6:]]

## Create MLP Regressor instances 

Note: As was done for the Gradient Boosting algorithm, I set every negative prediction to zero.

In [10]:
class MLPWithCutoff():
    """MLPRegressor wrapper whose predictions are cut off at zero.

    Only `learning_rate_init`, `batch_size`, `alpha`, `beta_1`, `beta_2` and
    `hidden_layer_sizes` are forwarded to the wrapped `MLPRegressor`.
    All other constructor arguments are accepted but ignored: the wrapped model is
    always built with the fixed settings listed in `__init__` (e.g. `max_iter=5000`,
    `random_state=7`, `activation='relu'`, `solver='adam'`).
    """

    def __init__(
        self, learning_rate_init, batch_size, alpha, beta_1, beta_2, hidden_layer_sizes=None, activation='relu', *,
        solver='adam', learning_rate='constant', power_t=0.5, max_iter=200, shuffle=True, random_state=None,
        tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False,
        validation_fraction=0.1, epsilon=1e-08, n_iter_no_change=10, max_fun=15000,
    ):
        """Build the wrapped regressor.

        Parameters
        ----------
        learning_rate_init, alpha, beta_1, beta_2 : float
            Forwarded unchanged to `MLPRegressor`.
        batch_size : float or int
            Truncated to `int` before being forwarded (the optimizer proposes floats).
        hidden_layer_sizes : sequence of int or None
            Forwarded to `MLPRegressor`; `None` selects a single hidden layer of 100 units.
        """
        # None is not a valid layer specification for MLPRegressor, so fall back to
        # a single 100-unit hidden layer instead of failing later in fit().
        if hidden_layer_sizes is None:
            hidden_layer_sizes = (100,)
        self.mlp_regressor = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes, activation='relu', solver='adam', alpha=alpha, batch_size=int(batch_size), learning_rate='constant', learning_rate_init=learning_rate_init, power_t=0.5, max_iter=5000, shuffle=True, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=beta_1, beta_2=beta_2, epsilon=1e-08, n_iter_no_change=10, max_fun=15000, random_state=7)

    def fit(self, X, y):
        """Fit the wrapped regressor on (X, y) and return this wrapper to allow call chaining."""
        self.mlp_regressor.fit(X, y)
        return self

    def predict(self, X):
        """Predict with the wrapped regressor and replace every negative prediction by zero."""
        prediction = self.mlp_regressor.predict(X)
        # np.maximum clamps directly; multiplying by a 0/1 mask would produce -0.0
        # for negative predictions and NaN for -inf.
        return np.maximum(prediction, 0.0)

    def get_params(self, deep=True):
        """Return the parameters of the wrapped regressor (delegates to its `get_params`)."""
        return self.mlp_regressor.get_params(deep=deep)

Note: In the following, Bayesian optimization is used to find the optimal hyperparameters. The parameter bounds are:
* 'learning_rate_init': (0.000001, 0.1)
    - I want to explore the neighbourhood of the default value, which is why I chose 0.000001 as the lower bound and 0.1 as the upper bound.
* 'batch_size': (1, 100) 
    - The upper bound of batch_size is 100 because the training dataset contains only 350 samples.
* 'alpha': (0.000001, 0.1)
    - I want to explore the neighbourhood of the default value, which is why I chose 0.000001 as the lower bound and 0.1 as the upper bound.
* 'beta_1': (0.8, 0.999)
    - I want to explore the neighbourhood of the default value, which is why I chose 0.8 as the lower bound and 0.999 as the upper bound.
* 'beta_2': (0.9, 0.999999)
    - I want to explore the neighbourhood of the default value, which is why I chose 0.9 as the lower bound and 0.999999 as the upper bound.
* 'hidden_layer_count': (0, 20)
    - The architecture is [16] + [64] * hidden_layer_count + [16]. I want to set the upper bound on the number of layers as high as possible.

In [8]:
def hyperparameter_function_mlp_with_cut(learning_rate_init, batch_size, alpha, beta_1, beta_2, hidden_layer_count):
    """Objective for the Bayesian optimization: worst-fold score of an MLP.

    An `MLPRegressor` with the given hyperparameters is evaluated with 5-fold
    cross-validation on `prep_train_data` / `random_train_balance`.

    `batch_size` and `hidden_layer_count` are truncated to `int`. The network has a
    16-unit hidden layer, then `hidden_layer_count` hidden layers of 64 units, then
    another 16-unit hidden layer.

    Returns the smallest of the five 'neg_mean_absolute_error' scores, i.e. the
    score of the fold with the largest mean absolute error.
    """
    layer_sizes = [16] + [64] * int(hidden_layer_count) + [16]
    # Only the initial learning rate is tuned; the schedule is left at the MLPRegressor default
    candidate = MLPRegressor(
        hidden_layer_sizes=layer_sizes,
        alpha=alpha,
        batch_size=int(batch_size),
        learning_rate_init=learning_rate_init,
        beta_1=beta_1,
        beta_2=beta_2,
        max_iter=15000,
        random_state=7,
    )
    fold_scores = model_selection.cross_val_score(
        candidate, X=prep_train_data, y=random_train_balance, cv=5, scoring='neg_mean_absolute_error'
    )
    return fold_scores.min()


# Search interval (lower, upper) for every tuned hyperparameter
pbounds = {
    'learning_rate_init': (0.000001, 0.1),
    'batch_size': (1, 100),
    'alpha': (0.000001, 0.1),
    'beta_1': (0.8, 0.999),
    'beta_2': (0.9, 0.999999),
    'hidden_layer_count': (0, 20),
}

optimizer = BayesianOptimization(f=hyperparameter_function_mlp_with_cut, pbounds=pbounds, random_state=7)

# The search only runs when it is enabled in the config cell
if hyper_opt:
    optimizer.maximize(init_points=50, n_iter=100)

|   iter    |  target   |   alpha   | batch_... |  beta_1   |  beta_2   | hidden... | learni... |
-------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m-631.8   [0m | [0m0.007632 [0m | [0m78.21    [0m | [0m0.8872   [0m | [0m0.9723   [0m | [0m19.56    [0m | [0m0.05385  [0m |
| [95m2        [0m | [95m-418.0   [0m | [95m0.05011  [0m | [95m8.133    [0m | [95m0.8534   [0m | [95m0.95     [0m | [95m13.58    [0m | [95m0.08037  [0m |
| [95m3        [0m | [95m-97.99   [0m | [95m0.03809  [0m | [95m7.528    [0m | [95m0.8573   [0m | [95m0.991    [0m | [95m4.268    [0m | [95m0.04521  [0m |
| [0m4        [0m | [0m-165.1   [0m | [0m0.09312  [0m | [0m3.465    [0m | [0m0.9195   [0m | [0m0.995    [0m | [0m4.606    [0m | [0m0.05485  [0m |
| [0m5        [0m | [0m-396.3   [0m | [0m0.09091  [0m | [0m14.18    [0m | [0m0.9042   [0m | [0m0.975    [0m | [0m13.38    [0m 



| [0m65       [0m | [0m-242.3   [0m | [0m0.03206  [0m | [0m53.32    [0m | [0m0.9776   [0m | [0m0.9362   [0m | [0m7.082    [0m | [0m1e-06    [0m |
| [0m66       [0m | [0m-71.27   [0m | [0m0.08572  [0m | [0m49.32    [0m | [0m0.8586   [0m | [0m0.9605   [0m | [0m19.35    [0m | [0m0.00656  [0m |
| [0m67       [0m | [0m-533.1   [0m | [0m0.009911 [0m | [0m32.96    [0m | [0m0.8581   [0m | [0m0.9746   [0m | [0m15.09    [0m | [0m0.0948   [0m |
| [0m68       [0m | [0m-413.3   [0m | [0m0.02831  [0m | [0m36.53    [0m | [0m0.8892   [0m | [0m0.9299   [0m | [0m13.94    [0m | [0m0.05016  [0m |
| [0m69       [0m | [0m-609.8   [0m | [0m0.02193  [0m | [0m40.28    [0m | [0m0.8826   [0m | [0m0.9647   [0m | [0m14.38    [0m | [0m0.02156  [0m |
| [0m70       [0m | [0m-41.25   [0m | [0m0.05269  [0m | [0m38.98    [0m | [0m0.96     [0m | [0m0.9382   [0m | [0m11.26    [0m | [0m0.008871 [0m |
| [0m71       [0m | [0m-6



| [0m84       [0m | [0m-48.04   [0m | [0m0.02635  [0m | [0m63.11    [0m | [0m0.8      [0m | [0m1.0      [0m | [0m16.8     [0m | [0m1e-06    [0m |
| [0m85       [0m | [0m-142.3   [0m | [0m0.09212  [0m | [0m31.88    [0m | [0m0.926    [0m | [0m0.9516   [0m | [0m18.48    [0m | [0m0.01591  [0m |
| [0m86       [0m | [0m-32.6    [0m | [0m0.02481  [0m | [0m68.2     [0m | [0m0.8189   [0m | [0m0.9333   [0m | [0m17.13    [0m | [0m8.773e-05[0m |
| [0m87       [0m | [0m-548.7   [0m | [0m0.003283 [0m | [0m71.69    [0m | [0m0.8396   [0m | [0m0.9547   [0m | [0m18.23    [0m | [0m0.0765   [0m |
| [0m88       [0m | [0m-25.72   [0m | [0m0.01229  [0m | [0m77.19    [0m | [0m0.8857   [0m | [0m0.9753   [0m | [0m0.9237   [0m | [0m0.06925  [0m |
| [0m89       [0m | [0m-164.7   [0m | [0m0.03362  [0m | [0m74.92    [0m | [0m0.9868   [0m | [0m0.9244   [0m | [0m3.811    [0m | [0m0.01438  [0m |
| [0m90       [0m | [0m-2



| [0m107      [0m | [0m-236.0   [0m | [0m0.1      [0m | [0m86.88    [0m | [0m0.8      [0m | [0m0.985    [0m | [0m13.25    [0m | [0m1e-06    [0m |
| [0m108      [0m | [0m-111.5   [0m | [0m0.02395  [0m | [0m87.79    [0m | [0m0.9751   [0m | [0m0.9046   [0m | [0m0.0627   [0m | [0m0.09121  [0m |




| [0m109      [0m | [0m-251.0   [0m | [0m0.1      [0m | [0m90.92    [0m | [0m0.8      [0m | [0m0.9      [0m | [0m16.94    [0m | [0m1e-06    [0m |
| [0m110      [0m | [0m-398.9   [0m | [0m0.05216  [0m | [0m87.38    [0m | [0m0.8767   [0m | [0m0.9094   [0m | [0m16.92    [0m | [0m0.05183  [0m |
| [0m111      [0m | [0m-116.4   [0m | [0m0.06707  [0m | [0m1.079    [0m | [0m0.8896   [0m | [0m0.9588   [0m | [0m7.214    [0m | [0m0.03802  [0m |
| [0m112      [0m | [0m-85.44   [0m | [0m0.007429 [0m | [0m64.12    [0m | [0m0.8647   [0m | [0m0.9559   [0m | [0m2.455    [0m | [0m0.08526  [0m |
| [0m113      [0m | [0m-79.73   [0m | [0m0.07719  [0m | [0m60.52    [0m | [0m0.8693   [0m | [0m0.9711   [0m | [0m2.322    [0m | [0m0.08189  [0m |
| [95m114      [0m | [95m-16.19   [0m | [95m0.02465  [0m | [95m56.87    [0m | [95m0.9506   [0m | [95m0.9806   [0m | [95m0.8155   [0m | [95m0.09076  [0m |
| [0m115      [0m 



| [0m145      [0m | [0m-498.4   [0m | [0m0.1      [0m | [0m97.45    [0m | [0m0.999    [0m | [0m0.9      [0m | [0m11.6     [0m | [0m1e-06    [0m |
| [0m146      [0m | [0m-632.0   [0m | [0m1e-06    [0m | [0m97.62    [0m | [0m0.8      [0m | [0m1.0      [0m | [0m15.11    [0m | [0m0.1      [0m |
| [0m147      [0m | [0m-23.43   [0m | [0m0.01443  [0m | [0m99.86    [0m | [0m0.9103   [0m | [0m0.9927   [0m | [0m6.821    [0m | [0m0.0103   [0m |
| [0m148      [0m | [0m-17.03   [0m | [0m0.0194   [0m | [0m18.61    [0m | [0m0.8205   [0m | [0m0.9163   [0m | [0m0.0563   [0m | [0m0.01481  [0m |
| [0m149      [0m | [0m-630.9   [0m | [0m1e-06    [0m | [0m97.65    [0m | [0m0.8      [0m | [0m0.9997   [0m | [0m18.76    [0m | [0m0.1      [0m |
| [0m150      [0m | [0m-81.39   [0m | [0m0.08784  [0m | [0m92.96    [0m | [0m0.8451   [0m | [0m0.9515   [0m | [0m19.76    [0m | [0m0.004665 [0m |


Note: I narrowed the 'learning_rate_init' and 'batch_size' intervals to search in the area where the previous best model was found, and fixed the number of 64-unit hidden layers to 7. 

In [9]:
def hyperparameter_function_mlp_with_cut(learning_rate_init, batch_size, alpha, beta_1, beta_2):
    """Objective for the Bayesian optimization with a fixed network architecture.

    An `MLPRegressor` with the given hyperparameters is evaluated with 5-fold
    cross-validation on `prep_train_data` / `random_train_balance`.

    `batch_size` is truncated to `int`. The network always has a 16-unit hidden
    layer, seven hidden layers of 64 units, and another 16-unit hidden layer.

    Returns the smallest of the five 'neg_mean_absolute_error' scores, i.e. the
    score of the fold with the largest mean absolute error.
    """
    layer_sizes = [16] + [64] * 7 + [16]
    # Only the initial learning rate is tuned; the schedule is left at the MLPRegressor default
    candidate = MLPRegressor(
        hidden_layer_sizes=layer_sizes,
        alpha=alpha,
        batch_size=int(batch_size),
        learning_rate_init=learning_rate_init,
        beta_1=beta_1,
        beta_2=beta_2,
        max_iter=5000,
        random_state=4,
    )
    fold_scores = model_selection.cross_val_score(
        candidate, X=prep_train_data, y=random_train_balance, cv=5, scoring='neg_mean_absolute_error'
    )
    return fold_scores.min()
    

# Search interval (lower, upper) for every tuned hyperparameter
pbounds = {
    'learning_rate_init': (0.0001, 0.1),
    'batch_size': (60, 100),
    'alpha': (0.000001, 0.1),
    'beta_1': (0.8, 0.999),
    'beta_2': (0.9, 0.999999),
}

optimizer = BayesianOptimization(f=hyperparameter_function_mlp_with_cut, pbounds=pbounds, random_state=14)

# The search only runs when it is enabled in the config cell
if hyper_opt:
    optimizer.maximize(init_points=100, n_iter=200)

|   iter    |  target   |   alpha   | batch_... |  beta_1   |  beta_2   | learni... |
-------------------------------------------------------------------------------------
| [0m1        [0m | [0m-137.9   [0m | [0m0.05139  [0m | [0m90.93    [0m | [0m0.9732   [0m | [0m0.9008   [0m | [0m0.03104  [0m |
| [95m2        [0m | [95m-96.59   [0m | [95m0.09576  [0m | [95m80.52    [0m | [95m0.8633   [0m | [95m0.9539   [0m | [95m0.0222   [0m |
| [0m3        [0m | [0m-100.1   [0m | [0m0.08065  [0m | [0m73.69    [0m | [0m0.9072   [0m | [0m0.9006   [0m | [0m0.06735  [0m |
| [95m4        [0m | [95m-95.78   [0m | [95m0.021    [0m | [95m97.3     [0m | [95m0.8745   [0m | [95m0.9752   [0m | [95m0.07634  [0m |
| [95m5        [0m | [95m-75.61   [0m | [95m0.08705  [0m | [95m64.4     [0m | [95m0.8597   [0m | [95m0.9475   [0m | [95m0.06733  [0m |
| [0m6        [0m | [0m-87.65   [0m | [0m0.02576  [0m | [0m88.05    [0m | [0m0.9307   [



| [0m179      [0m | [0m-389.1   [0m | [0m0.1      [0m | [0m73.97    [0m | [0m0.8      [0m | [0m1.0      [0m | [0m0.1      [0m |
| [0m180      [0m | [0m-49.76   [0m | [0m0.07897  [0m | [0m94.01    [0m | [0m0.9269   [0m | [0m0.9895   [0m | [0m0.04632  [0m |
| [0m181      [0m | [0m-37.43   [0m | [0m0.009995 [0m | [0m96.2     [0m | [0m0.9825   [0m | [0m0.9977   [0m | [0m0.0266   [0m |
| [0m182      [0m | [0m-5.166e+0[0m | [0m1e-06    [0m | [0m92.4     [0m | [0m0.999    [0m | [0m0.9      [0m | [0m0.0001   [0m |
| [0m183      [0m | [0m-37.89   [0m | [0m0.08153  [0m | [0m91.88    [0m | [0m0.8101   [0m | [0m0.9955   [0m | [0m0.02895  [0m |
| [0m184      [0m | [0m-3.754e+0[0m | [0m1e-06    [0m | [0m90.29    [0m | [0m0.999    [0m | [0m0.9      [0m | [0m0.0001   [0m |
| [0m185      [0m | [0m-383.8   [0m | [0m0.1      [0m | [0m86.23    [0m | [0m0.999    [0m | [0m1.0      [0m | [0m0.0001   [0m |
| [0m

Note: I changed the architecture search: the optimization now covers not only the layer count but also the layer size. To restrict the search space, all layers have the same size.

In [None]:
def hyperparameter_function_mlp_with_cut(learning_rate_init, batch_size, alpha, beta_1, beta_2, layer_count, hidden_size):
    """Objective for the Bayesian optimization over layer count and layer size.

    An `MLPRegressor` with the given hyperparameters is evaluated with 5-fold
    cross-validation on `prep_train_data` / `random_train_balance`.

    `batch_size`, `layer_count` and `hidden_size` are truncated to `int`. The
    network has `layer_count` hidden layers with `hidden_size` units each.

    Returns the smallest of the five 'neg_mean_absolute_error' scores, i.e. the
    score of the fold with the largest mean absolute error.
    """
    layer_sizes = [int(hidden_size) for _ in range(int(layer_count))]
    # Only the initial learning rate is tuned; the schedule is left at the MLPRegressor default
    candidate = MLPRegressor(
        hidden_layer_sizes=layer_sizes,
        alpha=alpha,
        batch_size=int(batch_size),
        learning_rate_init=learning_rate_init,
        beta_1=beta_1,
        beta_2=beta_2,
        max_iter=15000,
        random_state=7,
    )
    fold_scores = model_selection.cross_val_score(
        candidate, X=prep_train_data, y=random_train_balance, cv=5, scoring='neg_mean_absolute_error'
    )
    return fold_scores.min()


# Search interval (lower, upper) for every tuned hyperparameter
# NOTE(review): with 350 training samples and cv=5 each training fold has 280 samples, so the upper
# part of the 'batch_size' interval exceeds the fold size - presumably MLPRegressor clips it; confirm
pbounds = {
    'learning_rate_init': (0.000001, 0.1),
    'batch_size': (1, 300),
    'alpha': (0.000001, 0.1),
    'beta_1': (0.8, 0.999),
    'beta_2': (0.9, 0.999999),
    'layer_count': (1, 5),
    'hidden_size': (64, 128),
}

optimizer = BayesianOptimization(f=hyperparameter_function_mlp_with_cut, pbounds=pbounds, random_state=7)

# The search only runs when it is enabled in the config cell
if hyper_opt:
    optimizer.maximize(init_points=50, n_iter=100)

|   iter    |  target   |   alpha   | batch_... |  beta_1   |  beta_2   | hidden... | layer_... | learni... |
-------------------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m-62.78   [0m | [0m0.007632 [0m | [0m234.2    [0m | [0m0.8872   [0m | [0m0.9723   [0m | [0m126.6    [0m | [0m3.154    [0m | [0m0.05011  [0m |
| [95m2        [0m | [95m-18.75   [0m | [95m0.007206 [0m | [95m81.26    [0m | [95m0.8995   [0m | [95m0.9679   [0m | [95m115.4    [0m | [95m2.524    [0m | [95m0.006595 [0m |
| [0m3        [0m | [0m-28.16   [0m | [0m0.02882  [0m | [0m273.0    [0m | [0m0.8425   [0m | [0m0.9452   [0m | [0m123.6    [0m | [0m1.1      [0m | [0m0.06006  [0m |
| [0m4        [0m | [0m-45.02   [0m | [0m0.09501  [0m | [0m69.86    [0m | [0m0.9091   [0m | [0m0.9909   [0m | [0m72.52    [0m | [0m3.094    [0m | [0m0.07504  [0m |
| [0m5        [0m | [0m-29.25   [0m | 



| [0m31       [0m | [0m-630.5   [0m | [0m0.09735  [0m | [0m286.7    [0m | [0m0.8845   [0m | [0m0.9594   [0m | [0m66.54    [0m | [0m4.955    [0m | [0m0.08187  [0m |
| [0m32       [0m | [0m-21.81   [0m | [0m0.06365  [0m | [0m228.6    [0m | [0m0.8374   [0m | [0m0.9308   [0m | [0m79.77    [0m | [0m3.384    [0m | [0m0.009191 [0m |
| [0m33       [0m | [0m-620.8   [0m | [0m0.08956  [0m | [0m139.2    [0m | [0m0.8885   [0m | [0m0.9105   [0m | [0m107.8    [0m | [0m4.268    [0m | [0m0.06296  [0m |
| [0m34       [0m | [0m-29.15   [0m | [0m0.0242   [0m | [0m235.8    [0m | [0m0.829    [0m | [0m0.9827   [0m | [0m101.2    [0m | [0m2.157    [0m | [0m0.05132  [0m |
| [0m35       [0m | [0m-22.4    [0m | [0m0.06289  [0m | [0m78.32    [0m | [0m0.9685   [0m | [0m0.9421   [0m | [0m121.1    [0m | [0m4.342    [0m | [0m0.009936 [0m |
| [0m36       [0m | [0m-49.72   [0m | [0m0.06463  [0m | [0m93.84    [0m | [0m0.95



| [0m38       [0m | [0m-17.26   [0m | [0m0.0337   [0m | [0m295.6    [0m | [0m0.823    [0m | [0m0.9053   [0m | [0m110.9    [0m | [0m2.483    [0m | [0m0.03615  [0m |
| [0m39       [0m | [0m-50.98   [0m | [0m0.08765  [0m | [0m98.86    [0m | [0m0.9769   [0m | [0m0.9644   [0m | [0m85.06    [0m | [0m1.238    [0m | [0m0.02451  [0m |
| [0m40       [0m | [0m-19.81   [0m | [0m0.09684  [0m | [0m122.2    [0m | [0m0.8318   [0m | [0m0.9298   [0m | [0m121.6    [0m | [0m1.66     [0m | [0m0.07779  [0m |




| [0m41       [0m | [0m-76.19   [0m | [0m0.01349  [0m | [0m288.5    [0m | [0m0.9055   [0m | [0m0.9043   [0m | [0m123.6    [0m | [0m2.433    [0m | [0m0.07314  [0m |
| [0m42       [0m | [0m-16.3    [0m | [0m0.05237  [0m | [0m28.68    [0m | [0m0.8211   [0m | [0m0.9149   [0m | [0m74.32    [0m | [0m1.211    [0m | [0m0.004702 [0m |
| [0m43       [0m | [0m-119.2   [0m | [0m0.09484  [0m | [0m28.3     [0m | [0m0.9012   [0m | [0m0.9119   [0m | [0m77.75    [0m | [0m4.052    [0m | [0m0.09381  [0m |
| [0m44       [0m | [0m-1.603e+2[0m | [0m0.04697  [0m | [0m2.687    [0m | [0m0.9988   [0m | [0m0.9048   [0m | [0m88.92    [0m | [0m3.156    [0m | [0m0.08917  [0m |
| [0m45       [0m | [0m-427.5   [0m | [0m0.08231  [0m | [0m183.2    [0m | [0m0.8793   [0m | [0m0.9834   [0m | [0m119.6    [0m | [0m4.488    [0m | [0m0.07176  [0m |
| [0m46       [0m | [0m-18.3    [0m | [0m0.00978  [0m | [0m90.28    [0m | [0m0.89



| [0m64       [0m | [0m-630.7   [0m | [0m0.05059  [0m | [0m291.3    [0m | [0m0.8564   [0m | [0m0.969    [0m | [0m84.14    [0m | [0m4.184    [0m | [0m0.09183  [0m |
| [0m65       [0m | [0m-18.38   [0m | [0m0.05855  [0m | [0m114.4    [0m | [0m0.8681   [0m | [0m0.9512   [0m | [0m70.16    [0m | [0m1.153    [0m | [0m0.07505  [0m |
| [0m66       [0m | [0m-49.11   [0m | [0m0.07457  [0m | [0m88.58    [0m | [0m0.8699   [0m | [0m0.9814   [0m | [0m68.89    [0m | [0m4.756    [0m | [0m0.02082  [0m |
| [0m67       [0m | [0m-17.56   [0m | [0m0.08167  [0m | [0m52.2     [0m | [0m0.9395   [0m | [0m0.9616   [0m | [0m67.89    [0m | [0m1.202    [0m | [0m0.0403   [0m |
| [0m68       [0m | [0m-152.2   [0m | [0m0.06881  [0m | [0m168.2    [0m | [0m0.8427   [0m | [0m0.9617   [0m | [0m64.74    [0m | [0m4.046    [0m | [0m0.07843  [0m |
| [0m69       [0m | [0m-216.2   [0m | [0m0.0569   [0m | [0m258.3    [0m | [0m0.99



| [0m75       [0m | [0m-125.0   [0m | [0m0.1      [0m | [0m1.0      [0m | [0m0.999    [0m | [0m0.9      [0m | [0m128.0    [0m | [0m1.0      [0m | [0m1e-06    [0m |
| [0m76       [0m | [0m-74.66   [0m | [0m0.02019  [0m | [0m255.7    [0m | [0m0.9611   [0m | [0m0.9972   [0m | [0m127.7    [0m | [0m3.883    [0m | [0m0.01859  [0m |
| [0m77       [0m | [0m-295.2   [0m | [0m0.1      [0m | [0m61.06    [0m | [0m0.999    [0m | [0m0.9      [0m | [0m128.0    [0m | [0m1.0      [0m | [0m0.1      [0m |
| [0m78       [0m | [0m-146.0   [0m | [0m0.04574  [0m | [0m81.78    [0m | [0m0.9928   [0m | [0m0.9609   [0m | [0m83.23    [0m | [0m4.598    [0m | [0m0.07935  [0m |




| [0m79       [0m | [0m-110.3   [0m | [0m0.08951  [0m | [0m299.8    [0m | [0m0.8786   [0m | [0m0.9132   [0m | [0m96.54    [0m | [0m3.445    [0m | [0m0.06222  [0m |




| [0m80       [0m | [0m-631.4   [0m | [0m1e-06    [0m | [0m300.0    [0m | [0m0.8      [0m | [0m0.9      [0m | [0m64.0     [0m | [0m1.0      [0m | [0m1e-06    [0m |
| [0m81       [0m | [0m-18.21   [0m | [0m0.02868  [0m | [0m226.7    [0m | [0m0.8801   [0m | [0m0.9076   [0m | [0m114.1    [0m | [0m1.512    [0m | [0m0.08498  [0m |
| [0m82       [0m | [0m-295.2   [0m | [0m0.006638 [0m | [0m271.9    [0m | [0m0.9794   [0m | [0m0.9139   [0m | [0m64.84    [0m | [0m2.876    [0m | [0m0.01044  [0m |
| [0m83       [0m | [0m-30.32   [0m | [0m0.08306  [0m | [0m102.2    [0m | [0m0.919    [0m | [0m0.9007   [0m | [0m64.24    [0m | [0m4.862    [0m | [0m0.09488  [0m |
| [0m84       [0m | [0m-21.38   [0m | [0m0.08283  [0m | [0m195.3    [0m | [0m0.8782   [0m | [0m0.9457   [0m | [0m64.07    [0m | [0m2.159    [0m | [0m0.07091  [0m |
| [0m85       [0m | [0m-24.92   [0m | [0m1e-06    [0m | [0m257.6    [0m | [0m0.8 



| [0m87       [0m | [0m-630.4   [0m | [0m0.1      [0m | [0m300.0    [0m | [0m0.999    [0m | [0m1.0      [0m | [0m128.0    [0m | [0m5.0      [0m | [0m1e-06    [0m |
| [0m88       [0m | [0m-49.22   [0m | [0m0.04572  [0m | [0m59.97    [0m | [0m0.836    [0m | [0m0.9582   [0m | [0m81.17    [0m | [0m4.25     [0m | [0m0.08121  [0m |
| [0m89       [0m | [0m-15.99   [0m | [0m0.006159 [0m | [0m81.82    [0m | [0m0.9488   [0m | [0m0.9362   [0m | [0m100.7    [0m | [0m1.873    [0m | [0m0.0174   [0m |
| [0m90       [0m | [0m-16.27   [0m | [0m0.0979   [0m | [0m169.0    [0m | [0m0.8163   [0m | [0m0.9851   [0m | [0m114.3    [0m | [0m1.029    [0m | [0m0.02625  [0m |




| [0m91       [0m | [0m-631.2   [0m | [0m1e-06    [0m | [0m123.1    [0m | [0m0.999    [0m | [0m0.9      [0m | [0m83.34    [0m | [0m1.0      [0m | [0m1e-06    [0m |
| [0m92       [0m | [0m-15.96   [0m | [0m0.02385  [0m | [0m190.8    [0m | [0m0.8309   [0m | [0m0.964    [0m | [0m83.35    [0m | [0m1.252    [0m | [0m0.002418 [0m |
| [0m93       [0m | [0m-33.61   [0m | [0m0.05491  [0m | [0m20.18    [0m | [0m0.9939   [0m | [0m0.9989   [0m | [0m64.01    [0m | [0m4.62     [0m | [0m0.06765  [0m |
| [0m94       [0m | [0m-115.0   [0m | [0m1e-06    [0m | [0m26.94    [0m | [0m0.999    [0m | [0m1.0      [0m | [0m113.9    [0m | [0m1.0      [0m | [0m0.1      [0m |
| [0m95       [0m | [0m-122.2   [0m | [0m0.02273  [0m | [0m153.6    [0m | [0m0.971    [0m | [0m0.9325   [0m | [0m90.83    [0m | [0m2.553    [0m | [0m0.08017  [0m |
| [0m96       [0m | [0m-30.34   [0m | [0m0.04036  [0m | [0m127.3    [0m | [0m0.82

Note: After waiting for a couple of hours, I interrupted the run above. Next, I tune the best architecture found so far (a single hidden layer), while still allowing the layer size to increase.

In [8]:
def hyperparameter_function_mlp_with_cut(learning_rate_init, batch_size, alpha, beta_1, beta_2, hidden_size):
    """Objective for the Bayesian optimization of a single-hidden-layer MLP.

    An `MLPRegressor` with the given hyperparameters is evaluated with 5-fold
    cross-validation on `prep_train_data` / `random_train_balance`.

    `batch_size` and `hidden_size` are truncated to `int`. The network has exactly
    one hidden layer with `hidden_size` units.

    Returns the smallest of the five 'neg_mean_absolute_error' scores, i.e. the
    score of the fold with the largest mean absolute error.
    """
    layer_sizes = [int(hidden_size)]
    # Only the initial learning rate is tuned; the schedule is left at the MLPRegressor default
    candidate = MLPRegressor(
        hidden_layer_sizes=layer_sizes,
        alpha=alpha,
        batch_size=int(batch_size),
        learning_rate_init=learning_rate_init,
        beta_1=beta_1,
        beta_2=beta_2,
        max_iter=15000,
        random_state=7,
    )
    fold_scores = model_selection.cross_val_score(
        candidate, X=prep_train_data, y=random_train_balance, cv=5, scoring='neg_mean_absolute_error'
    )
    return fold_scores.min()


# Search interval (lower, upper) for every tuned hyperparameter
pbounds = {
    'learning_rate_init': (0.0001, 0.01),
    'batch_size': (150, 250),
    'alpha': (0.000001, 0.1),
    'beta_1': (0.8, 0.999),
    'beta_2': (0.9, 0.999999),
    'hidden_size': (115, 130),
}

optimizer = BayesianOptimization(f=hyperparameter_function_mlp_with_cut, pbounds=pbounds, random_state=25)

# The search only runs when it is enabled in the config cell
if hyper_opt:
    optimizer.maximize(init_points=50, n_iter=100)

|   iter    |  target   |   alpha   | batch_... |  beta_1   |  beta_2   | hidden... | learni... |
-------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m-16.87   [0m | [0m0.08701  [0m | [0m208.2    [0m | [0m0.8555   [0m | [0m0.9186   [0m | [0m121.2    [0m | [0m0.001262 [0m |
| [95m2        [0m | [95m-15.73   [0m | [95m0.0685   [0m | [95m193.8    [0m | [95m0.9107   [0m | [95m0.9367   [0m | [95m121.0    [0m | [95m0.001219 [0m |
| [0m3        [0m | [0m-16.3    [0m | [0m0.0447   [0m | [0m208.5    [0m | [0m0.8322   [0m | [0m0.9521   [0m | [0m119.9    [0m | [0m0.007022 [0m |
| [0m4        [0m | [0m-19.04   [0m | [0m0.03664  [0m | [0m233.6    [0m | [0m0.8958   [0m | [0m0.9516   [0m | [0m120.7    [0m | [0m0.009976 [0m |
| [0m5        [0m | [0m-16.2    [0m | [0m0.05142  [0m | [0m205.9    [0m | [0m0.8069   [0m | [0m0.972    [0m | [0m121.3    [0m | [0m0.



| [0m21       [0m | [0m-16.18   [0m | [0m0.04096  [0m | [0m161.0    [0m | [0m0.8171   [0m | [0m0.9748   [0m | [0m117.7    [0m | [0m0.0002494[0m |
| [0m22       [0m | [0m-21.96   [0m | [0m0.09138  [0m | [0m242.6    [0m | [0m0.916    [0m | [0m0.9313   [0m | [0m130.0    [0m | [0m0.001585 [0m |
| [0m23       [0m | [0m-15.55   [0m | [0m0.09945  [0m | [0m162.5    [0m | [0m0.8699   [0m | [0m0.9531   [0m | [0m119.9    [0m | [0m0.008084 [0m |
| [0m24       [0m | [0m-16.24   [0m | [0m0.09603  [0m | [0m203.9    [0m | [0m0.8994   [0m | [0m0.9801   [0m | [0m127.0    [0m | [0m0.001287 [0m |
| [0m25       [0m | [0m-17.04   [0m | [0m0.0246   [0m | [0m212.7    [0m | [0m0.8751   [0m | [0m0.9063   [0m | [0m120.6    [0m | [0m0.0008337[0m |
| [0m26       [0m | [0m-16.33   [0m | [0m0.004445 [0m | [0m179.3    [0m | [0m0.9502   [0m | [0m0.9953   [0m | [0m123.8    [0m | [0m0.009881 [0m |
| [0m27       [0m | [0m-1



| [0m51       [0m | [0m-45.38   [0m | [0m0.1      [0m | [0m202.3    [0m | [0m0.8915   [0m | [0m0.9271   [0m | [0m127.5    [0m | [0m0.0001   [0m |
| [0m52       [0m | [0m-16.84   [0m | [0m0.05541  [0m | [0m192.3    [0m | [0m0.9032   [0m | [0m0.9971   [0m | [0m119.4    [0m | [0m0.004392 [0m |
| [0m53       [0m | [0m-16.16   [0m | [0m0.04935  [0m | [0m204.9    [0m | [0m0.8834   [0m | [0m0.9744   [0m | [0m128.7    [0m | [0m0.004229 [0m |
| [0m54       [0m | [0m-271.7   [0m | [0m0.005701 [0m | [0m164.1    [0m | [0m0.9864   [0m | [0m0.9667   [0m | [0m118.3    [0m | [0m0.009098 [0m |
| [0m55       [0m | [0m-17.36   [0m | [0m0.07695  [0m | [0m160.7    [0m | [0m0.8407   [0m | [0m0.9947   [0m | [0m120.0    [0m | [0m0.005941 [0m |
| [0m56       [0m | [0m-15.42   [0m | [0m0.05089  [0m | [0m162.2    [0m | [0m0.838    [0m | [0m0.9218   [0m | [0m121.6    [0m | [0m0.001384 [0m |
| [0m57       [0m | [0m-3



| [0m74       [0m | [0m-50.92   [0m | [0m1e-06    [0m | [0m177.1    [0m | [0m0.8      [0m | [0m0.983    [0m | [0m120.1    [0m | [0m0.0001   [0m |
| [0m75       [0m | [0m-16.16   [0m | [0m0.04234  [0m | [0m180.1    [0m | [0m0.9336   [0m | [0m0.9125   [0m | [0m122.5    [0m | [0m0.003437 [0m |
| [0m76       [0m | [0m-17.68   [0m | [0m1e-06    [0m | [0m180.6    [0m | [0m0.8785   [0m | [0m1.0      [0m | [0m123.9    [0m | [0m0.01     [0m |
| [0m77       [0m | [0m-15.63   [0m | [0m0.08615  [0m | [0m153.1    [0m | [0m0.8422   [0m | [0m0.9136   [0m | [0m126.1    [0m | [0m0.0009328[0m |
| [0m78       [0m | [0m-15.93   [0m | [0m0.06506  [0m | [0m179.7    [0m | [0m0.8491   [0m | [0m0.9657   [0m | [0m125.2    [0m | [0m0.00051  [0m |
| [95m79       [0m | [95m-14.84   [0m | [95m0.06043  [0m | [95m189.7    [0m | [95m0.9294   [0m | [95m0.9209   [0m | [95m121.2    [0m | [95m0.002745 [0m |
| [95m80       [0m

In [9]:
# Show the best result of the most recently run optimizer: a dict with the best 'target'
# value and the 'params' that produced it
optimizer.max

{'target': -14.624831632476509,
 'params': {'alpha': 0.058428237542730146,
  'batch_size': 158.60572041092112,
  'beta_1': 0.9355582054882612,
  'beta_2': 0.9089543612935872,
  'hidden_size': 126.6933372337246,
  'learning_rate_init': 0.001283462649989831}}