In [5]:
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import math
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import mean_squared_error
import random

### Open Drugs Dataset and Assign Predictor and Target Variables


In [6]:
x = pd.read_csv("x_train_red.csv")
x_normalized = (x - x.min()) / (x.max() - x.min())
x_normalized.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,0.563636,0.231405,0.41,0.089041,0.120401,0.089552,0.038869,0.611601,0.267717,0.173913,0.160714
1,0.209091,0.512397,0.03,0.09589,0.070234,0.089552,0.035336,0.367841,0.559055,0.149068,0.553571
2,0.718182,0.396694,0.49,0.232877,0.147157,0.059701,0.028269,0.802496,0.401575,0.217391,0.625
3,0.209091,0.347107,0.04,0.143836,0.108696,0.089552,0.074205,0.633627,0.748031,0.322981,0.178571
4,0.381818,0.264463,0.49,0.130137,0.118729,0.253731,0.371025,0.596916,0.440945,0.130435,0.196429


In [7]:
## Since y is already one-hot encoded, no need to get dummies
y = pd.read_csv("y_train_red.csv")
y.head()

Unnamed: 0,3,4,5,6,7,8
0,0,0,0,1,0,0
1,0,0,0,1,0,0
2,0,0,0,1,0,0
3,0,0,0,1,0,0
4,0,0,1,0,0,0


In [8]:
x_train, x_test, y_train, y_test = train_test_split(x_normalized, y, test_size=0.3)

len_input = len(x_train.columns)
len_output = len(y_train.columns)

print("Len Input: {}".format(len_input))
print("Len Output: {}".format(len_output))

Len Input: 11
Len Output: 6


In [9]:
def objective_function(x, y, x_validation, y_validation, params):
    model = MLPClassifier(
        random_state=1, 
        max_iter=1000,
        alpha=params['alpha'],
        learning_rate_init=params['learning_rate']
    )

    model.fit(x, y)

    predictions = model.predict(x_validation)

    return mean_squared_error(predictions, y_validation)

In [10]:
grid_parameters = [
    { 'alpha': 0.002, 'learning_rate': 0.01 },
    { 'alpha': 0.003, 'learning_rate': 0.009 },
    { 'alpha': 0.004, 'learning_rate': 0.008 },
    { 'alpha': 0.005, 'learning_rate': 0.007 },
    { 'alpha': 0.006, 'learning_rate': 0.006 },
    { 'alpha': 0.007, 'learning_rate': 0.005 },
    { 'alpha': 0.008, 'learning_rate': 0.004 },
    { 'alpha': 0.009, 'learning_rate': 0.003 }
]

In [11]:
best_params = None
current_loss = 1000

for params in grid_parameters:
    loss = objective_function(x_train.values, y_train.values, x_test.values, y_test.values, params)
    print("Loss: {}".format(loss))

    if loss < current_loss:
        best_params = params
        current_loss = loss

print("Best Loss: {}".format(current_loss))
print("Best params: {}".format(best_params))

Loss: 0.12710765239948119
Loss: 0.12775616083009078
Loss: 0.13683527885862515
Loss: 0.1238651102464332
Loss: 0.1309987029831388




Loss: 0.13359273670557717




Loss: 0.13553826199740596
Loss: 0.12386511024643321
Best Loss: 0.1238651102464332
Best params: {'alpha': 0.005, 'learning_rate': 0.007}




In [12]:
num_samples = 10
best_params = None
current_loss = 1000

for i in range(num_samples):
    params = {
        'alpha': random.randrange(0, 100) * 0.0001,
        'learning_rate': random.randrange(0, 100) * 0.0001
    }

    loss = objective_function(x_train.values, y_train.values, x_test.values, y_test.values, params)
    print("Loss: {}".format(loss))

    if loss < current_loss:
        best_params = params
        current_loss = loss

print("Best Loss: {}".format(current_loss))
print("Best params: {}".format(best_params))



Loss: 0.1303501945525292




Loss: 0.1303501945525292
Loss: 0.13488975356679636




Loss: 0.13942931258106353




Loss: 0.1219195849546044




Loss: 0.1297016861219196




Loss: 0.1238651102464332




Loss: 0.13164721141374838




Loss: 0.13424124513618677
Loss: 0.12970168612191957
Best Loss: 0.1219195849546044
Best params: {'alpha': 0.0002, 'learning_rate': 0.0044}


In [13]:
# Building my current belief
x_samples = []
y_samples = []

num_samples = 10
for i in range(num_samples):
    params = {
        'alpha': random.randrange(0, 100) * 0.0001,
        'learning_rate': random.randrange(0, 100) * 0.0001
    }

    loss = objective_function(x_train.values, y_train.values, x_test.values, y_test.values, params)
    print("Loss: {}".format(loss))

    x1 = params['alpha']
    x2 = params['learning_rate']
    y = loss

    x_samples.append([x1, x2])
    y_samples.append([y])

Loss: 0.12970168612191957
Loss: 0.12840466926070038
Loss: 0.13424124513618677




Loss: 0.1303501945525292




Loss: 0.1303501945525292
Loss: 0.13229571984435798




Loss: 0.1251621271076524




Loss: 0.1264591439688716
Loss: 0.11867704280155643
Loss: 0.1303501945525292




In [14]:
x_samples

[[0.0015, 0.0055000000000000005],
 [0.0051, 0.007500000000000001],
 [0.0043, 0.0081],
 [0.0068000000000000005, 0.0014],
 [0.0066, 0.0032],
 [0.003, 0.008],
 [0.0085, 0.0006000000000000001],
 [0.0009000000000000001, 0.0059],
 [0.007200000000000001, 0.0097],
 [0.0051, 0.0018000000000000002]]

In [15]:
y_samples

[[0.12970168612191957],
 [0.12840466926070038],
 [0.13424124513618677],
 [0.1303501945525292],
 [0.1303501945525292],
 [0.13229571984435798],
 [0.1251621271076524],
 [0.1264591439688716],
 [0.11867704280155643],
 [0.1303501945525292]]

In [16]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

kernel = 1 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
gaussian_process = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gaussian_process.fit(x_samples, y_samples)

mean, std = gaussian_process.predict(x_samples, return_std=True)



In [17]:
mean

array([0.12970165, 0.1284048 , 0.13424104, 0.13035014, 0.13035017,
       0.13229583, 0.12516215, 0.12645915, 0.11867705, 0.13035024])

In [18]:
std.reshape(-1, 1)

array([[9.99989494e-06],
       [9.99990049e-06],
       [9.99972008e-06],
       [9.99986857e-06],
       [9.99998376e-06],
       [9.99990882e-06],
       [9.99998654e-06],
       [9.99992686e-06],
       [1.00000004e-05],
       [9.99994629e-06]])

In [19]:
def f(params):
    model = MLPClassifier(
        random_state=1, 
        max_iter=1000,
        alpha=params['alpha'],
        learning_rate_init=params['learning_rate']
    )
    
    model.fit(x_train.values, y_train.values)

    predictions = model.predict(x_test.values)

    score = mean_squared_error(predictions, y_test.values)
    
    return score

In [20]:
from hyperopt import hp, fmin, tpe

params_list = {
    'alpha': hp.uniform('alpha', 0.001, 0.01),
    'learning_rate': hp.uniform('learning_rate', 0.0001, 0.001)
}

best_params = fmin(fn=f, space=params_list, max_evals=24, algo=tpe.suggest)

best_params

  4%|▍         | 1/24 [00:02<00:53,  2.31s/trial, best loss: 0.12775616083009078]




  8%|▊         | 2/24 [00:04<00:52,  2.37s/trial, best loss: 0.12775616083009078]




 12%|█▎        | 3/24 [00:07<00:50,  2.41s/trial, best loss: 0.1271076523994812] 




 17%|█▋        | 4/24 [00:09<00:47,  2.39s/trial, best loss: 0.1232166018158236]




 21%|██        | 5/24 [00:11<00:45,  2.38s/trial, best loss: 0.1232166018158236]




 25%|██▌       | 6/24 [00:14<00:42,  2.36s/trial, best loss: 0.1232166018158236]




 29%|██▉       | 7/24 [00:16<00:40,  2.36s/trial, best loss: 0.1232166018158236]




 33%|███▎      | 8/24 [00:18<00:37,  2.37s/trial, best loss: 0.1232166018158236]




 38%|███▊      | 9/24 [00:21<00:35,  2.37s/trial, best loss: 0.12127107652399481]




 42%|████▏     | 10/24 [00:23<00:33,  2.38s/trial, best loss: 0.12127107652399481]




 46%|████▌     | 11/24 [00:26<00:30,  2.36s/trial, best loss: 0.12127107652399481]




 50%|█████     | 12/24 [00:28<00:28,  2.37s/trial, best loss: 0.12127107652399481]




 54%|█████▍    | 13/24 [00:30<00:26,  2.38s/trial, best loss: 0.12127107652399481]




 58%|█████▊    | 14/24 [00:33<00:23,  2.38s/trial, best loss: 0.12127107652399481]




 62%|██████▎   | 15/24 [00:35<00:21,  2.37s/trial, best loss: 0.12127107652399481]




 67%|██████▋   | 16/24 [00:37<00:18,  2.36s/trial, best loss: 0.12127107652399481]




 71%|███████   | 17/24 [00:40<00:16,  2.35s/trial, best loss: 0.12127107652399481]




 75%|███████▌  | 18/24 [00:42<00:13,  2.33s/trial, best loss: 0.12127107652399481]




 79%|███████▉  | 19/24 [00:44<00:11,  2.34s/trial, best loss: 0.12127107652399481]




 83%|████████▎ | 20/24 [00:47<00:09,  2.33s/trial, best loss: 0.12127107652399481]




 88%|████████▊ | 21/24 [00:49<00:06,  2.31s/trial, best loss: 0.12127107652399481]




 92%|█████████▏| 22/24 [00:51<00:04,  2.31s/trial, best loss: 0.12127107652399481]




 96%|█████████▌| 23/24 [00:54<00:02,  2.31s/trial, best loss: 0.12062256809338522]




100%|██████████| 24/24 [00:56<00:00,  2.35s/trial, best loss: 0.12062256809338522]





{'alpha': 0.00413879774579173, 'learning_rate': 0.0003276604337621048}

We will generate a new model with `alpha` = 0.005 and `learning_rate` = 0.007