In [1]:
# importing libraries

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import QuantileTransformer
from sklearn.compose import ColumnTransformer
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from optuna.samplers import TPESampler

import optuna

In [2]:
# loading data

data = pd.read_csv('cereal.csv')
data.head()

Unnamed: 0,name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
0,100% Bran,N,C,70,4,1,130,10.0,5.0,6,280,25,3,1.0,0.33,68.402973
1,100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8,135,0,3,1.0,1.0,33.983679
2,All-Bran,K,C,70,4,1,260,9.0,7.0,5,320,25,3,1.0,0.33,59.425505
3,All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0,330,25,3,1.0,0.5,93.704912
4,Almond Delight,R,C,110,2,2,200,1.0,14.0,8,-1,25,3,1.0,0.75,34.384843


In [3]:
# defining X and y

X = data.drop('rating', axis=1)
y = data['rating']

In [4]:
cat_columns = ['mfr', 'type']
num_columns = X.select_dtypes(include=np.number).columns.tolist()

In [5]:
# splitting data in train and validation

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.33, random_state=42)

In [6]:
# creating preprocessor

cat_transformer = OneHotEncoder(handle_unknown='ignore')
num_transformer = QuantileTransformer(n_quantiles=100, output_distribution='normal')



preprocessor = ColumnTransformer(
        transformers=[
            ('num', num_transformer, num_columns),
            ('cat', cat_transformer, cat_columns)
        ])



In [7]:
# preprocessing data

X_train = preprocessor.fit_transform(X_train)
X_val = preprocessor.transform(X_val)

  % (self.n_quantiles, n_samples))


In [30]:
def objective(trial):
    model = keras.models.Sequential()
    model.add(
        keras.layers.Dense(
            50,
            activation=trial.suggest_categorical("activation", ["relu", "linear", 'swish']),
            input_shape=[21],
            ))
    model.add(
        keras.layers.Dense(10,
        activation=trial.suggest_categorical("activation", ["relu", "linear", 'swish']),)
    )
    model.add(
        keras.layers.Dense(1)
    )
    # defining suggestion for learning rate and optimizer
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])

    if optimizer_name=="Adam":
        optimizer = Adam(learning_rate=lr)
    elif optimizer_name=="RMSprop":
        optimizer = RMSprop(learning_rate=lr)
    elif optimizer_name=="SGD":
        optimizer = SGD(learning_rate=lr)
    
    # compiling model
    model.compile(
        loss="mae", optimizer=optimizer
    )

    #fitting model
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        shuffle=True,
        batch_size=32,
        epochs=10,
        verbose=False,
    )

    score = model.evaluate(X_val, y_val, verbose=0)
    return score

In [31]:
# create Optuna study

study = optuna.create_study(direction="minimize", sampler=TPESampler(multivariate=True))
study.optimize(objective, n_trials=100, timeout=600)

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2021-03-01 16:45:43,533][0m A new study created in memory with name: no-name-74b5a9ae-88d7-4862-b888-bd5686f9e6d5[0m
[32m[I 2021-03-01 16:45:44,356][0m Trial 0 finished with value: 7.599639415740967 and parameters: {'activation': 'linear', 'lr': 0.047618409677743265, 'optimizer': 'Adam'}. Best is trial 0 with value: 7.599639415740967.[0m
[32m[I 2021-03-01 16:45:45,156][0m Trial 1 finished with value: 42.255958557128906 and parameters: {'activation': 'linear', 'lr': 0.00018470009306643094, 'optimizer': 'Adam'}. Best is trial 0 with value: 7.599639415740967.[0m
[32m[I 2021-03-01 16:45:45,877][0m Trial 2 finished with value: 42.633541107177734 and parameters: {'activation': 'relu', 'lr': 3.634656181437189e-05, 'optimizer': 'SGD'}. Best is trial 0 with value: 7.599639415740967.[0m
[32m[I 2021-03-01 16:45:46,593][0m Trial 3 finished with value: 40.67893981933594 and parameters: {'activation': 'relu', 'lr': 0.001646895871318738, 'optimizer': 'SGD'}. Best is trial 0 with 

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=9f2954cd-f149-4d4f-a4b0-a1e3eb6d921f' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>