In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot
import keras
import tensorflow as tf
from keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential 
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import GridSearchCV
import sklearn
from sklearn.model_selection import RandomizedSearchCV

from keras.wrappers.scikit_learn import KerasClassifier

from skopt import dummy_minimize


from keras.callbacks import ReduceLROnPlateau

from skopt import gp_minimize
from skopt.utils import use_named_args

In [2]:
# Peek at the raw Fashion-MNIST splits and report the shapes of the image
# and label arrays before any reshaping or encoding is applied.
(trainX, trainy), (testX, testy) = fashion_mnist.load_data()

print(
    'Train: X=%s, y=%s' % (trainX.shape, trainy.shape),
    'Test: X=%s, y=%s' % (testX.shape, testy.shape),
    sep='\n',
)

def load_dataset():
    """Load Fashion-MNIST in the layout used by the model in this notebook.

    Returns:
        A 4-tuple ``(trainX, trainY, testX, testY)``. The image arrays are
        reshaped to ``(n, 28, 28, 1)`` (a trailing single-channel axis is
        added) and the label arrays are passed through ``to_categorical``.
    """
    (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

    # Append an explicit channel axis of size 1 to every 28x28 image.
    train_images = train_images.reshape((len(train_images), 28, 28, 1))
    test_images = test_images.reshape((len(test_images), 28, 28, 1))

    return (
        train_images,
        to_categorical(train_labels),
        test_images,
        to_categorical(test_labels),
    )

def prep_pixels(train, test):
    """Rescale two pixel arrays to ``float32`` values divided by 255.

    Args:
        train: array of pixel intensities for the training split.
        test: array of pixel intensities for the test split.

    Returns:
        ``(train_norm, test_norm)``: ``float32`` copies of the inputs divided
        by 255.0, which maps 0-255 intensities onto the 0-1 range. The input
        arrays themselves are not modified.
    """
    def _scale(pixels):
        # Cast before dividing so the result is float32.
        return pixels.astype('float32') / 255.0

    return _scale(train), _scale(test)

Train: X=(60000, 28, 28), y=(60000,)
Test: X=(10000, 28, 28), y=(10000,)


In [3]:
# Build the arrays the tuning objective trains on: reshaped images with
# categorical labels, then pixel values scaled to the 0-1 range.
trainX, trainY, testX, testY = load_dataset()
trainX, testX = prep_pixels(train=trainX, test=testX)



In [4]:
pip install optuna

Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install mlflow

Collecting mlflow
  Downloading mlflow-1.24.0-py3-none-any.whl (16.5 MB)
Collecting databricks-cli>=0.8.7
  Downloading databricks-cli-0.16.4.tar.gz (58 kB)
Collecting sqlparse>=0.3.1
  Downloading sqlparse-0.4.2-py3-none-any.whl (42 kB)
Collecting gitpython>=2.1.0
Note: you may need to restart the kernel to use updated packages.
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
Collecting waitress
  Downloading waitress-2.1.1-py3-none-any.whl (57 kB)
Collecting docker>=4.0.0
  Downloading docker-5.0.3-py2.py3-none-any.whl (146 kB)
Collecting prometheus-flask-exporter
  Downloading prometheus_flask_exporter-0.19.0-py3-none-any.whl (18 kB)
Collecting querystring-parser
  Downloading querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)
Collecting websocket-client>=0.32.0
  Downloading websocket_client-1.3.1-py3-none-any.whl (54 kB)
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)
Collecting smmap<6,>=3.0.1
  Downloading smmap-5.0.0-py3-none-any.whl (

In [6]:
import optuna
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import RMSprop
import mlflow
from mlflow.tracking import MlflowClient
from mlflow.utils.mlflow_tags import MLFLOW_PARENT_RUN_ID

In [7]:

def objective(trial):
    """Train one CNN configuration sampled from ``trial`` and return its score.

    The search space covers the kernel regularizer of the hidden dense layer
    (type and weight(s)), the optimizer (Adam or RMSprop, with its learning
    rate and, for RMSprop, momentum) and the batch size. All sampled
    hyperparameters and the resulting accuracy are logged to an MLflow run
    opened for the duration of the trial.

    Args:
        trial: the Optuna trial used to sample hyperparameters.

    Returns:
        The ``val_accuracy`` reported for the last of the 5 training epochs,
        evaluated on ``(testX, testY)``.
    """
    # Drop Keras state left behind by earlier trials so that memory does not
    # keep growing while the study builds one model after another.
    tf.keras.backend.clear_session()

    with mlflow.start_run():
        model = Sequential()

        # CONVOLUTIONAL LAYER
        model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))

        # MAX POOLING LAYER
        model.add(MaxPooling2D((2, 2)))

        # FLATTENING THE OUTPUT
        model.add(Flatten())

        # DEFINE A SEARCH SPACE FOR REGULARIZER
        # Weights are sampled on a log scale; suggest_float(log=True) is the
        # non-deprecated spelling of suggest_loguniform.
        regularizer_type = trial.suggest_categorical('regularizer_type', ['l1', 'l2', 'l1_l2'])

        if regularizer_type == 'l1':
            regularizer = regularizers.l1(l1=trial.suggest_float('weight', 0.001, 0.1, log=True))
        elif regularizer_type == 'l2':
            regularizer = regularizers.l2(l2=trial.suggest_float('weight', 0.001, 0.1, log=True))
        else:  # 'l1_l2'
            regularizer = regularizers.l1_l2(
                l1=trial.suggest_float('weight1', 0.001, 0.1, log=True),
                l2=trial.suggest_float('weight2', 0.001, 0.1, log=True),
            )

        # DENSE LAYERS
        model.add(Dense(188, activation='relu', kernel_initializer='he_uniform', kernel_regularizer=regularizer))
        model.add(Dense(10, activation='softmax'))

        # VARYING THE OPTIMIZER
        optimizer_name = trial.suggest_categorical('optimizer_name', ['Adam', 'RMSprop'])

        if optimizer_name == 'Adam':
            optimizer = Adam(learning_rate=trial.suggest_float('learning_rate', 0.00001, 0.0001))
        else:  # 'RMSprop'
            optimizer = RMSprop(
                learning_rate=trial.suggest_float('learning_rate', 0.00001, 0.0001),
                momentum=trial.suggest_float('momentum', 0.6, 0.95),
            )

        # COMPILE THE MODEL
        model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

        # VARY THE BATCH SIZE
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])

        # Record every sampled hyperparameter (including the regularizer
        # weights, learning rate and momentum) on this trial's MLflow run.
        mlflow.log_params(trial.params)

        # FIT THE MODEL
        # NOTE(review): the test split doubles as validation data, so the
        # hyperparameters are effectively selected on the test set. Consider
        # holding out part of the training data for tuning instead.
        history = model.fit(
            trainX,
            trainY,
            batch_size=batch_size,
            epochs=5,
            verbose=1,
            validation_data=(testX, testY))

        # Score the trial by the validation accuracy of the final epoch.
        accuracy = history.history['val_accuracy'][-1]

        mlflow.log_metric("accuracy", accuracy)

        print('accuracy is ', accuracy)

        return accuracy

In [None]:
from optuna.integration.mlflow import MLflowCallback

# MLflowCallback logs each finished trial to the tracking URI it is given,
# in an experiment named after the study (the recorded output of this cell
# shows an experiment called 'cnn_study' being created). Point the runs that
# objective() opens itself at the same store and experiment, so that all
# results of the study end up in one place.
TRACKING_URI = 'ml_exp1'
STUDY_NAME = 'cnn_study'
N_TRIALS = 30

mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(STUDY_NAME)

if __name__ == "__main__":
    mlflc = MLflowCallback(tracking_uri=TRACKING_URI, metric_name='accuracy')

    # No storage backend is configured, so the study lives in memory only and
    # there is never an existing study to load.
    study = optuna.create_study(direction='maximize', study_name=STUDY_NAME)
    study.optimize(objective, n_trials=N_TRIALS, callbacks=[mlflc])

  mlflc = MLflowCallback(tracking_uri='ml_exp1',metric_name='accuracy')
[32m[I 2022-03-22 23:27:48,213][0m A new study created in memory with name: cnn_study[0m
The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh()

All git commands will error until this is rectified.

$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - error|e|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[32m[I 2022-03-22 23:32:18,335][0m Trial 0 finished with value: 0.8047000169754028 and parameters: {'regularizer_type': 'l1', 'weight': 0.0011301156984240145, 'optimizer_name': 'Adam', 'learning_rate': 2.639184046128422e-05, 'batch_size': 32}. Best is trial 0 with value: 0.8047000169754028.[0m
2022/03/22 23:32:18 INFO mlflow.tracking.fluent: Experiment with name 'cnn_study' does not exist. Creating a new experiment.


accuracy is  0.8047000169754028
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[32m[I 2022-03-22 23:36:01,559][0m Trial 1 finished with value: 0.7615000009536743 and parameters: {'regularizer_type': 'l1', 'weight': 0.012893358719873663, 'optimizer_name': 'RMSprop', 'learning_rate': 6.123906213143902e-05, 'momentum': 0.8371379734070274, 'batch_size': 64}. Best is trial 0 with value: 0.8047000169754028.[0m


accuracy is  0.7615000009536743
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[32m[I 2022-03-22 23:40:40,451][0m Trial 2 finished with value: 0.8689000010490417 and parameters: {'regularizer_type': 'l2', 'weight': 0.00837534470427676, 'optimizer_name': 'RMSprop', 'learning_rate': 5.401834617262302e-05, 'momentum': 0.7886624671275719, 'batch_size': 32}. Best is trial 2 with value: 0.8689000010490417.[0m


accuracy is  0.8689000010490417
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[32m[I 2022-03-22 23:44:44,893][0m Trial 3 finished with value: 0.761900007724762 and parameters: {'regularizer_type': 'l1', 'weight': 0.007470898138984721, 'optimizer_name': 'Adam', 'learning_rate': 9.523066251816328e-05, 'batch_size': 32}. Best is trial 2 with value: 0.8689000010490417.[0m


accuracy is  0.761900007724762
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[32m[I 2022-03-22 23:50:21,722][0m Trial 4 finished with value: 0.7657999992370605 and parameters: {'regularizer_type': 'l1_l2', 'weight1': 0.017215444150637284, 'weight2': 0.08689870620378358, 'optimizer_name': 'RMSprop', 'learning_rate': 7.410919663696737e-05, 'momentum': 0.9422595625850847, 'batch_size': 32}. Best is trial 2 with value: 0.8689000010490417.[0m


accuracy is  0.7657999992370605
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[32m[I 2022-03-22 23:56:33,146][0m Trial 5 finished with value: 0.698199987411499 and parameters: {'regularizer_type': 'l1_l2', 'weight1': 0.018362265940928695, 'weight2': 0.013778498224789245, 'optimizer_name': 'Adam', 'learning_rate': 2.073227261691051e-05, 'batch_size': 32}. Best is trial 2 with value: 0.8689000010490417.[0m


accuracy is  0.698199987411499
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[32m[I 2022-03-22 23:59:27,142][0m Trial 6 finished with value: 0.5397999882698059 and parameters: {'regularizer_type': 'l1_l2', 'weight1': 0.04548510393355477, 'weight2': 0.001959260541771335, 'optimizer_name': 'Adam', 'learning_rate': 3.4289295336526485e-05, 'batch_size': 128}. Best is trial 2 with value: 0.8689000010490417.[0m


accuracy is  0.5397999882698059
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[32m[I 2022-03-23 00:02:24,192][0m Trial 7 finished with value: 0.5863000154495239 and parameters: {'regularizer_type': 'l1_l2', 'weight1': 0.03013434469951901, 'weight2': 0.01784410043608344, 'optimizer_name': 'Adam', 'learning_rate': 5.254992588557392e-05, 'batch_size': 128}. Best is trial 2 with value: 0.8689000010490417.[0m


accuracy is  0.5863000154495239
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[32m[I 2022-03-23 00:06:07,797][0m Trial 8 finished with value: 0.734000027179718 and parameters: {'regularizer_type': 'l1', 'weight': 0.015118878129860746, 'optimizer_name': 'Adam', 'learning_rate': 9.174521715770914e-05, 'batch_size': 64}. Best is trial 2 with value: 0.8689000010490417.[0m


accuracy is  0.734000027179718
Epoch 1/5
Epoch 2/5