<a href="https://colab.research.google.com/github/thanit456/Optuna_tutorial/blob/master/optuna_tf_eager_effiecientnetb4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install optuna
!pip install efficientnet

Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/33/32/266d4afd269e3ecd7fcc595937c1733f65eae6c09c3caea74c0de0b88d78/optuna-1.5.0.tar.gz (200kB)
[K     |████████████████████████████████| 204kB 8.4MB/s 
[?25hCollecting alembic
[?25l  Downloading https://files.pythonhosted.org/packages/60/1e/cabc75a189de0fbb2841d0975243e59bde8b7822bacbb95008ac6fe9ad47/alembic-1.4.2.tar.gz (1.1MB)
[K     |████████████████████████████████| 1.1MB 15.2MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting cliff
[?25l  Downloading https://files.pythonhosted.org/packages/52/59/4db149d8962dc29a37c8bc08cd79185935527af9a27259a2d80cac707212/cliff-3.3.0-py3-none-any.whl (81kB)
[K     |████████████████████████████████| 81kB 9.9MB/s 
[?25hCollecting cmaes>=0.5.0
  Downloading https://files.pythonhosted.org/packages/03/de/6ed34ebc0e5c34ed371d898540bca36edb

In [13]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import optuna

N_TRAIN_EXAMPLES = 3000
N_VALID_EXAMPLES = 1000
BATCHSIZE = 128
CLASSES = 10
EPOCHS = 1

In [14]:
def efficientnetb4_teacher(input_shape = (250, 100, 3), num_classes = 5):
    # 285, 120 2.44 | 414, 163, 2.65
    # import sys
    # sys.path.append('model/')
    import efficientnet.tfkeras as efn
    backbone = efn.EfficientNetB4(input_shape = input_shape, weights='imagenet', include_top = False)
    pooler = tf.keras.layers.GlobalAveragePooling2D()(backbone.output)
    out = tf.keras.layers.Dense(num_classes)(pooler)
    softmax = tf.keras.layers.Activation('softmax')(out)

    model = tf.keras.models.Model(inputs = backbone.input, outputs = softmax)
    return model

In [61]:
def create_efficientnet_model(trial):
  num_classes = 10

  aspect_ratio = trial.suggest_uniform("aspect_ratio", 2.0, 3.5)
  width = trial.suggest_int("width", 50, 200)
  height = int(aspect_ratio * width)
  model = efficientnetb4_teacher(input_shape=(height, width, 3), num_classes=num_classes)
  return model

In [62]:
def create_efficientnet_optimizer(trial):
  learning_rate_fn = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=trial.suggest_loguniform('initial_learning_rate', 1e-5, 1e-2),
    decay_steps=trial.suggest_int('decay_steps', 300, 10000),
    decay_rate=trial.suggest_uniform('decay_rate', 0.1, 0.9),
    staircase=True
  ) 
  optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn)
  # optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(optimizer, loss_scale='dynamic')
  return optimizer

In [70]:
def loss_function(model, images, labels, mode='eval'):
  logits = model(images, training=(mode=='training'))
  print(f"logits : {logits}")
  print(f"labels : {labels}")

  loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

  loss_value = loss_object(y_pred=logits, y_true=labels)
  
  return loss_value, logits

@tf.function
def train_step(model, optimizer, images, labels):
  with tf.GradientTape() as tape:
    loss_value, logits = loss_function(model, images, labels, mode='training')

  batch_loss = loss_value / images.shape[0]

  grads = tape.gradients(loss_value, model.variables)
  optimizer.apply_gradients(zip(grads, model.variabels))

@tf.function
def test_step(model, images, labels):
  loss_value, logits = loss_function(model, images, labels)
  # batch_loss = loss_value / images.shape[0]
  return logits 
  

In [71]:
def learn(model, optimizer, dataset, mode='eval'):
  accuracy = tf.metrics.CategoricalAccuracy()

  for batch, (images, labels) in enumerate(dataset):
    if mode == 'training':
      train_step(model, optimizer, images, labels)
    else:
      logits = test_step(model, images, labels)
      accuracy(tf.argmax(logits, axis=1, output_type=tf.int32), tf.cast(labels, tf.int32))
  if mode == 'eval':
    return accuracy


In [72]:
from tensorflow.keras.datasets import cifar10
from tensorflow import keras

def get_cifar10():
  num_classes = 10
  (x_train, y_train), (x_valid, y_valid) = cifar10.load_data()
  x_train = x_train.astype("float32") / 255
  x_valid = x_valid.astype("float32") / 255

  y_train = y_train.astype("int32")
  y_valid = y_valid.astype("int32")
  y_train = keras.utils.to_categorical(y_train, num_classes)
  y_valid = keras.utils.to_categorical(y_valid, num_classes)

  train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
  train_ds = train_ds.shuffle(60000).batch(BATCHSIZE).take(N_TRAIN_EXAMPLES)

  valid_ds = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
  valid_ds = valid_ds.shuffle(10000).batch(BATCHSIZE).take(N_VALID_EXAMPLES)
  return train_ds, valid_ds


In [73]:
def get_mnist():
    (x_train, y_train), (x_valid, y_valid) = mnist.load_data()
    x_train = x_train.astype("float32") / 255
    x_valid = x_valid.astype("float32") / 255

    # y_train = y_train.astype("int32")
    # y_valid = y_valid.astype("int32")
    # y_train = keras.utils.to_categorical(y_train, num_classes)

    train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_ds = train_ds.shuffle(60000).batch(BATCHSIZE).take(N_TRAIN_EXAMPLES)

    valid_ds = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
    valid_ds = valid_ds.shuffle(10000).batch(BATCHSIZE).take(N_VALID_EXAMPLES)
    return train_ds, valid_ds

In [74]:
def objective(trial):
    # Get MNIST data.
    train_ds, valid_ds = get_cifar10()

    # Build model and optimizer.
    model = create_efficientnet_model(trial)
    optimizer = create_efficientnet_optimizer(trial)

    # Training and validating cycle.
    with tf.device("/gpu:0"):
        for _ in range(EPOCHS):
            learn(model, optimizer, train_ds, "train")

        accuracy = learn(model, optimizer, valid_ds, "eval")

    # Return last validation accuracy.
    return accuracy.result()

In [75]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

print("Number of finished trials: ", len(study.trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

logits : Tensor("model_9/activation_9/Softmax:0", shape=(128, 10), dtype=float32)
labels : Tensor("labels:0", shape=(128, 10), dtype=float32)
logits : Tensor("model_9/activation_9/Softmax:0", shape=(80, 10), dtype=float32)
labels : Tensor("labels:0", shape=(80, 10), dtype=float32)
logits : Tensor("model_9/activation_9/Softmax:0", shape=(16, 10), dtype=float32)
labels : Tensor("labels:0", shape=(16, 10), dtype=float32)


[I 2020-06-19 10:26:03,636] Finished trial#0 with value: 0.09619999676942825 with parameters: {'aspect_ratio': 3.45055763704928, 'width': 136, 'initial_learning_rate': 1.7919547957666894e-05, 'decay_steps': 9427, 'decay_rate': 0.7080699581839927}. Best is trial#0 with value: 0.09619999676942825.


logits : Tensor("model_10/activation_10/Softmax:0", shape=(128, 10), dtype=float32)
labels : Tensor("labels:0", shape=(128, 10), dtype=float32)
logits : Tensor("model_10/activation_10/Softmax:0", shape=(80, 10), dtype=float32)
labels : Tensor("labels:0", shape=(80, 10), dtype=float32)
logits : Tensor("model_10/activation_10/Softmax:0", shape=(16, 10), dtype=float32)
labels : Tensor("labels:0", shape=(16, 10), dtype=float32)


[I 2020-06-19 10:26:32,404] Finished trial#1 with value: 0.03550000116229057 with parameters: {'aspect_ratio': 3.113964910036664, 'width': 152, 'initial_learning_rate': 0.007278901989352572, 'decay_steps': 2138, 'decay_rate': 0.7425932825666776}. Best is trial#0 with value: 0.09619999676942825.


logits : Tensor("model_11/activation_11/Softmax:0", shape=(128, 10), dtype=float32)
labels : Tensor("labels:0", shape=(128, 10), dtype=float32)
logits : Tensor("model_11/activation_11/Softmax:0", shape=(80, 10), dtype=float32)
labels : Tensor("labels:0", shape=(80, 10), dtype=float32)
logits : Tensor("model_11/activation_11/Softmax:0", shape=(16, 10), dtype=float32)
labels : Tensor("labels:0", shape=(16, 10), dtype=float32)


[I 2020-06-19 10:27:01,676] Finished trial#2 with value: 0.04820000007748604 with parameters: {'aspect_ratio': 3.0971463417622775, 'width': 127, 'initial_learning_rate': 1.98962863006705e-05, 'decay_steps': 3802, 'decay_rate': 0.23641971883439483}. Best is trial#0 with value: 0.09619999676942825.


logits : Tensor("model_12/activation_12/Softmax:0", shape=(128, 10), dtype=float32)
labels : Tensor("labels:0", shape=(128, 10), dtype=float32)
logits : Tensor("model_12/activation_12/Softmax:0", shape=(80, 10), dtype=float32)
labels : Tensor("labels:0", shape=(80, 10), dtype=float32)
logits : Tensor("model_12/activation_12/Softmax:0", shape=(16, 10), dtype=float32)
labels : Tensor("labels:0", shape=(16, 10), dtype=float32)


[I 2020-06-19 10:27:30,706] Finished trial#3 with value: 0.033900000154972076 with parameters: {'aspect_ratio': 2.0883681929428772, 'width': 137, 'initial_learning_rate': 0.004890569984130645, 'decay_steps': 3833, 'decay_rate': 0.5052308508767134}. Best is trial#0 with value: 0.09619999676942825.


logits : Tensor("model_13/activation_13/Softmax:0", shape=(128, 10), dtype=float32)
labels : Tensor("labels:0", shape=(128, 10), dtype=float32)
logits : Tensor("model_13/activation_13/Softmax:0", shape=(80, 10), dtype=float32)
labels : Tensor("labels:0", shape=(80, 10), dtype=float32)
logits : Tensor("model_13/activation_13/Softmax:0", shape=(16, 10), dtype=float32)
labels : Tensor("labels:0", shape=(16, 10), dtype=float32)


[I 2020-06-19 10:27:59,967] Finished trial#4 with value: 0.03460000082850456 with parameters: {'aspect_ratio': 2.52987735904297, 'width': 174, 'initial_learning_rate': 0.0007059214146071075, 'decay_steps': 5820, 'decay_rate': 0.34020033093630603}. Best is trial#0 with value: 0.09619999676942825.


logits : Tensor("model_14/activation_14/Softmax:0", shape=(128, 10), dtype=float32)
labels : Tensor("labels:0", shape=(128, 10), dtype=float32)
logits : Tensor("model_14/activation_14/Softmax:0", shape=(80, 10), dtype=float32)
labels : Tensor("labels:0", shape=(80, 10), dtype=float32)
logits : Tensor("model_14/activation_14/Softmax:0", shape=(16, 10), dtype=float32)
labels : Tensor("labels:0", shape=(16, 10), dtype=float32)


[I 2020-06-19 10:28:29,241] Finished trial#5 with value: 0.10000000149011612 with parameters: {'aspect_ratio': 2.6715700823813266, 'width': 166, 'initial_learning_rate': 0.0004960128625165417, 'decay_steps': 429, 'decay_rate': 0.6593241806051304}. Best is trial#5 with value: 0.10000000149011612.


logits : Tensor("model_15/activation_15/Softmax:0", shape=(128, 10), dtype=float32)
labels : Tensor("labels:0", shape=(128, 10), dtype=float32)
logits : Tensor("model_15/activation_15/Softmax:0", shape=(80, 10), dtype=float32)
labels : Tensor("labels:0", shape=(80, 10), dtype=float32)
logits : Tensor("model_15/activation_15/Softmax:0", shape=(16, 10), dtype=float32)
labels : Tensor("labels:0", shape=(16, 10), dtype=float32)


[I 2020-06-19 10:28:58,961] Finished trial#6 with value: 0.02329999953508377 with parameters: {'aspect_ratio': 2.432205400590434, 'width': 74, 'initial_learning_rate': 0.0018337591634283758, 'decay_steps': 6304, 'decay_rate': 0.8642302879913556}. Best is trial#5 with value: 0.10000000149011612.


logits : Tensor("model_16/activation_16/Softmax:0", shape=(128, 10), dtype=float32)
labels : Tensor("labels:0", shape=(128, 10), dtype=float32)
logits : Tensor("model_16/activation_16/Softmax:0", shape=(80, 10), dtype=float32)
labels : Tensor("labels:0", shape=(80, 10), dtype=float32)
logits : Tensor("model_16/activation_16/Softmax:0", shape=(16, 10), dtype=float32)
labels : Tensor("labels:0", shape=(16, 10), dtype=float32)


[I 2020-06-19 10:29:28,567] Finished trial#7 with value: 0.0738999992609024 with parameters: {'aspect_ratio': 2.3258216371318197, 'width': 71, 'initial_learning_rate': 1.2406914796611586e-05, 'decay_steps': 3945, 'decay_rate': 0.3542880045546286}. Best is trial#5 with value: 0.10000000149011612.


logits : Tensor("model_17/activation_17/Softmax:0", shape=(128, 10), dtype=float32)
labels : Tensor("labels:0", shape=(128, 10), dtype=float32)
logits : Tensor("model_17/activation_17/Softmax:0", shape=(80, 10), dtype=float32)
labels : Tensor("labels:0", shape=(80, 10), dtype=float32)
logits : Tensor("model_17/activation_17/Softmax:0", shape=(16, 10), dtype=float32)
labels : Tensor("labels:0", shape=(16, 10), dtype=float32)


Exception ignored in: <function WeakKeyDictionary.__init__.<locals>.remove at 0x7fe56ec5a8c8>
Traceback (most recent call last):
  File "/usr/lib/python3.6/weakref.py", line 356, in remove
    def remove(k, selfref=ref(self)):
KeyboardInterrupt
[I 2020-06-19 10:29:58,429] Finished trial#8 with value: 0.05730000138282776 with parameters: {'aspect_ratio': 2.6219367606018817, 'width': 122, 'initial_learning_rate': 0.004101996778471789, 'decay_steps': 8444, 'decay_rate': 0.788332352688461}. Best is trial#5 with value: 0.10000000149011612.


KeyboardInterrupt: ignored