In [None]:
!pip install optuna, python-dotenv

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import os, optuna
import warnings

from tensorflow import keras
from google.colab import drive
from dotenv import load_dotenv
# Load environment variables from a local dotenv file; presumably this is
# where GOOGLE_DRIVE_PATH (read with os.getenv in a later cell) is defined.
load_dotenv(dotenv_path="local_paths.env")

# Mount Google Drive inside the Colab VM under /content/drive.
drive.mount("/content/drive")
# Short alias for the Keras backend module, used by the callbacks and metrics.
K = keras.backend
# NOTE(review): a "change default dir" step was announced here, but no code follows.

# Uncomment to silence Optuna's INFO-level logging:
# optuna.logging.set_verbosity(optuna.logging.WARNING)

In [None]:
# Fail fast when the runtime has no GPU attached: TensorFlow must report
# exactly "/device:GPU:0", otherwise the notebook stops here.
device_name = tf.test.gpu_device_name()
if device_name == "/device:GPU:0":
    print("Found GPU at: {}".format(device_name))
else:
    raise SystemError("GPU device not found")

In [None]:
!mkdir 'data'
!cp -r {os.getenv("GOOGLE_DRIVE_PATH")} 'data'

In [None]:
# %cd '/content/drive/MyDrive/maize-crop-diagnose/data'
# !unzip train.zip

In [None]:
class OneCycleScheduler(tf.keras.callbacks.Callback):
    """One-cycle learning-rate and momentum schedule, updated before every batch.

    A batch counter that keeps running across epochs drives three linear
    phases:

    1. learning rate ``start_lr`` -> ``max_lr`` while momentum goes
       ``start_mom`` -> ``min_mom``;
    2. learning rate ``max_lr`` -> ``start_lr`` while momentum goes back to
       ``start_mom``;
    3. learning rate ``start_lr`` -> ``last_lr`` with momentum held at
       ``start_mom``.

    Once ``iterations`` batches have been processed the learning rate stays at
    ``last_lr`` instead of being extrapolated beyond the end of the schedule.

    Args:
        iterations: Total number of training batches the schedule spans.
        max_lr: Peak learning rate reached at the end of phase 1.
        start_lr: Initial learning rate; ``max_lr / 10`` when None.
        start_mom: Momentum at the start and at the end of the cycle.
        min_mom: Momentum reached when the learning rate peaks.
        last_iterations: Number of batches reserved for phase 3;
            ``iterations // 10 + 1`` when None.
        last_lr: Learning rate at the end of phase 3; ``start_lr / 1000``
            when None.
    """

    def __init__(
        self,
        iterations,
        max_lr=1e-3,
        start_lr=None,
        start_mom=0.95,
        min_mom=0.85,
        last_iterations=None,
        last_lr=None,
    ):
        # Let the Keras base class initialise its own attributes.
        super().__init__()
        self.iterations = iterations
        self.max_lr = max_lr
        # Explicit `is None` tests so that a caller-supplied 0 is honoured
        # instead of being replaced by the default.
        self.start_lr = max_lr / 10 if start_lr is None else start_lr
        self.start_mom = start_mom
        self.min_mom = min_mom
        self.last_iterations = (
            iterations // 10 + 1 if last_iterations is None else last_iterations
        )
        # Phases 1 and 2 each last `half_iteration` batches.
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_lr = self.start_lr / 1000 if last_lr is None else last_lr
        # Number of batches seen so far (never reset between epochs).
        self.iteration = 0

    def _interpolate(self, iter1, iter2, lr1, lr2):
        """Linearly interpolate between (iter1, lr1) and (iter2, lr2) at the current batch."""
        return (lr2 - lr1) * (self.iteration - iter1) / (iter2 - iter1) + lr1

    def on_batch_begin(self, batch, logs=None):
        """Set the optimizer's learning rate and momentum for the upcoming batch."""
        half = self.half_iteration
        if self.iteration < half:
            lr = self._interpolate(0, half, self.start_lr, self.max_lr)
            mom = self._interpolate(0, half, self.start_mom, self.min_mom)
        elif self.iteration < 2 * half:
            lr = self._interpolate(half, 2 * half, self.max_lr, self.start_lr)
            mom = self._interpolate(half, 2 * half, self.min_mom, self.start_mom)
        elif self.iteration < self.iterations:
            lr = self._interpolate(
                2 * half, self.iterations, self.start_lr, self.last_lr
            )
            mom = self.start_mom
        else:
            # Past the planned schedule: hold the final value. Extrapolating
            # the last segment would eventually push the rate below zero
            # whenever last_lr < start_lr.
            lr = self.last_lr
            mom = self.start_mom
        self.iteration += 1
        K.set_value(self.model.optimizer.learning_rate, lr)
        # NOTE(review): assumes the optimizer exposes `momentum` as a backend
        # variable that K.set_value can assign — confirm for the optimizer in use.
        K.set_value(self.model.optimizer.momentum, mom)


class OneCycleSchedulerNoMom(tf.keras.callbacks.Callback):
    """One-cycle learning-rate schedule (no momentum), updated before every batch.

    A batch counter that keeps running across epochs drives three linear
    phases: ``start_lr`` -> ``max_lr``, then ``max_lr`` -> ``start_lr``, then
    ``start_lr`` -> ``last_lr``. Once ``iterations`` batches have been
    processed the learning rate stays at ``last_lr`` instead of being
    extrapolated beyond the end of the schedule.

    Args:
        iterations: Total number of training batches the schedule spans.
        max_lr: Peak learning rate reached at the end of the first phase.
        start_lr: Initial learning rate; ``max_lr / 10`` when None.
        last_iterations: Number of batches reserved for the final phase;
            ``iterations // 10 + 1`` when None.
        last_lr: Learning rate at the end of the final phase;
            ``start_lr / 1000`` when None.
    """

    def __init__(
        self,
        iterations,
        max_lr=1e-3,
        start_lr=None,
        last_iterations=None,
        last_lr=None,
    ):
        # Let the Keras base class initialise its own attributes.
        super().__init__()
        self.iterations = iterations
        self.max_lr = max_lr
        # Explicit `is None` tests so that a caller-supplied 0 is honoured
        # instead of being replaced by the default.
        self.start_lr = max_lr / 10 if start_lr is None else start_lr
        self.last_iterations = (
            iterations // 10 + 1 if last_iterations is None else last_iterations
        )
        # The ramp-up and ramp-down phases each last `half_iteration` batches.
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_lr = self.start_lr / 1000 if last_lr is None else last_lr
        # Number of batches seen so far (never reset between epochs).
        self.iteration = 0

    def _interpolate(self, iter1, iter2, lr1, lr2):
        """Linearly interpolate between (iter1, lr1) and (iter2, lr2) at the current batch."""
        return (lr2 - lr1) * (self.iteration - iter1) / (iter2 - iter1) + lr1

    def on_batch_begin(self, batch, logs=None):
        """Set the optimizer's learning rate for the upcoming batch."""
        half = self.half_iteration
        if self.iteration < half:
            lr = self._interpolate(0, half, self.start_lr, self.max_lr)
        elif self.iteration < 2 * half:
            lr = self._interpolate(half, 2 * half, self.max_lr, self.start_lr)
        elif self.iteration < self.iterations:
            lr = self._interpolate(
                2 * half, self.iterations, self.start_lr, self.last_lr
            )
        else:
            # Past the planned schedule: hold the final value. Extrapolating
            # the last segment would eventually push the rate below zero
            # whenever last_lr < start_lr.
            lr = self.last_lr
        self.iteration += 1
        K.set_value(self.model.optimizer.learning_rate, lr)


def recall_m(y_true, y_pred):
    """Recall over a batch: matched positives divided by actual positives.

    The element-wise product ``y_true * y_pred`` and ``y_true`` itself are
    each clipped to [0, 1] and rounded before being summed, and the backend
    epsilon is added to the denominator to avoid dividing by zero.
    Presumably ``y_true`` is a 0/1 indicator tensor with the same shape as
    ``y_pred`` — TODO confirm against the caller.
    """
    matched = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    return K.sum(matched) / (K.sum(actual) + K.epsilon())


def precision_m(y_true, y_pred):
    """Precision over a batch: matched positives divided by predicted positives.

    The element-wise product ``y_true * y_pred`` and ``y_pred`` itself are
    each clipped to [0, 1] and rounded before being summed, and the backend
    epsilon is added to the denominator to avoid dividing by zero.
    Presumably ``y_true`` is a 0/1 indicator tensor with the same shape as
    ``y_pred`` — TODO confirm against the caller.
    """
    matched = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    return K.sum(matched) / (K.sum(predicted) + K.epsilon())


def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    The backend epsilon in the denominator keeps the result finite when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

In [None]:
# Image resolution (pixels) that every picture is resized to when loaded.
IMG_HEIGHT = 2 * 64
IMG_WIDTH = 2 * 48

# Number of images per batch.
BATCH_SIZE = 32

# Nominal sizes of the training and test partitions; presumably counted in
# images, since the objectives divide TRAIN_SIZE by BATCH_SIZE to obtain the
# number of steps per epoch.
TRAIN_SIZE = 10_000
TEST_SIZE = 3_000

# Maximum number of training epochs per model.
EPOCH = 50

# Root folder of the training images after they were copied from Drive.
DATA_DIR = "data/maize-crop-diagnose/data/train"

In [None]:
# Load every image under DATA_DIR as a batched dataset. A fixed seed makes the
# file order, and therefore the split below, reproducible across runs.
img_data = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    seed=42,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
)
num_classes = len(img_data.class_names)
AUTOTUNE = tf.data.AUTOTUNE

# `img_data` yields whole batches, so take()/skip() count batches, not images.
# Convert the sample-based sizes first; otherwise take(TRAIN_SIZE) would claim
# TRAIN_SIZE * BATCH_SIZE images and could leave the other splits empty.
train_batches = TRAIN_SIZE // BATCH_SIZE
test_batches = TEST_SIZE // BATCH_SIZE

# NOTE(review): the loader shuffles by default; if it reshuffles on every
# pass, images near the split boundaries can move between partitions from one
# epoch to the next — confirm, and consider validation_split/subset instead.
train_set = img_data.take(train_batches).prefetch(buffer_size=AUTOTUNE)
test_set = img_data.skip(train_batches).take(test_batches)
val_set = img_data.skip(train_batches + test_batches).prefetch(buffer_size=AUTOTUNE)

In [None]:
def objective(trial):
    """Optuna objective: train a three-block CNN with a one-cycle SGD schedule.

    Each call reloads the images from DATA_DIR with a fixed 80/20
    train/validation split, samples the architecture and learning-rate
    hyperparameters from ``trial``, trains for at most EPOCH epochs with early
    stopping, and scores the model on the validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Validation accuracy of the trained model.
    """
    K.clear_session()
    train_set, val_set = tf.keras.utils.image_dataset_from_directory(
        DATA_DIR,
        validation_split=0.2,
        subset="both",
        seed=42,
        image_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
    )
    num_classes = len(train_set.class_names)
    AUTOTUNE = tf.data.AUTOTUNE
    # cache() first, prefetch() last: prefetching must be the final stage so
    # that it overlaps reading from the cache with the training step.
    train_set = train_set.cache().prefetch(buffer_size=AUTOTUNE)
    val_set = val_set.cache().prefetch(buffer_size=AUTOTUNE)

    # Hyperparameters for network architecture
    ## Number of filters: layer 1 is absolute, layers 2 and 3 are integer
    ## multipliers applied to the previous layer's filter count.
    filters_layer_1 = trial.suggest_categorical("filters_layer_1", [16, 32, 64])
    filters_layer_2 = trial.suggest_int("prop_filters_layer_2", 1, 4)
    filters_layer_3 = trial.suggest_int("prop_filters_layer_3", 1, 4)
    ## Kernel size
    kernel_layer_1 = trial.suggest_categorical("kernel_layer_1", [3, 5, 7])
    kernel_layer_2 = trial.suggest_categorical("kernel_layer_2", [3, 5, 7])
    kernel_layer_3 = trial.suggest_categorical("kernel_layer_3", [3, 5, 7])
    ## Activation function, with the initializer paired to it
    activation = trial.suggest_categorical("activation", ["elu", "selu"])
    if activation == "selu":
        kernel_initializer = "lecun_normal"
    else:
        kernel_initializer = "he_normal"
    ## Dropout rate (shared by every dropout layer)
    dropout_rate = trial.suggest_float("dropout_rate", 0.25, 0.5)
    ## Dense layer size
    dense_size = trial.suggest_categorical("dense_size", [16, 32, 64, 128, 256, 512])

    model = tf.keras.Sequential(
        [
            tf.keras.layers.Rescaling(1.0 / 255),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Conv2D(
                filters_layer_1,
                kernel_layer_1,
                activation=activation,
                kernel_initializer=kernel_initializer,
            ),
            tf.keras.layers.MaxPooling2D(),
            tf.keras.layers.SpatialDropout2D(dropout_rate),
            tf.keras.layers.Conv2D(
                filters_layer_1 * filters_layer_2,
                kernel_layer_2,
                activation=activation,
                kernel_initializer=kernel_initializer,
            ),
            tf.keras.layers.MaxPooling2D(),
            tf.keras.layers.SpatialDropout2D(dropout_rate),
            tf.keras.layers.Conv2D(
                filters_layer_1 * filters_layer_2 * filters_layer_3,
                kernel_layer_3,
                activation=activation,
                kernel_initializer=kernel_initializer,
            ),
            tf.keras.layers.MaxPooling2D(),
            tf.keras.layers.SpatialDropout2D(dropout_rate),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(
                dense_size, activation=activation, kernel_initializer=kernel_initializer
            ),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dropout(dropout_rate),
            tf.keras.layers.Dense(num_classes, activation="softmax"),
        ]
    )

    # Hyperparameters for the one-cycle schedule; the start and last rates are
    # sampled relative to the peak rate drawn for this trial.
    max_lr = trial.suggest_float("max_lr", 1e-3, 1e-1)
    start_lr = trial.suggest_float("start_lr", max_lr * 0.01, max_lr * 0.8)
    last_lr = trial.suggest_float("last_lr", start_lr, max_lr)

    # Callbacks
    # NOTE(review): the schedule length comes from the TRAIN_SIZE constant,
    # not from the number of batches actually in train_set — confirm they match.
    onecycle = OneCycleSchedulerNoMom(
        TRAIN_SIZE // BATCH_SIZE * EPOCH,
        max_lr=max_lr,
        start_lr=start_lr,
        last_lr=last_lr,
    )
    early_stopping = tf.keras.callbacks.EarlyStopping(
        patience=5, restore_best_weights=True
    )
    model.compile(
        optimizer=tf.keras.optimizers.SGD(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )

    # Fitting model. No batch_size argument: the datasets are already batched.
    model.fit(
        train_set,
        validation_data=val_set,
        epochs=EPOCH,
        callbacks=[early_stopping, onecycle],
    )

    # Evaluating and returning the validation accuracy
    _, acc = model.evaluate(val_set)
    return acc


# Create the CNN study in the SQLite database, or resume it when a study with
# this name is already stored there; larger objective values are better.
# NOTE(review): the storage URL appears to use a relative path
# (drive/MyDrive/...), which would resolve against the working directory — confirm.
study = optuna.create_study(
    study_name="cnn_onecycle_sgd",
    storage="sqlite:///drive/MyDrive/maize-crop-diagnose/db_maize_models.sqlite3",
    load_if_exists=True,
    direction="maximize",
)

# Run 100 more trials of the objective defined above, then report the best one.
study.optimize(objective, n_trials=100)
print(f"Best value: {study.best_value} (params: {study.best_params})")

In [None]:
def _residual_block(inputs, filters, kernel_size):
    """Apply Conv-BN-Conv-BN, add the result back onto `inputs`, then ReLU.

    Both convolutions use "same" padding, so the output keeps the spatial size
    of `inputs`; `filters` must equal the channel count of `inputs` for the
    element-wise Add to be valid.
    """
    x = keras.layers.Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        activation="relu",
        padding="same",
    )(inputs)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Conv2D(
        filters=filters, kernel_size=kernel_size, padding="same"
    )(x)
    x = keras.layers.BatchNormalization()(x)
    # Add is a layer: instantiate it, then call it on the list of tensors.
    x = keras.layers.Add()([inputs, x])
    return keras.layers.ReLU()(x)


def objective(trial):
    """Optuna objective: train a small residual CNN with a one-cycle SGD schedule.

    Each call reloads the images from DATA_DIR with a fixed 80/20
    train/validation split, samples the architecture and learning-rate
    hyperparameters from ``trial``, trains for at most EPOCH epochs with early
    stopping, and scores the model on the validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Validation accuracy of the trained model.
    """
    K.clear_session()
    train_set, val_set = tf.keras.utils.image_dataset_from_directory(
        DATA_DIR,
        validation_split=0.2,
        subset="both",
        seed=42,
        image_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
    )
    num_classes = len(train_set.class_names)
    AUTOTUNE = tf.data.AUTOTUNE
    # cache() first, prefetch() last: prefetching must be the final stage so
    # that it overlaps reading from the cache with the training step.
    train_set = train_set.cache().prefetch(buffer_size=AUTOTUNE)
    val_set = val_set.cache().prefetch(buffer_size=AUTOTUNE)

    # Network architecture
    ## Input
    ### Hyperparameters for input
    num_input_filter = trial.suggest_int("num_input_filter", 16, 64)
    input_kernel_size = trial.suggest_categorical("input_kernel_size", [3, 5, 7])
    ### Architecture
    input_layer = keras.layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
    # Scale pixel values by 1/255, as the sequential CNN objective does.
    rescaled = keras.layers.Rescaling(1.0 / 255)(input_layer)
    conv_layer_1 = keras.layers.Conv2D(
        filters=num_input_filter, kernel_size=input_kernel_size, activation="relu"
    )(rescaled)
    pool_layer_1 = keras.layers.MaxPooling2D()(conv_layer_1)

    ## Residual layers
    ### Hyperparameters for residual blocks
    # The skip connections add tensors element-wise, so the residual blocks
    # must keep the channel count produced by the input convolution.
    num_residual_filter = num_input_filter
    # Each hyperparameter needs its own name: asking a trial for an existing
    # name hands back the value already suggested under it.
    residual_kernel_size = trial.suggest_categorical(
        "residual_kernel_size", [3, 5, 7]
    )
    ### Architecture: four identical residual blocks in sequence
    residual_out = pool_layer_1
    for _ in range(4):
        residual_out = _residual_block(
            residual_out, num_residual_filter, residual_kernel_size
        )

    ## Output
    ### Hyperparameters for output
    num_output = trial.suggest_int("num_output", 16, 128)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
    ### Architecture
    pool = keras.layers.MaxPooling2D()(residual_out)
    flatten = keras.layers.Flatten()(pool)
    dense = keras.layers.Dense(num_output, activation="relu")(flatten)
    dropout = keras.layers.Dropout(dropout_rate)(dense)
    output = keras.layers.Dense(num_classes, activation="softmax")(dropout)
    # Model definition
    model = keras.Model(inputs=input_layer, outputs=output)

    # Hyperparameters for the one-cycle schedule; the start and last rates are
    # sampled relative to the peak rate drawn for this trial.
    max_lr = trial.suggest_float("max_lr", 0.005, 0.5)
    start_lr = trial.suggest_float("start_lr", max_lr * 0.01, max_lr * 0.8)
    last_lr = trial.suggest_float("last_lr", start_lr, max_lr)

    # Callbacks
    # NOTE(review): the schedule length comes from the TRAIN_SIZE constant,
    # not from the number of batches actually in train_set — confirm they match.
    onecycle = OneCycleSchedulerNoMom(
        TRAIN_SIZE // BATCH_SIZE * EPOCH,
        max_lr=max_lr,
        start_lr=start_lr,
        last_lr=last_lr,
    )
    early_stopping = tf.keras.callbacks.EarlyStopping(
        patience=5, restore_best_weights=True
    )
    model.compile(
        optimizer=tf.keras.optimizers.SGD(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )

    # Fitting model. No batch_size argument: the datasets are already batched.
    model.fit(
        train_set,
        validation_data=val_set,
        epochs=EPOCH,
        callbacks=[early_stopping, onecycle],
    )

    # Evaluating and returning the validation accuracy
    _, acc = model.evaluate(val_set)
    return acc


# Create the residual-network study in the SQLite database, or resume it when
# a study with this name is already stored there; larger objective values are better.
# NOTE(review): the storage URL appears to use a relative path
# (drive/MyDrive/...), which would resolve against the working directory — confirm.
study = optuna.create_study(
    study_name="resnet_onecycle_sgd",
    storage="sqlite:///drive/MyDrive/maize-crop-diagnose/db_maize_models.sqlite3",
    load_if_exists=True,
    direction="maximize",
)

# Run 100 more trials of the objective defined above, then report the best one.
study.optimize(objective, n_trials=100)
print(f"Best value: {study.best_value} (params: {study.best_params})")