# Justin Zarkovacki 2/15/2023
# Transfer Learning K49 -> Kanji

# Prepare imports

In [1]:
import numpy as np
import optuna
import sys
import os


from matplotlib import pyplot as plt

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, Dropout, AveragePooling2D, Flatten, Dense
from keras import models, backend as K
from tensorflow.keras import models
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

print("Done!")

  from .autonotebook import tqdm as notebook_tqdm


Done!


# Function Definitions and Variables

In [2]:
epochs = 12

def load(f):
    return np.load(f)['arr_0']

# Loading a base model requires you pop all dimensionality flattening layers
def load_base_model(filename):
    bm = models.load_model(filename)
    bm.pop()
    bm.pop()
    bm.pop()
    bm.trainable = False
    return bm

# Helper to create the graphics
def create_visuals(graph_title, model_hist, test_images, test_labels):
    accuracy_data = model_hist.history['accuracy']
    val_accuracy_data = model_hist.history['val_accuracy']

    lower_bound = min(min(accuracy_data), min(val_accuracy_data))

    plt.plot(accuracy_data, label='Train Accuracy')
    plt.plot(val_accuracy_data, label = 'Validation Accuracy')

    plt.title(graph_title)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.ylim([lower_bound - 0.01, 1])
    plt.legend(loc='lower right')

    print(accuracy_data[-1])
    print(val_accuracy_data[-1])
#     test_loss, test_acc = model_hist.evaluate(test_images, test_labels, verbose=2)

im_size = (64, 64)
sample_shape = (64, 64, 1)
bs = 128   # batch size

k49_classes = 49
k49_epochs = 15

kanji_classes = 150
kanji_epochs = 15

# Define paths to datasets
path_prefix = 'C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\NewModels\\'
data_prefix = 'C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\Datasets\\'
output_prefix = 'C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\NewModels\\ModelDevelopmentOutputs\\'
k49_train_images_path = data_prefix + 'k49_train_imgs'
k49_train_labels_path = data_prefix + 'Kuzushiji-49 - train-labels.npz'
k49_test_images_path = data_prefix + 'k49_test_imgs'
k49_test_labels_path = data_prefix + 'Kuzushiji-49 - test-labels.npz'
kanji_path = data_prefix + 'final_dataset'

AUTOTUNE = tf.data.AUTOTUNE
normalization_layer = tf.keras.layers.Rescaling(1./255)

print("Done!")

Done!


## Load and Optimize K49 Data

In [3]:
# Load K49 Data
# image_dataset_from_directory() loads images using a default interpolation method of bilinear
k49_train_images = tf.keras.utils.image_dataset_from_directory(k49_train_images_path, seed=222,
                  color_mode="grayscale", image_size=im_size, batch_size=bs)

k49_val_images = tf.keras.utils.image_dataset_from_directory(k49_test_images_path, seed=222,
                  color_mode="grayscale", image_size=im_size, batch_size=bs)

# Rescale K49 training data
normalized_k49_train = k49_train_images.map(lambda x, y: (normalization_layer(x), y))
normalized_k49_val = k49_val_images.map(lambda x, y: (normalization_layer(x), y))

# K49 Optimization
k49_train_images = normalized_k49_train.cache().prefetch(buffer_size=AUTOTUNE)
k49_val_images = normalized_k49_val.cache().prefetch(buffer_size=AUTOTUNE)

print("Done!")

Found 232365 files belonging to 49 classes.
Found 38547 files belonging to 49 classes.
Done!


## Load and Optimize Kanji Data

In [4]:
# kanji_train = tf.keras.utils.image_dataset_from_directory(kanji_path, validation_split=0.3, seed=222,
#                   subset="training", color_mode="grayscale", image_size=im_size, batch_size=bs)
kanji_train = tf.keras.utils.image_dataset_from_directory(kanji_path, validation_split=0.7, seed=222,
                  subset="training", color_mode="grayscale", image_size=im_size, batch_size=bs)

kanji_val = tf.keras.utils.image_dataset_from_directory(kanji_path, validation_split=0.3, seed=222,
                  subset="validation", color_mode="grayscale", image_size=im_size, batch_size=bs)

# Rescale Kanji training data
normalized_kanji_train = kanji_train.map(lambda x, y: (normalization_layer(x), y))
normalized_kanji_val = kanji_val.map(lambda x, y: (normalization_layer(x), y))

# Kanji Optimization
kanji_train = kanji_train.cache().prefetch(buffer_size=AUTOTUNE)
kanji_val = kanji_val.cache().prefetch(buffer_size=AUTOTUNE)

print("Done!")

Found 70599 files belonging to 150 classes.
Using 21180 files for training.
Found 70599 files belonging to 150 classes.
Using 21179 files for validation.
Done!


This notebook will create an ensemble model for Kanji character recognition. It wil be composed of 2 basic models, and one transfer learning model. Knowledge from K49 will be transfered to Kanji.

# Creating the Basic Models

## Kanji Model 1

In [4]:
def kanji_objective1(trial):
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 4)
    l1_filters = trial.suggest_int('l1_kernel', 32, 64)
    l2_filters = trial.suggest_int('l2_kernel', 32, 96)
    l1_activation = trial.suggest_categorical('l1_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('l2_activation', ['relu', 'sigmoid', 'tanh'])
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 3)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 128)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    # Design model
    kanji_1_design = Sequential()
    kanji_1_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=sample_shape))
    kanji_1_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    kanji_1_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=sample_shape))
    kanji_1_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    #     kanji_1_design.add(Conv2D(l3_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))S
    kanji_1_design.add(Flatten())
    kanji_1_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    kanji_1_design.add(Dense(kanji_classes))

    kanji_1_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

#     print(kanji1_design.summary())

    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    kanji_1_history = kanji_1_design.fit(kanji_train, epochs=kanji_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)

    # Important metric for optuna to optimize over
    return kanji_1_history.history['val_accuracy'][-1]

In [8]:
# Run Study 1
fileName = output_prefix + 'Kanji_Model_1.txt'
orig_stdout = sys.stdout
with open(fileName, 'w') as f:
    sys.stdout = f
    kanji_study1 = optuna.create_study(direction='maximize', study_name="Kanji-1")
    kanji_study1.optimize(kanji_objective1, n_trials=15)
    sys.stdout = orig_stdout
    f.close()

[32m[I 2023-03-19 13:49:10,716][0m A new study created in memory with name: Kanji-1[0m
[32m[I 2023-03-19 13:53:06,015][0m Trial 0 finished with value: 0.9121771454811096 and parameters: {'kernel_size': 3, 'l1_kernel': 52, 'l2_kernel': 39, 'l1_activation': 'relu', 'l2_activation': 'tanh', 'average_pooling_size': 3, 'dense_layer_size': 95, 'dense_layer_activation': 'relu'}. Best is trial 0 with value: 0.9121771454811096.[0m
[32m[I 2023-03-19 13:56:11,580][0m Trial 1 finished with value: 0.8702488541603088 and parameters: {'kernel_size': 2, 'l1_kernel': 40, 'l2_kernel': 47, 'l1_activation': 'tanh', 'l2_activation': 'tanh', 'average_pooling_size': 3, 'dense_layer_size': 101, 'dense_layer_activation': 'sigmoid'}. Best is trial 0 with value: 0.9121771454811096.[0m
[32m[I 2023-03-19 14:00:47,913][0m Trial 2 finished with value: 0.8968789577484131 and parameters: {'kernel_size': 4, 'l1_kernel': 63, 'l2_kernel': 50, 'l1_activation': 'tanh', 'l2_activation': 'relu', 'average_pooling_s

In [9]:
# Print the info from the best trial
print(f'Best trial info:\n{kanji_study1.best_trial}\n')
for param, value in kanji_study1.best_params.items():
    print(f'Param: {param}\tValue: {value}')

Best trial info:
FrozenTrial(number=5, state=1, values=[0.9235563278198242], datetime_start=datetime.datetime(2023, 3, 19, 14, 7, 50, 926188), datetime_complete=datetime.datetime(2023, 3, 19, 14, 12, 58, 348620), params={'kernel_size': 4, 'l1_kernel': 63, 'l2_kernel': 91, 'l1_activation': 'relu', 'l2_activation': 'tanh', 'average_pooling_size': 3, 'dense_layer_size': 126, 'dense_layer_activation': 'tanh'}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'kernel_size': IntDistribution(high=4, log=False, low=2, step=1), 'l1_kernel': IntDistribution(high=64, log=False, low=32, step=1), 'l2_kernel': IntDistribution(high=96, log=False, low=32, step=1), 'l1_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'l2_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'average_pooling_size': IntDistribution(high=3, log=False, low=2, step=1), 'dense_layer_size': IntDistribution(high=128, log=False, low=64, step=1), 'dense_layer_a

In [16]:
# Optuna doesn't save the best model. You must rebuild it and save it.
kern_size = 4
l1_filters = 63
l2_filters = 91
l1_activation = "relu"
l2_activation = "tanh"
average_pooling_size = 3
dense_layer_size = 126
dense_layer_activation = "tanh"

kanji_1 = Sequential()
kanji_1.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=sample_shape))
kanji_1.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
kanji_1.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=sample_shape))
kanji_1.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
kanji_1.add(Flatten())
kanji_1.add(Dense(dense_layer_size, activation=dense_layer_activation))
kanji_1.add(Dense(kanji_classes))

kanji_1.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [20]:
tf.config.run_functions_eagerly(True)

callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

kanji_1_optuna_history = kanji_1.fit(kanji_train, epochs=k49_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)
kanji_1.save(path_prefix + 'kanji_models\\kanji_1.h5', save_format='h5')

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15


## Kanji Model 2

In [10]:
def kanji_objective2(trial):
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 4)
    l1_filters = trial.suggest_int('l1_kernel', 32, 54)
    l2_filters = trial.suggest_int('l2_kernel', 20, 64)
    l3_filters = trial.suggest_int('l3_kernel', 32, 64)
    l1_activation = trial.suggest_categorical('l1_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('l2_activation', ['relu', 'sigmoid', 'tanh'])
    l3_activation = trial.suggest_categorical('l3_activation', ['relu', 'sigmoid', 'tanh'])
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 128)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    # Design model
    kanji_2_design = Sequential()
    kanji_2_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=sample_shape))
    kanji_2_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=sample_shape))
    kanji_2_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    kanji_2_design.add(Conv2D(l3_filters, kernel_size=kern_size, activation=l3_activation, input_shape=sample_shape))
    kanji_2_design.add(Flatten())
    kanji_2_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    kanji_2_design.add(Dense(kanji_classes))

    kanji_2_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

#     print(kanji_2_design.summary())

    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    kanji_2_history = kanji_2_design.fit(kanji_train, epochs=kanji_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)

    # Important metric for optuna to optimize over
    return kanji_2_history.history['val_accuracy'][-1]

In [11]:
# Run Study 2
fileName = output_prefix + 'Kanji_Model_2.txt'
orig_stdout = sys.stdout
with open(fileName, 'w') as f:
    sys.stdout = f
    kanji_study2 = optuna.create_study(direction='maximize', study_name="Kanji-2")
    kanji_study2.optimize(kanji_objective2, n_trials=15)
    sys.stdout = orig_stdout
    f.close()

[32m[I 2023-03-19 14:54:51,539][0m A new study created in memory with name: Kanji-2[0m
[32m[I 2023-03-19 15:07:27,353][0m Trial 0 finished with value: 0.9096274375915527 and parameters: {'kernel_size': 4, 'first_layer_kernel': 42, 'second_layer_kernel': 33, 'third_layer_kernel': 53, 'l1_activation': 'sigmoid', 'l2_activation': 'tanh', 'average_pooling_size': 4, 'dense_layer_size': 126, 'dense_layer_activation': 'sigmoid'}. Best is trial 0 with value: 0.9096274375915527.[0m
[32m[I 2023-03-19 15:21:38,295][0m Trial 1 finished with value: 0.8713820576667786 and parameters: {'kernel_size': 3, 'first_layer_kernel': 54, 'second_layer_kernel': 35, 'third_layer_kernel': 45, 'l1_activation': 'sigmoid', 'l2_activation': 'relu', 'average_pooling_size': 2, 'dense_layer_size': 123, 'dense_layer_activation': 'sigmoid'}. Best is trial 0 with value: 0.9096274375915527.[0m
[32m[I 2023-03-19 15:24:56,171][0m Trial 2 finished with value: 0.024080457165837288 and parameters: {'kernel_size': 4, 

In [12]:
# Print the info from the best trial
print(f'Best trial info:\n{kanji_study2.best_trial}\n')
for param, value in kanji_study2.best_params.items():
    print(f'Param: {param}\tValue: {value}')

Best trial info:
FrozenTrial(number=14, state=1, values=[0.9142074584960938], datetime_start=datetime.datetime(2023, 3, 19, 17, 38, 42, 393909), datetime_complete=datetime.datetime(2023, 3, 19, 17, 53, 22, 554222), params={'kernel_size': 4, 'first_layer_kernel': 49, 'second_layer_kernel': 55, 'third_layer_kernel': 49, 'l1_activation': 'tanh', 'l2_activation': 'tanh', 'average_pooling_size': 4, 'dense_layer_size': 106, 'dense_layer_activation': 'sigmoid'}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'kernel_size': IntDistribution(high=4, log=False, low=2, step=1), 'first_layer_kernel': IntDistribution(high=54, log=False, low=32, step=1), 'second_layer_kernel': IntDistribution(high=64, log=False, low=20, step=1), 'third_layer_kernel': IntDistribution(high=64, log=False, low=32, step=1), 'l1_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'l2_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'average_pooling_si

In [18]:
# Optuna doesn't save the best model. You must rebuild it and save it.
kern_size = 4
l1_filters = 49
l2_filters = 55
l3_filters = 49
l1_activation = "tanh"
l2_activation = "relu"
l3_activation = "tanh"
average_pooling_size = 4
dense_layer_size = 106
dense_layer_activation = "sigmoid"

kanji_2 = Sequential()
kanji_2.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=sample_shape))
kanji_2.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l1_activation, input_shape=sample_shape))
kanji_2.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
kanji_2.add(Conv2D(l3_filters, kernel_size=kern_size, activation=l2_activation, input_shape=sample_shape))
kanji_2.add(Flatten())
kanji_2.add(Dense(dense_layer_size, activation=dense_layer_activation))
kanji_2.add(Dense(kanji_classes))

kanji_2.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [19]:
tf.config.run_functions_eagerly(True)

callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

kanji_2_optuna_history = kanji_2.fit(kanji_train, epochs=kanji_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)
kanji_2.save(path_prefix + 'kanji_models\\kanji_2.h5', save_format='h5')

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


## Kanji Model 3

In [5]:
def kanji_objective3(trial):
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 4)
    l1_filters = trial.suggest_int('l1_kernel', 54, 96)
    l2_filters = trial.suggest_int('l2_kernel', 42, 74)
    l3_filters = trial.suggest_int('l3_kernel', 20, 64)
    l1_activation = trial.suggest_categorical('l1_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('l2_activation', ['relu', 'sigmoid', 'tanh'])
    l3_activation = trial.suggest_categorical('l3_activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 128)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    # Design model
    kanji_3_design = Sequential()
    kanji_3_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=sample_shape))
    kanji_3_design.add(Dropout(dropout))
    kanji_3_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=sample_shape))
    kanji_3_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    kanji_3_design.add(Conv2D(l3_filters, kernel_size=kern_size, activation=l3_activation, input_shape=sample_shape))
    kanji_3_design.add(Flatten())
    kanji_3_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    kanji_3_design.add(Dense(kanji_classes))

    kanji_3_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

#     print(kanji_2_design.summary())

    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    kanji_3_history = kanji_3_design.fit(kanji_train, epochs=kanji_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)

    # Important metric for optuna to optimize over
    return kanji_3_history.history['val_accuracy'][-1]

In [6]:
# Run Study 3
fileName = output_prefix + 'Kanji_Model_3.txt'
orig_stdout = sys.stdout
with open(fileName, 'w') as f:
    sys.stdout = f
    kanji_study3 = optuna.create_study(direction='maximize', study_name="Kanji-3")
    kanji_study3.optimize(kanji_objective3, n_trials=15)
    sys.stdout = orig_stdout
    f.close()

[32m[I 2023-03-19 23:35:29,022][0m A new study created in memory with name: Kanji-3[0m
[32m[I 2023-03-19 23:57:07,419][0m Trial 0 finished with value: 0.8555172681808472 and parameters: {'kernel_size': 2, 'l1_kernel': 88, 'l2_kernel': 58, 'l3_kernel': 47, 'l1_activation': 'sigmoid', 'l2_activation': 'relu', 'l3_activation': 'tanh', 'dropout': 0.18739687932825572, 'average_pooling_size': 4, 'dense_layer_size': 65, 'dense_layer_activation': 'relu'}. Best is trial 0 with value: 0.8555172681808472.[0m
[32m[I 2023-03-20 00:18:07,727][0m Trial 1 finished with value: 0.8630719184875488 and parameters: {'kernel_size': 3, 'l1_kernel': 59, 'l2_kernel': 71, 'l3_kernel': 47, 'l1_activation': 'sigmoid', 'l2_activation': 'relu', 'l3_activation': 'relu', 'dropout': 0.21738643996234241, 'average_pooling_size': 3, 'dense_layer_size': 123, 'dense_layer_activation': 'sigmoid'}. Best is trial 1 with value: 0.8630719184875488.[0m
[32m[I 2023-03-20 00:39:24,221][0m Trial 2 finished with value: 0.

In [7]:
# Print the info from the best trial
print(f'Best trial info:\n{kanji_study3.best_trial}\n')
for param, value in kanji_study3.best_params.items():
    print(f'Param: {param}\tValue: {value}')

Best trial info:
FrozenTrial(number=14, state=1, values=[0.9037725925445557], datetime_start=datetime.datetime(2023, 3, 20, 4, 19, 46, 965967), datetime_complete=datetime.datetime(2023, 3, 20, 4, 42, 4, 371668), params={'kernel_size': 4, 'l1_kernel': 70, 'l2_kernel': 50, 'l3_kernel': 34, 'l1_activation': 'relu', 'l2_activation': 'tanh', 'l3_activation': 'sigmoid', 'dropout': 0.2776581338836957, 'average_pooling_size': 4, 'dense_layer_size': 80, 'dense_layer_activation': 'tanh'}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'kernel_size': IntDistribution(high=4, log=False, low=2, step=1), 'l1_kernel': IntDistribution(high=96, log=False, low=54, step=1), 'l2_kernel': IntDistribution(high=74, log=False, low=42, step=1), 'l3_kernel': IntDistribution(high=64, log=False, low=20, step=1), 'l1_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'l2_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'l3_activation': Catego

In [8]:
# Optuna doesn't save the best model. You must rebuild it and save it.
kern_size = 4
l1_filters = 70
l2_filters = 50
l3_filters = 34
l1_activation = "relu"
l2_activation = "tanh"
l3_activation = "sigmoid"
dropout	= 0.2776581338836957
average_pooling_size = 4
dense_layer_size = 80
dense_layer_activation = "tanh"

kanji_3 = Sequential()
kanji_3.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=sample_shape))
kanji_3.add(Dropout(dropout))
kanji_3.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=sample_shape))
kanji_3.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
kanji_3.add(Conv2D(l3_filters, kernel_size=kern_size, activation=l3_activation, input_shape=sample_shape))
kanji_3.add(Flatten())
kanji_3.add(Dense(dense_layer_size, activation=dense_layer_activation))
kanji_3.add(Dense(kanji_classes))

kanji_3.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [9]:
tf.config.run_functions_eagerly(True)

callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

kanji_3_optuna_history = kanji_3.fit(kanji_train, epochs=kanji_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)
kanji_3.save(path_prefix + 'kanji_models\\kanji_3.h5', save_format='h5')

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


# Creating K49 Base Models for Transfer Learning

## K49 Base Model 1

In [4]:
def k49_base1_objective(trial):
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 3)
    l1_filters = trial.suggest_int('l1_kernel', 20, 54)
    l2_filters = trial.suggest_int('l2_kernel', 40, 80)
    l1_activation = trial.suggest_categorical('l1_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('l2_activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 40, 80)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    k49_interpolated_shape = (64, 64, 1)

    # Design model
    k49_base1_design = Sequential()
    k49_base1_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=k49_interpolated_shape))
    k49_base1_design.add(Dropout(dropout))
    k49_base1_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=k49_interpolated_shape))
    k49_base1_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    k49_base1_design.add(Flatten())
    k49_base1_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    k49_base1_design.add(Dense(k49_classes))

    k49_base1_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

    #  print(k49_base_design.summary())

    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    k49_base1_history = k49_base1_design.fit(k49_train_images,
                    epochs=k49_epochs, batch_size=bs, callbacks=callback,
                    validation_data=k49_val_images)

    # Important metric for optuna to optimize over
    return k49_base1_history.history['val_accuracy'][-1]

In [5]:
# Run Study 4
# tf.config.run_functions_eagerly(True)
outputs_path = output_prefix + 'K49_Base1_Output.txt'
orig_stdout = sys.stdout

with open(outputs_path, 'w') as f:
    sys.stdout = f
    k49_base1_study = optuna.create_study(direction='maximize', study_name="K49-Base1")
    k49_base1_study.optimize(k49_base1_objective, n_trials=10)
    sys.stdout = orig_stdout
    f.close()

[32m[I 2023-03-20 10:14:25,769][0m A new study created in memory with name: K49-Base1[0m
[32m[I 2023-03-20 11:41:49,999][0m Trial 0 finished with value: 0.8237476348876953 and parameters: {'kernel_size': 2, 'l1_kernel': 35, 'l2_kernel': 57, 'l1_activation': 'relu', 'l2_activation': 'relu', 'dropout': 0.26424772540862734, 'average_pooling_size': 3, 'dense_layer_size': 51, 'dense_layer_activation': 'relu'}. Best is trial 0 with value: 0.8237476348876953.[0m
[33m[W 2023-03-20 11:58:07,766][0m Trial 1 failed with parameters: {'kernel_size': 2, 'l1_kernel': 27, 'l2_kernel': 64, 'l1_activation': 'sigmoid', 'l2_activation': 'tanh', 'dropout': 0.2258362952046052, 'average_pooling_size': 4, 'dense_layer_size': 50, 'dense_layer_activation': 'sigmoid'} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "c:\Users\LifeH\SoftwareDevelopment\Applications\Python\Lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_va

In [None]:
# Print the info from the best trial
print(f'Best trial info:\n{k49_base1_study.best_trial}\n')
for param, value in k49_base1_study.best_params.items():
    print(f'Param: {param}\tValue: {value}')

In [None]:
# Optuna doesn't save the best model. You must rebuild it and save it.
kern_size = 
l1_filters = 
l2_filters = 
activations = ""
dropout	= 
average_pooling_size = 
dense_layer_size = 
dense_layer_activation = ""

k49_base1 = Sequential()
k49_base1.add(Conv2D(l1_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
k49_base1.add(Dropout(dropout))
k49_base1.add(Conv2D(l2_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
k49_base1.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
k49_base1.add(Flatten())
k49_base1.add(Dense(dense_layer_size, activation=dense_layer_activation))
k49_base1.add(Dense(k49_classes))

k49_base1.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [None]:
tf.config.run_functions_eagerly(True)

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

k49_base1_optuna_history = k49_base1.fit(k49_train_images, epochs=k49_epochs, batch_size=bs,
                    callbacks=callback, validation_data=k49_val_images)

k49_base1.save(path_prefix + 'base_models\\k49_base1.h5', save_format='h5')

## K49 Base Model 2

In [5]:
def k49_base2_objective(trial):
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 3)
    l1_filters = trial.suggest_int('l1_kernel', 20, 40)
    l2_filters = trial.suggest_int('l2_kernel', 40, 64)
    l3_filters = trial.suggest_int('l3_kernel', 54, 96)
    l1_activation = trial.suggest_categorical('l1_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('l2_activation', ['relu', 'sigmoid', 'tanh'])
    l3_activation = trial.suggest_categorical('l3_activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 80)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    k49_interpolated_shape = (64, 64, 1)

    # Design model
    k49_base2_design = Sequential()
    k49_base2_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=k49_interpolated_shape))
    k49_base2_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=k49_interpolated_shape))
    k49_base2_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    k49_base2_design.add(Conv2D(l3_filters, kernel_size=kern_size, activation=l3_activation, input_shape=k49_interpolated_shape))
    k49_base2_design.add(Dropout(dropout))
    k49_base2_design.add(Flatten())
    k49_base2_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    k49_base2_design.add(Dense(k49_classes))

    k49_base2_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

    #  print(k49_base_design.summary())

    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    k49_base2_history = k49_base2_design.fit(k49_train_images,
                    epochs=k49_epochs, batch_size=bs, callbacks=callback,
                    validation_data=k49_val_images)

    # Important metric for optuna to optimize over
    return k49_base2_history.history['val_accuracy'][-1]

In [6]:
# Run Study 5
tf.config.run_functions_eagerly(True)
outputs_path = output_prefix + 'K49_Base2_Output.txt'
orig_stdout = sys.stdout

with open(outputs_path, 'w') as f:
    sys.stdout = f
    k49_base2_study = optuna.create_study(direction='maximize', study_name="K49-Base2")
    k49_base2_study.optimize(k49_base2_objective, n_trials=25)
    sys.stdout = orig_stdout
    f.close()

[32m[I 2023-03-20 12:00:36,990][0m A new study created in memory with name: K49-Base2[0m
[32m[I 2023-03-20 12:05:38,050][0m Trial 0 finished with value: 0.025942355394363403 and parameters: {'kernel_size': 2, 'l1_kernel': 20, 'l2_kernel': 42, 'l3_kernel': 64, 'l1_activation': 'sigmoid', 'l2_activation': 'tanh', 'l3_activation': 'tanh', 'dropout': 0.22684214753793908, 'average_pooling_size': 2, 'dense_layer_size': 73, 'dense_layer_activation': 'relu'}. Best is trial 0 with value: 0.025942355394363403.[0m
[32m[I 2023-03-20 12:18:29,462][0m Trial 1 finished with value: 0.8604560494422913 and parameters: {'kernel_size': 2, 'l1_kernel': 25, 'l2_kernel': 44, 'l3_kernel': 55, 'l1_activation': 'relu', 'l2_activation': 'tanh', 'l3_activation': 'relu', 'dropout': 0.1799279544584269, 'average_pooling_size': 3, 'dense_layer_size': 74, 'dense_layer_activation': 'relu'}. Best is trial 1 with value: 0.8604560494422913.[0m
[32m[I 2023-03-20 12:37:10,855][0m Trial 2 finished with value: 0.85

In [7]:
# Print the info from the best trial
print(f'Best trial info:\n{k49_base2_study.best_trial}\n')
for param, value in k49_base2_study.best_params.items():
    print(f'Param: {param}\tValue: {value}')

# 6 finished with value: 0.8910421133041382 and parameters: {'kernel_size': 3, 'l1_kernel': 36, 'l2_kernel': 50, 'l3_kernel': 91, 'l1_activation': 'tanh', 'l2_activation': 'relu', 'l3_activation': 'relu', 'dropout': 0.2092070135957213, 'average_pooling_size': 3, 'dense_layer_size': 71, 'dense_layer_activation': 'sigmoid'}. Best is trial 6 with value: 0.8910421133041382.

Best trial info:
FrozenTrial(number=14, state=TrialState.COMPLETE, values=[0.9177886843681335], datetime_start=datetime.datetime(2023, 3, 20, 14, 42, 43, 535766), datetime_complete=datetime.datetime(2023, 3, 20, 14, 59, 25, 68242), params={'kernel_size': 3, 'l1_kernel': 40, 'l2_kernel': 55, 'l3_kernel': 70, 'l1_activation': 'relu', 'l2_activation': 'relu', 'l3_activation': 'relu', 'dropout': 0.24449647909566152, 'average_pooling_size': 4, 'dense_layer_size': 77, 'dense_layer_activation': 'tanh'}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'kernel_size': IntDistribution(high=3, log=False, low=2, step=1), 'l1_kernel': IntDistribution(high=40, log=False, low=20, step=1), 'l2_kernel': IntDistribution(high=64, log=False, low=40, step=1), 'l3_kernel': IntDistribution(high=96, log=False, low=54, step=1), 'l1_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'l2_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'l3_a

In [9]:
# Optuna doesn't save the best model. You must rebuild it and save it.
kern_size = 3
l1_filters = 40
l2_filters = 55
l3_filters = 70
l1_activation = "relu"
l2_activation = "relu"
l3_activation = "relu"
dropout	= 0.24449647909566152
average_pooling_size = 2
dense_layer_size = 77
dense_layer_activation = "tanh"

k49_interpolated_shape = (64, 64, 1)

k49_base2 = Sequential()
k49_base2.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=k49_interpolated_shape))
k49_base2.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=k49_interpolated_shape))
k49_base2.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
k49_base2.add(Conv2D(l3_filters, kernel_size=kern_size, activation=l3_activation, input_shape=k49_interpolated_shape))
k49_base2.add(Dropout(dropout))
k49_base2.add(Flatten())
k49_base2.add(Dense(dense_layer_size, activation=dense_layer_activation))
k49_base2.add(Dense(k49_classes))

k49_base2.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

k49_base2.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 62, 62, 40)        400       
                                                                 
 conv2d_1 (Conv2D)           (None, 60, 60, 55)        19855     
                                                                 
 average_pooling2d (AverageP  (None, 30, 30, 55)       0         
 ooling2D)                                                       
                                                                 
 conv2d_2 (Conv2D)           (None, 28, 28, 70)        34720     
                                                                 
 dropout (Dropout)           (None, 28, 28, 70)        0         
                                                                 
 flatten (Flatten)           (None, 54880)             0         
                                                        

In [5]:
# tf.config.run_functions_eagerly(True)

# callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# k49_base2_optuna_history = k49_base2.fit(k49_train_images, epochs=k49_epochs, batch_size=bs,
#                     callbacks=callback, validation_data=k49_val_images)

# k49_base2.save(path_prefix + 'base_models\\k49_base2.h5', save_format='h5')

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15


In [10]:
tf.config.run_functions_eagerly(True)

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

k49_base2_optuna_history = k49_base2.fit(k49_train_images, epochs=k49_epochs, batch_size=bs,
                    callbacks=callback, validation_data=k49_val_images)

k49_base2.save(path_prefix + 'base_models\\temp_k49_base2.h5', save_format='h5')

Epoch 1/15
Epoch 2/15

# Creating Kanji Top Models for Transfer Learning

## Kanji Top Model 1

In [5]:
base_path = path_prefix + 'base_models\\k49_base1.h5'

def kanji_top1_objective(trial):
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 3)
    l1_filters = trial.suggest_int('first_layer_kernel', 32, 54)
    l2_filters = trial.suggest_int('second_layer_kernel', 64, 96)
    l1_activation = trial.suggest_categorical('first_layer_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('second_layer_activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 128)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    base_model = load_base_model(base_path)
    
    # Design model
    kanji_top1_design = Sequential()
    kanji_top1_design.add(base_model)
    kanji_top1_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=sample_shape))
    kanji_top1_design.add(Dropout(dropout))
    kanji_top1_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=sample_shape))
    kanji_top1_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    kanji_top1_design.add(Flatten())
    kanji_top1_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    kanji_top1_design.add(Dense(kanji_classes))

    kanji_top1_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

#     print(kanji_top_design.summary())

    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    kanji_top1_history = kanji_top1_design.fit(kanji_train, epochs=k49_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)

    # Important metric for optuna to optimize over
    return kanji_top1_history.history['val_accuracy'][-1]

In [6]:
# Run Study 6
outputs_path = output_prefix + 'Kanji_TL1_Output.txt'
orig_stdout = sys.stdout
with open(outputs_path, 'w') as f:
    sys.stdout = f
    kanji_top1_study = optuna.create_study(direction='maximize', study_name="Kanji-TL1")
    kanji_top1_study.optimize(kanji_top1_objective, n_trials=15)
    sys.stdout = orig_stdout
    f.close()

[32m[I 2023-03-20 20:19:11,556][0m A new study created in memory with name: Kanji-TL1[0m
[32m[I 2023-03-20 20:23:23,203][0m Trial 0 finished with value: 0.8699182868003845 and parameters: {'kernel_size': 2, 'first_layer_kernel': 39, 'second_layer_kernel': 73, 'first_layer_activation': 'tanh', 'second_layer_activation': 'sigmoid', 'dropout': 0.18575853734241426, 'average_pooling_size': 2, 'dense_layer_size': 78, 'dense_layer_activation': 'tanh'}. Best is trial 0 with value: 0.8699182868003845.[0m
[32m[I 2023-03-20 20:27:34,393][0m Trial 1 finished with value: 0.9157183766365051 and parameters: {'kernel_size': 2, 'first_layer_kernel': 54, 'second_layer_kernel': 69, 'first_layer_activation': 'tanh', 'second_layer_activation': 'tanh', 'dropout': 0.2583689339186911, 'average_pooling_size': 3, 'dense_layer_size': 124, 'dense_layer_activation': 'relu'}. Best is trial 1 with value: 0.9157183766365051.[0m
[32m[I 2023-03-20 20:31:37,742][0m Trial 2 finished with value: 0.8987676501274

In [7]:
# Print the info from the best trial
print(f'Best trial info:\n{kanji_top1_study.best_trial}\n')
for param, value in kanji_top1_study.best_params.items():
    print(f'Param: {param}\tValue: {value}')

Best trial info:
FrozenTrial(number=7, state=1, values=[0.9218093156814575], datetime_start=datetime.datetime(2023, 3, 20, 20, 48, 0, 310703), datetime_complete=datetime.datetime(2023, 3, 20, 20, 52, 6, 571160), params={'kernel_size': 2, 'first_layer_kernel': 37, 'second_layer_kernel': 66, 'first_layer_activation': 'tanh', 'second_layer_activation': 'tanh', 'dropout': 0.2796749501764636, 'average_pooling_size': 2, 'dense_layer_size': 118, 'dense_layer_activation': 'sigmoid'}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'kernel_size': IntDistribution(high=3, log=False, low=2, step=1), 'first_layer_kernel': IntDistribution(high=54, log=False, low=32, step=1), 'second_layer_kernel': IntDistribution(high=96, log=False, low=64, step=1), 'first_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'second_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'dropout': FloatDistribution(high=0.3, log=False, low=

In [6]:
# Optuna doesn't save the best model. You must rebuild it and save it.
kern_size = 3
l1_filters = 50
l2_filters = 67
l1_activation = "tanh"
l2_activation = "tanh"
dropout	= 0.2623467440665549
average_pooling_size = 2
dense_layer_size = 115
dense_layer_activation = "sigmoid"

k49_base_model = load_base_model(base_path)

kanji_top1 = Sequential()
kanji_top1.add(k49_base_model)
kanji_top1.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=sample_shape))
kanji_top1.add(Dropout(dropout))
kanji_top1.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=sample_shape))
kanji_top1.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
kanji_top1.add(Flatten())
kanji_top1.add(Dense(dense_layer_size, activation=dense_layer_activation))
kanji_top1.add(Dense(kanji_classes))

kanji_top1.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [10]:
tf.config.run_functions_eagerly(True)

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

kanji_tl1_history = kanji_top1.fit(kanji_train, epochs=k49_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)

kanji_top1.save(path_prefix + 'kanji_models\\kanji_TL1.h5', save_format='h5')

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


## Kanji Top Model 2

In [6]:
base_path = path_prefix + 'base_models\\k49_base2.h5'

def kanji_top2_objective(trial):
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 3)
    l1_filters = trial.suggest_int('l1_kernel', 32, 54)
    l2_filters = trial.suggest_int('l2_kernel', 64, 96)
    l3_filters = trial.suggest_int('l3_kernel', 64, 96)
    l1_activation = trial.suggest_categorical('l1_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('l2_activation', ['relu', 'sigmoid', 'tanh'])
    l3_activation = trial.suggest_categorical('l3_activation', ['relu', 'sigmoid', 'tanh'])
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 128)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    base_model = load_base_model(base_path)
    
    # Design model
    kanji_top2_design = Sequential()
    kanji_top2_design.add(base_model)
    kanji_top2_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=sample_shape))
    kanji_top2_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    kanji_top2_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=sample_shape))
    kanji_top2_design.add(Conv2D(l3_filters, kernel_size=kern_size, activation=l3_activation, input_shape=sample_shape))
    kanji_top2_design.add(Flatten())
    kanji_top2_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    kanji_top2_design.add(Dense(kanji_classes))

    kanji_top2_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

    print(kanji_top2_design.summary())

    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    kanji_top2_history = kanji_top2_design.fit(kanji_train, epochs=k49_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)

    # Important metric for optuna to optimize over
    return kanji_top2_history.history['val_accuracy'][-1]

In [8]:
temp = load_base_model(base_path)
temp.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 62, 62, 40)        400       
                                                                 
 conv2d_1 (Conv2D)           (None, 60, 60, 55)        19855     
                                                                 
 average_pooling2d (AverageP  (None, 15, 15, 55)       0         
 ooling2D)                                                       
                                                                 
 conv2d_2 (Conv2D)           (None, 13, 13, 70)        34720     
                                                                 
 dropout (Dropout)           (None, 13, 13, 70)        0         
                                                                 
Total params: 54,975
Trainable params: 0
Non-trainable params: 54,975
____________________________________________________

In [None]:
# Run Study 7
outputs_path = output_prefix + 'Kanji_TL2_Output.txt'
orig_stdout = sys.stdout
with open(outputs_path, 'w') as f:
    sys.stdout = f
    kanji_top2_study = optuna.create_study(direction='maximize', study_name="Kanji-TL2")
    kanji_top2_study.optimize(kanji_top2_objective, n_trials=20)
    sys.stdout = orig_stdout
    f.close()

In [None]:
# Print the info from the best trial
print(f'Best trial info:\n{kanji_top2_study.best_trial}\n')
for param, value in kanji_top2_study.best_params.items():
    print(f'Param: {param}\tValue: {value}')

In [None]:
# Optuna doesn't save the best model. You must rebuild it and save it.
kern_size = 
l1_filters = 
l2_filters = 
activations = ""
dropout	= 
average_pooling_size = 
dense_layer_size = 
dense_layer_activation = ""

k49_base_model = load_base_model(base_path)

kanji_top2 = Sequential()
kanji_top2.add(k49_base_model)
kanji_top2.add(Conv2D(l1_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
kanji_top2.add(Dropout(dropout))
kanji_top2.add(Conv2D(l2_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
kanji_top2.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
kanji_top2.add(Flatten())
kanji_top2.add(Dense(dense_layer_size, activation=dense_layer_activation))
kanji_top2.add(Dense(kanji_classes))

kanji_top2.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [None]:
tf.config.run_functions_eagerly(True)

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

kanji_tl2_history = kanji_top2.fit(kanji_train, epochs=k49_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)

kanji_top2.save(path_prefix + 'kanji_models\\kanji_TL2.h5', save_format='h5')