# Justin Zarkovacki 2/15/2023
# Transfer Learning K49 -> Kanji

# Prepare imports

In [1]:
import numpy as np
import optuna
import sys
import os

from matplotlib import pyplot as plt

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, Dropout, AveragePooling2D, Flatten, Dense
from keras import models, backend as K
from tensorflow.keras import models

print("Done!")

  from .autonotebook import tqdm as notebook_tqdm


Done!


In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Function Definitions and Variables

In [2]:
epochs = 12

def load(f):
    return np.load(f)['arr_0']

# Loading a base model requires you pop all dimensionality flattening layers
def load_base_model(filename):
    bm = models.load_model(filename)
    bm.pop()
    bm.pop()
    bm.pop()
    bm.trainable = False
    return bm

# Helper to create the graphics
def create_visuals(graph_title, model_hist, test_images, test_labels):
    accuracy_data = model_hist.history['accuracy']
    val_accuracy_data = model_hist.history['val_accuracy']

    lower_bound = min(min(accuracy_data), min(val_accuracy_data))

    plt.plot(accuracy_data, label='Train Accuracy')
    plt.plot(val_accuracy_data, label = 'Validation Accuracy')

    plt.title(graph_title)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.ylim([lower_bound - 0.01, 1])
    plt.legend(loc='lower right')

    print(accuracy_data[-1])
    print(val_accuracy_data[-1])
#     test_loss, test_acc = model_hist.evaluate(test_images, test_labels, verbose=2)

print("Done!")

Done!


In [3]:
im_size = (64, 64)
sample_shape = (64, 64, 1)
bs = 128   # batch size

k49_classes = 49
k49_epochs = 15

kanji_classes = 150
kanji_epochs = 15

# Define paths to datasets
prefix = 'C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\Datasets\\'
k49_train_images_path = prefix + 'k49_train_imgs'
k49_train_labels_path = prefix + 'Kuzushiji-49 - train-labels.npz'
k49_test_images_path = prefix + 'k49_test_imgs'
k49_test_labels_path = prefix + 'Kuzushiji-49 - test-labels.npz'
kanji_path = prefix + 'final_dataset'

## Load K49 Data

In [4]:
# Load K49 Data
# image_dataset_from_directory() loads images using a default interpolation method of bilinear.
# The K49 dataset needs to be bilinearly interpolated up to (64, 64) from (28, 28) 
k49_train_images = tf.keras.utils.image_dataset_from_directory(k49_train_images_path, seed=222,
                  color_mode="grayscale", image_size=im_size, batch_size=bs)

k49_val_images = tf.keras.utils.image_dataset_from_directory(k49_test_images_path, seed=222,
                  color_mode="grayscale", image_size=im_size, batch_size=bs)

print("Done!")

Found 232365 files belonging to 49 classes.
Found 38547 files belonging to 49 classes.
Done!


## Load Kanji Data

In [4]:
# kanji_train = tf.keras.utils.image_dataset_from_directory(kanji_path, validation_split=0.3, seed=222,
#                   subset="training", color_mode="grayscale", image_size=im_size, batch_size=bs)
kanji_train = tf.keras.utils.image_dataset_from_directory(kanji_path, validation_split=0.7, seed=222,
                  subset="training", color_mode="grayscale", image_size=im_size, batch_size=bs)

kanji_val = tf.keras.utils.image_dataset_from_directory(kanji_path, validation_split=0.3, seed=222,
                  subset="validation", color_mode="grayscale", image_size=im_size, batch_size=bs)


AUTOTUNE = tf.data.AUTOTUNE
normalization_layer = tf.keras.layers.Rescaling(1./255)

print("Done!")

Found 70599 files belonging to 150 classes.
Using 21180 files for training.
Found 70599 files belonging to 150 classes.
Using 21179 files for validation.
Done!


### Rescale Image Data and Training Performance Optimizations

In [None]:
# Rescale K49 training data
normalized_k49_train = k49_train_images.map(lambda x, y: (normalization_layer(x), y))
normalized_k49_val = k49_val_images.map(lambda x, y: (normalization_layer(x), y))

# K49 Optimization
k49_train_images = normalized_k49_train.cache().prefetch(buffer_size=AUTOTUNE)
k49_val_images = normalized_k49_val.cache().prefetch(buffer_size=AUTOTUNE)

print("Done!")

In [5]:
# Rescale Kanji training data
normalized_kanji_train = kanji_train.map(lambda x, y: (normalization_layer(x), y))
normalized_kanji_val = kanji_val.map(lambda x, y: (normalization_layer(x), y))

# Kanji Optimization
kanji_train = kanji_train.cache().prefetch(buffer_size=AUTOTUNE)
kanji_val = kanji_val.cache().prefetch(buffer_size=AUTOTUNE)

print("Done!")

This notebook will create an ensemble model for Kanjij character recognition. It wil be composed of 2 basic models, and one transfer learning model. Knowledge from K49 will be transfered to Kanji.

# Creating Kanji Model 1

In [7]:
def kanji_objective1(trial):
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 4)
    l1_filters = trial.suggest_int('first_layer_kernel', 32, 64)
    l2_filters = trial.suggest_int('second_layer_kernel', 32, 96)
    l3_filters = trial.suggest_int('third_layer_kernel', 32, 48)
    activations = trial.suggest_categorical('activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 3)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 128)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    # Design model
    kanji_1_design = Sequential()
    kanji_1_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
    kanji_1_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    kanji_1_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
    kanji_1_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    kanji_1_design.add(Conv2D(l3_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
    kanji_1_design.add(Flatten())
    kanji_1_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    kanji_1_design.add(Dense(kanji_classes))

    kanji_1_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

#     print(kanji1_design.summary())

    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    kanji_1_history = kanji_1_design.fit(kanji_train, epochs=kanji_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)

    # Important metric for optuna to optimize over
    return kanji_1_history.history['val_accuracy'][-1]

In [11]:
# Run Study 1
fileName = 'C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\KanjiModelDevelopmentOutputs/Kanji_Model_1.txt'
orig_stdout = sys.stdout
with open(fileName, 'w') as f:
    sys.stdout = f
    kanji_study1 = optuna.create_study(direction='maximize', study_name="Kanji-1")
    kanji_study1.optimize(kanji_objective1, n_trials=25)
    sys.stdout = orig_stdout
    f.close()

[32m[I 2023-03-14 21:49:11,206][0m A new study created in memory with name: Kanji-1[0m
[32m[I 2023-03-14 21:50:37,642][0m Trial 0 finished with value: 0.7958354949951172 and parameters: {'kernel_size': 4, 'first_layer_kernel': 37, 'second_layer_kernel': 72, 'third_layer_kernel': 38, 'activation': 'sigmoid', 'dropout': 0.27301899586357065, 'average_pooling_size': 2, 'dense_layer_size': 67, 'dense_layer_activation': 'sigmoid'}. Best is trial 0 with value: 0.7958354949951172.[0m
[32m[I 2023-03-14 21:51:53,275][0m Trial 1 finished with value: 0.8719485998153687 and parameters: {'kernel_size': 4, 'first_layer_kernel': 42, 'second_layer_kernel': 61, 'third_layer_kernel': 33, 'activation': 'tanh', 'dropout': 0.16269426998606001, 'average_pooling_size': 3, 'dense_layer_size': 76, 'dense_layer_activation': 'tanh'}. Best is trial 1 with value: 0.8719485998153687.[0m
[32m[I 2023-03-14 21:53:38,117][0m Trial 2 finished with value: 0.8597195148468018 and parameters: {'kernel_size': 4, 'f

In [16]:
# Print the info from the best trial
print(f'Best trial info:\n{kanji_study1.best_trial}\n')
for param, value in kanji_study1.best_params.items():
    print(f'Param: {param}\tValue: {value}')

Best trial info:
FrozenTrial(number=4, state=1, values=[0.9355021715164185], datetime_start=datetime.datetime(2023, 3, 11, 9, 50, 19, 584781), datetime_complete=datetime.datetime(2023, 3, 11, 10, 0, 9, 749432), params={'kernel_size': 3, 'first_layer_kernel': 38, 'second_layer_kernel': 59, 'third_layer_kernel': 33, 'activation': 'relu', 'dropout': 0.19274665425425558, 'average_pooling_size': 2, 'dense_layer_size': 103, 'dense_layer_activation': 'sigmoid'}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'kernel_size': IntDistribution(high=4, log=False, low=2, step=1), 'first_layer_kernel': IntDistribution(high=64, log=False, low=32, step=1), 'second_layer_kernel': IntDistribution(high=96, log=False, low=32, step=1), 'third_layer_kernel': IntDistribution(high=48, log=False, low=32, step=1), 'activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'dropout': FloatDistribution(high=0.3, log=False, low=0.15, step=None), 'average_pooling_size': IntDi

In [9]:
# Optuna doesn't save the best model. You must rebuild it and save it.
kern_size = 3
l1_filters = 38
l2_filters = 59
l3_filters = 33
activations = "relu"
dropout	= 0.19274665425425558
average_pooling_size = 2
dense_layer_size = 103
dense_layer_activation = "sigmoid"

kanji_1 = Sequential()
kanji_1.add(Conv2D(l1_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
kanji_1.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
kanji_1.add(Conv2D(l2_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
kanji_1.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
kanji_1.add(Conv2D(l3_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
kanji_1.add(Flatten())
kanji_1.add(Dense(dense_layer_size, activation=dense_layer_activation))
kanji_1.add(Dense(kanji_classes))

kanji_1.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [7]:
tf.config.run_functions_eagerly(True)

callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

kanji_1_optuna_history = kanji_1.fit(kanji_train, epochs=k49_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)
kanji_1.save('C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\kanji_models\\kanji_1.h5', save_format='h5')

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


# Creating Kanji Model 2

In [8]:
def kanji_objective2(trial):
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 4)
    l1_filters = trial.suggest_int('first_layer_kernel', 32, 54)
    l2_filters = trial.suggest_int('second_layer_kernel', 20, 64)
    l3_filters = trial.suggest_int('third_layer_kernel', 32, 64)
    activations = trial.suggest_categorical('activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 128)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    # Design model
    kanji_2_design = Sequential()
    kanji_2_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
    kanji_2_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
    kanji_2_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    kanji_2_design.add(Dropout(dropout))
    kanji_2_design.add(Conv2D(l3_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
    kanji_2_design.add(Flatten())
    kanji_2_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    kanji_2_design.add(Dense(kanji_classes))

    kanji_2_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

#     print(kanji_2_design.summary())

    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    kanji_2_history = kanji_2_design.fit(kanji_train, epochs=kanji_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)

    # Important metric for optuna to optimize over
    return kanji_2_history.history['val_accuracy'][-1]

In [10]:
# Run Study 2
fileName = 'C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\KanjiModelDevelopmentOutputs\\Kanji_Model_2.txt'
orig_stdout = sys.stdout
with open(fileName, 'w') as f:
    sys.stdout = f
    kanji_study2 = optuna.create_study(direction='maximize', study_name="Kanji-2")
    kanji_study2.optimize(kanji_objective2, n_trials=10)
    sys.stdout = orig_stdout
    f.close()

[32m[I 2023-03-14 21:26:21,963][0m A new study created in memory with name: Kanji-2[0m
[32m[I 2023-03-14 21:27:15,310][0m Trial 0 finished with value: 0.5838330388069153 and parameters: {'kernel_size': 4, 'first_layer_kernel': 38, 'second_layer_kernel': 29, 'third_layer_kernel': 42, 'activation': 'relu', 'dropout': 0.24277788353053964, 'average_pooling_size': 4, 'dense_layer_size': 88, 'dense_layer_activation': 'sigmoid'}. Best is trial 0 with value: 0.5838330388069153.[0m
[32m[I 2023-03-14 21:27:39,167][0m Trial 1 finished with value: 0.024080457165837288 and parameters: {'kernel_size': 2, 'first_layer_kernel': 37, 'second_layer_kernel': 36, 'third_layer_kernel': 36, 'activation': 'sigmoid', 'dropout': 0.17187486707995475, 'average_pooling_size': 2, 'dense_layer_size': 97, 'dense_layer_activation': 'tanh'}. Best is trial 0 with value: 0.5838330388069153.[0m
[32m[I 2023-03-14 21:28:49,299][0m Trial 2 finished with value: 0.3790547251701355 and parameters: {'kernel_size': 4, 

In [9]:
# Print the info from the best trial
print(f'Best trial info:\n{kanji_study2.best_trial}\n')
for param, value in kanji_study2.best_params.items():
    print(f'Param: {param}\tValue: {value}')

Best trial info:
FrozenTrial(number=0, state=1, values=[0.8983899354934692], datetime_start=datetime.datetime(2023, 3, 11, 11, 44, 54, 797955), datetime_complete=datetime.datetime(2023, 3, 11, 12, 24, 51, 132705), params={'kernel_size': 4, 'first_layer_kernel': 50, 'second_layer_kernel': 58, 'third_layer_kernel': 50, 'activation': 'tanh', 'dropout': 0.20131789670326516, 'average_pooling_size': 3, 'dense_layer_size': 67, 'dense_layer_activation': 'relu'}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'kernel_size': IntDistribution(high=4, log=False, low=2, step=1), 'first_layer_kernel': IntDistribution(high=54, log=False, low=32, step=1), 'second_layer_kernel': IntDistribution(high=64, log=False, low=20, step=1), 'third_layer_kernel': IntDistribution(high=64, log=False, low=32, step=1), 'activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'dropout': FloatDistribution(high=0.3, log=False, low=0.15, step=None), 'average_pooling_size': IntDis

In [10]:
# Optuna doesn't save the best model. You must rebuild it and save it.
kern_size = 4
l1_filters = 50
l2_filters = 58
l3_filters = 50
activations = "tanh"
dropout	= 0.20131789670326516
average_pooling_size = 3
dense_layer_size = 67
dense_layer_activation = "relu"

kanji_2 = Sequential()
kanji_2.add(Conv2D(l1_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
kanji_2.add(Conv2D(l2_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
kanji_2.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
kanji_2.add(Dropout(dropout))
kanji_2.add(Conv2D(l3_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
kanji_2.add(Flatten())
kanji_2.add(Dense(dense_layer_size, activation=dense_layer_activation))
kanji_2.add(Dense(kanji_classes))

kanji_2.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [12]:
tf.config.run_functions_eagerly(True)

callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

kanji_2_optuna_history = kanji_2.fit(kanji_train, epochs=kanji_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)
kanji_2.save('C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\kanji_models\\kanji_2.h5', save_format='h5')

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15


# KMNIST to K49 Transfer Learning

## Create K49 Base

In [9]:
def k49_base_objective(trial):
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 3)
    l1_filters = trial.suggest_int('first_layer_kernel', 20, 40)
    l2_filters = trial.suggest_int('second_layer_kernel', 40, 64)
    activations = trial.suggest_categorical('activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 80)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    k49_interpolated_shape = (64, 64, 1)

    # Design model
    k49_base_design = Sequential()
    k49_base_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=activations, input_shape=k49_interpolated_shape))
    k49_base_design.add(Dropout(dropout))
    k49_base_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=activations, input_shape=k49_interpolated_shape))
    k49_base_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    k49_base_design.add(Flatten())
    k49_base_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    k49_base_design.add(Dense(k49_classes))

    k49_base_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

    #  print(k49_base_design.summary())

    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    # k49_base_history = k49_base_design.fit((k49_train_images, k49_train_labels),
    #                 epochs=k49_epochs, batch_size=bs, callbacks=callback,
    #                 validation_data=(k49_val_images, k49_val_labels))
    k49_base_history = k49_base_design.fit(k49_train_images,
                    epochs=k49_epochs, batch_size=bs, callbacks=callback,
                    validation_data=k49_val_images)

    # Important metric for optuna to optimize over
    return k49_base_history.history['val_accuracy'][-1]

In [10]:
# Run Study 3
outputs_path = 'C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\kanji_models\\K49_Base_Output.txt'
orig_stdout = sys.stdout
with open(outputs_path, 'w') as f:
    sys.stdout = f
    k49_base_study = optuna.create_study(direction='maximize', study_name="K49-Base")
    k49_base_study.optimize(k49_base_objective, n_trials=10)
    sys.stdout = orig_stdout
    f.close()

[32m[I 2023-03-12 09:20:44,278][0m A new study created in memory with name: K49-Base[0m
[32m[I 2023-03-12 10:36:38,080][0m Trial 0 finished with value: 0.8381196856498718 and parameters: {'kernel_size': 3, 'first_layer_kernel': 20, 'second_layer_kernel': 50, 'activation': 'relu', 'dropout': 0.2603375845263513, 'average_pooling_size': 4, 'dense_layer_size': 69, 'dense_layer_activation': 'tanh'}. Best is trial 0 with value: 0.8381196856498718.[0m
[32m[I 2023-03-12 10:54:52,678][0m Trial 1 finished with value: 0.025942355394363403 and parameters: {'kernel_size': 2, 'first_layer_kernel': 35, 'second_layer_kernel': 41, 'activation': 'sigmoid', 'dropout': 0.16680117663141514, 'average_pooling_size': 2, 'dense_layer_size': 72, 'dense_layer_activation': 'relu'}. Best is trial 0 with value: 0.8381196856498718.[0m
[32m[I 2023-03-12 13:07:06,031][0m Trial 2 finished with value: 0.7003917098045349 and parameters: {'kernel_size': 2, 'first_layer_kernel': 21, 'second_layer_kernel': 59, 'a

In [11]:
# Print the info from the best trial
print(f'Best trial info:\n{k49_base_study.best_trial}\n')
for param, value in k49_base_study.best_params.items():
    print(f'Param: {param}\tValue: {value}')

Best trial info:
FrozenTrial(number=6, state=1, values=[0.8689910769462585], datetime_start=datetime.datetime(2023, 3, 12, 15, 1, 15, 392724), datetime_complete=datetime.datetime(2023, 3, 12, 16, 13, 8, 804234), params={'kernel_size': 3, 'first_layer_kernel': 31, 'second_layer_kernel': 47, 'activation': 'relu', 'dropout': 0.21292333203947755, 'average_pooling_size': 4, 'dense_layer_size': 80, 'dense_layer_activation': 'relu'}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'kernel_size': IntDistribution(high=3, log=False, low=2, step=1), 'first_layer_kernel': IntDistribution(high=40, log=False, low=20, step=1), 'second_layer_kernel': IntDistribution(high=64, log=False, low=40, step=1), 'activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'dropout': FloatDistribution(high=0.3, log=False, low=0.15, step=None), 'average_pooling_size': IntDistribution(high=4, log=False, low=2, step=1), 'dense_layer_size': IntDistribution(high=80, log=False, lo

In [12]:
# Optuna doesn't save the best model. You must rebuild it and save it.
kern_size = 3
l1_filters = 31
l2_filters = 47
activations = "relu"
dropout	= 0.21292333203947755
average_pooling_size = 4
dense_layer_size = 80
dense_layer_activation = "relu"

k49_base = Sequential()
k49_base.add(Conv2D(l1_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
k49_base.add(Dropout(dropout))
k49_base.add(Conv2D(l2_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
k49_base.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
k49_base.add(Flatten())
k49_base.add(Dense(dense_layer_size, activation=dense_layer_activation))
k49_base.add(Dense(k49_classes))

k49_base.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [16]:
tf.config.run_functions_eagerly(True)

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

k49_base_optuna_history = k49_base.fit(k49_train_images, epochs=k49_epochs, batch_size=bs,
                    callbacks=callback, validation_data=k49_val_images)

k49_base.save('C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\kanji_models\\k49_base.h5', save_format='h5')

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15


## Create Kanji Top

In [10]:
def kanji_top_objective(trial):
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 3)
    l1_filters = trial.suggest_int('first_layer_kernel', 32, 54)
    l2_filters = trial.suggest_int('second_layer_kernel', 64, 96)
    l1_activation = trial.suggest_categorical('first_layer_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('second_layer_activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 128)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    base_model = load_base_model("C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\kanji_models\\k49_base.h5")
    
    # Design model
    kanji_top_design = Sequential()
    kanji_top_design.add(base_model)
    kanji_top_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=sample_shape))
    kanji_top_design.add(Dropout(dropout))
    kanji_top_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation, input_shape=sample_shape))
    kanji_top_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    kanji_top_design.add(Flatten())
    kanji_top_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    kanji_top_design.add(Dense(kanji_classes))

    kanji_top_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

#     print(kanji_top_design.summary())

    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    kanji_top_history = kanji_top_design.fit(kanji_train, epochs=k49_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)

    # Important metric for optuna to optimize over
    return kanji_top_history.history['val_accuracy'][-1]

In [11]:
# Run Study 4
outputs_path = 'C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\kanji_models\\Kanji_TL_Output.txt'
orig_stdout = sys.stdout
with open(outputs_path, 'w') as f:
    sys.stdout = f
    kanji_top_study = optuna.create_study(direction='maximize', study_name="Kanji-TL")
    kanji_top_study.optimize(kanji_top_objective, n_trials=15)
    sys.stdout = orig_stdout
    f.close()

[32m[I 2023-03-13 10:42:07,120][0m A new study created in memory with name: Kanji-TL[0m
[32m[I 2023-03-13 10:50:45,217][0m Trial 0 finished with value: 0.9115633368492126 and parameters: {'kernel_size': 3, 'first_layer_kernel': 49, 'second_layer_kernel': 73, 'first_layer_activation': 'tanh', 'second_layer_activation': 'tanh', 'dropout': 0.16719602950231316, 'average_pooling_size': 4, 'dense_layer_size': 93, 'dense_layer_activation': 'relu'}. Best is trial 0 with value: 0.9115633368492126.[0m
[32m[I 2023-03-13 10:59:37,781][0m Trial 1 finished with value: 0.936446487903595 and parameters: {'kernel_size': 3, 'first_layer_kernel': 52, 'second_layer_kernel': 88, 'first_layer_activation': 'relu', 'second_layer_activation': 'sigmoid', 'dropout': 0.2714984394523663, 'average_pooling_size': 3, 'dense_layer_size': 122, 'dense_layer_activation': 'tanh'}. Best is trial 1 with value: 0.936446487903595.[0m
[32m[I 2023-03-13 11:07:37,377][0m Trial 2 finished with value: 0.9290334582328796

In [12]:
# Print the info from the best trial
print(f'Best trial info:\n{kanji_top_study.best_trial}\n')
for param, value in kanji_top_study.best_params.items():
    print(f'Param: {param}\tValue: {value}')
    

Best trial info:
FrozenTrial(number=3, state=1, values=[0.9466924667358398], datetime_start=datetime.datetime(2023, 3, 13, 11, 7, 37, 378978), datetime_complete=datetime.datetime(2023, 3, 13, 11, 16, 12, 203003), params={'kernel_size': 3, 'first_layer_kernel': 50, 'second_layer_kernel': 67, 'first_layer_activation': 'tanh', 'second_layer_activation': 'tanh', 'dropout': 0.2623467440665549, 'average_pooling_size': 2, 'dense_layer_size': 115, 'dense_layer_activation': 'sigmoid'}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'kernel_size': IntDistribution(high=3, log=False, low=2, step=1), 'first_layer_kernel': IntDistribution(high=54, log=False, low=32, step=1), 'second_layer_kernel': IntDistribution(high=96, log=False, low=64, step=1), 'first_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'second_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'dropout': FloatDistribution(high=0.3, log=False, low

In [15]:
# Optuna doesn't save the best model. You must rebuild it and save it.
kern_size = 3
l1_filters = 50
l2_filters = 67
activations = "tanh"
dropout	= 0.2623467440665549
average_pooling_size = 2
dense_layer_size = 115
dense_layer_activation = "sigmoid"

k49_base_model = load_base_model("C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\kanji_models\\k49_base.h5")

kanji_top = Sequential()
kanji_top.add(k49_base_model)
kanji_top.add(Conv2D(l1_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
kanji_top.add(Dropout(dropout))
kanji_top.add(Conv2D(l2_filters, kernel_size=kern_size, activation=activations, input_shape=sample_shape))
kanji_top.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
kanji_top.add(Flatten())
kanji_top.add(Dense(dense_layer_size, activation=dense_layer_activation))
kanji_top.add(Dense(kanji_classes))

kanji_top.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [16]:
tf.config.run_functions_eagerly(True)

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

kanji_base_optuna_history = kanji_top.fit(kanji_train, epochs=k49_epochs, batch_size=bs,
                    callbacks=callback, validation_data=kanji_val)

kanji_top.save('C:\\Users\\LifeH\\SoftwareDevelopment\\KanjiCharacterRecognition\\kanji_models\\kanji_TL.h5', save_format='h5')

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
