In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Reshape, Conv2D, Conv1D, MaxPooling2D, Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow import keras, config
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Report how many GPU devices TensorFlow lists; 0 means no GPU device was found.
print("GPUs Available: ", len(config.list_physical_devices('GPU')))

In [None]:
from image_process.image_preprocessors import add_noise_randomly, cut_and_center, cut_and_right_align, shift_randomly, Preprocess
from data_filters import tempo_interval, take_percent
# Global settings shared by the cells below.

# Split arguments handed to create_all_sets.
# NOTE(review): whether these are split points or set fractions is decided inside
# morse_helpers.create_all_sets — confirm there.
TEST_SPLIT_SIZE = 0.80
VALIDATION_SPLIT_SIZE = 0.90
# Shape used for Input(shape=...) and passed to create_sets / Image_Generator.
# presumably (height, width, channels) — TODO confirm
IMAGE_TARGET_SIZE = (5, 1400, 1)
# Amount cropped by Cropping2D from the end of the second spatial axis in conv_model_catg.
IMAGE_CROP_END_WIDTH = 1250
# What is left of IMAGE_TARGET_SIZE[1] after that crop (1400 - 1250 = 150).
IMAGE_CROPPED_WIDTH = IMAGE_TARGET_SIZE[1] - IMAGE_CROP_END_WIDTH
# Batch size of all generators; also used to derive the step counts in fit_model.
BATCH_SIZE =  128
# Width of the one-hot label vectors and of the softmax output layer.
CATEGORIES = 26
# Parameter list passed to add_noise_randomly for training/validation images.
# NOTE(review): the meaning of the two values is defined in image_preprocessors — confirm.
ADD_NOISE_RANDOMLY = [0, 15]
# Passed to create_sets as letter_n.
LETTER_END_POSITION = "P1"
# Preprocessing steps handed to the training and validation Image_Generator.
# Each entry names a function ("func") and its parameters ("params").
# presumably applied in list order by Image_Generator — TODO confirm
IMAGE_PREPOCESSORS = [
    {"func": cut_and_right_align, "params" : [IMAGE_CROPPED_WIDTH] },
    {"func": shift_randomly, "params" : [-10, 0]},
    {"func": add_noise_randomly, "params":  ADD_NOISE_RANDOMLY }
]
# Data filters handed to create_sets as masks.
# NOTE(review): take_percent gets a bare number while every other entry gets a
# list — confirm that the consumer handles both forms.
MASKS = [
    {"func" : tempo_interval, "params" : [18, 25]}, 
    {"func": take_percent, "params": 100}
]

In [None]:

%load_ext autoreload
%autoreload 2

from morse_helpers import create_sets
from morse_label_funcs import  labels_to_one_hot_positions_categorical, letter_n_to_index, position_regression

def get_sets():
    """Return the descriptors of the ten training sets used by this notebook.

    Every descriptor is a three-item list of strings:
    ``[set directory, "wordsMatrices_18_<version>", "Words_18_<version>.csv"]``.
    How the three items are interpreted is up to ``create_sets``.

    Returns:
        list[list[str]]: One descriptor per data-set version, in a fixed order.
    """
    # The order is significant for reproducibility: it is the order in which
    # the sets are handed to create_sets and written to the training log.
    versions = ("012", "021", "022", "111", "222", "301", "320", "411", "410", "402")

    return [
        [
            f"./training_data/MorseTrainSet_18/GEN18_VER_{version}/",
            f"wordsMatrices_18_{version}",
            f"Words_18_{version}.csv",
        ]
        for version in versions
    ]

# Build the image file names and the label arrays from the configured sets.
# Two label functions are passed, and later cells read the result as
# morse_labels[0] and morse_labels[1].
# NOTE(review): presumably one label array per label function, in the order
# given here — confirm in morse_helpers.create_sets.
(image_fnames, morse_labels) = create_sets(
    get_sets(),
    IMAGE_TARGET_SIZE,
    [position_regression, letter_n_to_index],
    letter_n=LETTER_END_POSITION,
    overwrite_images=False,
    masks=MASKS
)

In [None]:
# Combine both label arrays into one array so that they are shuffled and split
# together with the image file names.
# For 1-D, equally long label arrays the transpose gives one row per sample
# holding both labels — assumes that layout; TODO confirm.
morse_labels_concat = np.array([morse_labels[0], morse_labels[1]]).T

In [None]:
# Improve me
def labels_to_one_hot(labels, num_categories=None):
    """One-hot encode the class indices stored in ``labels[1]``.

    Args:
        labels: Indexable whose element ``1`` holds the class indices as a 1-D
            array-like; the values are cast to ``int``.
            NOTE(review): the layout of ``labels`` is decided by the caller
            (Image_Generator) — confirm that index 1 selects the letter indices.
        num_categories: Width of the one-hot vectors. When omitted, the
            notebook-level ``CATEGORIES`` constant is used, which keeps existing
            single-argument callers working unchanged.

    Returns:
        numpy.ndarray: Float array of shape ``(number of labels, num_categories)``
        that is zero everywhere except for a single 1 per row.

    Raises:
        IndexError: If a class index does not fit into ``num_categories`` columns.
    """
    if num_categories is None:
        # Looked up at call time so that a changed CATEGORIES cell takes effect.
        num_categories = CATEGORIES

    label_letters = np.asarray(labels[1]).astype("int")
    labels_one_hot = np.zeros((label_letters.size, num_categories))
    # Row r gets its 1 in column label_letters[r].
    labels_one_hot[np.arange(label_letters.size), label_letters] = 1
    return labels_one_hot


In [None]:
from morse_helpers import create_all_sets
# Split the file names and the combined labels into training, validation and
# test sets; the *_test pair is the one used for the final evaluation below.
# NOTE(review): how TEST_SPLIT_SIZE and VALIDATION_SPLIT_SIZE are interpreted is
# defined in morse_helpers.create_all_sets — confirm there.
train, labels, train_validation, labels_validation, train_test, labels_test = create_all_sets(
    image_fnames, morse_labels_concat, TEST_SPLIT_SIZE, VALIDATION_SPLIT_SIZE, shuffle_before_test_split=True)


In [None]:
from keras_generators.image_generator import Image_Generator

# Batch generators for training and validation. Both receive the same
# preprocessor list and use labels_to_one_hot to encode their labels.
# NOTE(review): the validation generator therefore also gets shift_randomly and
# add_noise_randomly — confirm that augmenting validation data is intended.
training_batch_generator = Image_Generator(train, labels, BATCH_SIZE, IMAGE_TARGET_SIZE, IMAGE_PREPOCESSORS, labels_to_one_hot)
validation_batch_generator = Image_Generator(train_validation, labels_validation, BATCH_SIZE, IMAGE_TARGET_SIZE, IMAGE_PREPOCESSORS, labels_to_one_hot)

In [None]:
# Residual block
def get_addblock(x, kernelsize, filters):
    """Append a residual block to tensor ``x`` and return the block's output.

    The convolutional branch is Conv2D (ReLU, 'same' padding) ->
    BatchNormalization -> Conv2D ('same' padding, no activation). The branch
    output is added to ``x``; the sum goes through ReLU and a final
    BatchNormalization.

    Args:
        x: Input tensor of the block; also used as the skip connection.
        kernelsize: Kernel size of both convolutions.
        filters: Number of filters of both convolutions.

    Returns:
        The output tensor of the final BatchNormalization layer.

    Note:
        ``layers`` is resolved when the function is called; the notebook imports
        it in a later cell than this definition.
        NOTE(review): Add needs ``x`` and the branch output to have compatible
        shapes, while the first block receives a 1-channel tensor with
        ``filters=8`` — confirm that this combination is intended.
    """
    # Layers are created in the same order in which they are applied.
    branch = x
    for branch_layer in (
        layers.Conv2D(filters, kernelsize, activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(filters, kernelsize, padding='same'),
    ):
        branch = branch_layer(branch)

    merged = layers.Add()([x, branch])
    activated = layers.ReLU()(merged)
    return layers.BatchNormalization()(activated)

In [None]:
def conv_model_catg(input_layer):
    """Build the convolutional feature extractor and return its flattened output.

    The input is first cropped by ``IMAGE_CROP_END_WIDTH`` at the end of its
    second spatial axis. Four residual blocks (8 filters each, kernel sizes
    (3,5), (3,7), (3,3), (3,3)) follow, each succeeded by a (1,2) max pooling
    with 'same' padding. The result is flattened.

    Args:
        input_layer: Keras input tensor of the model.

    Returns:
        The flattened feature tensor, ready for a Dense classification head.
    """
    features = keras.layers.Cropping2D(
        cropping=((0, 0), (0,IMAGE_CROP_END_WIDTH)), data_format=None
    )(input_layer)

    # One residual block plus pooling per kernel size, in this order.
    for kernel_size in ((3,5), (3,7), (3,3), (3,3)):
        features = get_addblock(features, kernel_size, 8)
        features = MaxPooling2D(pool_size=(1,2),padding="same")(features)

    return Flatten()(features)

In [None]:
from tensorflow.keras import layers

# Assemble the classifier: convolutional feature extractor followed by a
# softmax layer with one output per category.
input_layer = Input(shape=IMAGE_TARGET_SIZE)
conv_model_flattened = conv_model_catg(input_layer)
output_layer_letter = Dense(CATEGORIES, activation="softmax")(conv_model_flattened)

model = Model(inputs=input_layer, outputs=output_layer_letter)
# Categorical cross-entropy matches the one-hot labels produced by labels_to_one_hot.
model.compile(loss=keras.losses.CategoricalCrossentropy(), optimizer='adam', metrics=["accuracy"])

# summary() prints the table itself and returns None, so wrapping it in print()
# would only append a stray "None" line to the output.
model.summary()

In [None]:
# Number of epochs trained so far. fit_model passes it to model.fit as
# initial_epoch and increases it after each call, so repeated training cells
# continue the epoch numbering. Re-running this cell sets the counter back to 0.
init_epoch = 0

In [None]:

# Number of epochs trained by each execution of this cell.
num_epochs = 2


def fit_model(epochs):
    """Train ``model`` for ``epochs`` further epochs and return the Keras history.

    The global ``init_epoch`` counter is used as ``initial_epoch`` and is
    increased by ``epochs`` once ``model.fit`` has returned, so consecutive
    calls continue the epoch numbering instead of restarting at zero.

    Args:
        epochs: Number of additional epochs to train.

    Returns:
        The History object returned by ``model.fit``.
    """
    global init_epoch

    # Only full batches are counted for both training and validation.
    training_steps = int(len(train) // BATCH_SIZE)
    validation_steps = int(len(train_validation) // BATCH_SIZE)

    history = model.fit(
        training_batch_generator,
        steps_per_epoch=training_steps,
        # Keras expects the index of the last epoch here, not the number of new epochs.
        epochs=epochs + init_epoch,
        initial_epoch=init_epoch,
        verbose=1,
        validation_data=validation_batch_generator,
        validation_steps=validation_steps,
    )

    init_epoch += epochs
    return history


history = fit_model(num_epochs)

In [None]:
def show_image(img, width=300):
    """Display ``img`` in a wide figure, showing the x-range from 0 to ``width``.

    Args:
        img: Image data accepted by matplotlib's ``imshow``.
        width: Upper x-axis limit of the displayed region (default 300).
    """
    figure = plt.figure(figsize=(30,5))
    axes = figure.gca()
    # The limit is set before imshow, exactly as in the pyplot call sequence.
    axes.set_xlim(0, width)
    axes.imshow(img)
    plt.show()

In [None]:

def get_deviating_predictions(generator, predictions):
    """Collect the samples whose predicted class differs from the labelled class.

    The batches of ``generator`` are walked in order and the n-th sample seen
    is compared with ``predictions[n]``. Iteration stops as soon as every
    prediction has been compared, so a generator that yields more samples than
    there are predictions (or that never ends) no longer runs past the end of
    ``predictions``.

    Args:
        generator: Iterable yielding ``(images_batch, labels_batch)`` pairs;
            labels are score/one-hot vectors compared via ``argmax``.
        predictions: Sequence of per-sample score vectors, in the same sample
            order as the generator.
            NOTE(review): this pairing is only meaningful if the generator
            yields the samples in the order used when predicting — confirm for
            Image_Generator.

    Returns:
        list: One ``[predicted_class, labelled_class, image, prediction_scores]``
        entry per misclassified sample.
    """
    categorical_differences = []
    prediction_count = len(predictions)
    if prediction_count == 0:
        # Nothing to compare; do not touch the generator at all.
        return categorical_differences

    indexer = 0
    for imgs_batch, labels_batch in generator:
        for i in range(len(imgs_batch)):
            catg_pred = np.argmax(predictions[indexer])
            catg_test_label = np.argmax(labels_batch[i])

            if catg_pred != catg_test_label:
                categorical_differences.append(
                    [catg_pred, catg_test_label, imgs_batch[i], predictions[indexer]]
                )

            indexer += 1
            if indexer == prediction_count:
                # Every prediction is consumed; stop before requesting more batches.
                return categorical_differences

    return categorical_differences

In [None]:
# %%capture cap --no-stderr
from training_log import Training_Data_Log, print_name, json_to_file
import inspect

# TODO: the model config cannot be serialized to JSON here.

# Collect everything describing this training run in one log object.
data_log = Training_Data_Log()
data_log.model_config = None
# The source code of the model builder stands in for the model config.
data_log.model_config_method_string = inspect.getsource(conv_model_catg)
data_log.training_sets = get_sets()
data_log.training_set_size = len(train)
data_log.validation_set_size = len(train_validation)
data_log.test_set_size = len(train_test)
data_log.image_pre_processors = print_name(IMAGE_PREPOCESSORS)
data_log.noise_added = ADD_NOISE_RANDOMLY
data_log.training_data_masks = print_name(MASKS)
# model.summary() prints the table and returns None, so assigning its return
# value would log nothing. Capture the text through print_fn instead; the
# callback tolerates keyword arguments in case the installed Keras passes any.
summary_lines = []
model.summary(print_fn=lambda line, **_kwargs: summary_lines.append(line))
data_log.model_summary = "\n".join(summary_lines)
data_log.model_optimizer = str(type(model.optimizer))
data_log.model_history = history.history
# Metric values of the last epoch of the most recent fit_model call.
data_log.model_history_final_epoch = {k: v[-1] for k, v in history.history.items()}


In [None]:
class Categorical_Results:
    """Plain record for the evaluation results of one test noise level.

    The class-level ``None`` values are only defaults. The evaluation loop
    assigns every field on the instance, and the instance ``__dict__`` is what
    gets appended to the result list.
    """
    # Names of the preprocessors used for the test generator (from print_name).
    image_preprocessors_test = None
    # Number of predictions returned by model.predict.
    total_predictions = None
    # Noise parameters passed to add_noise_randomly for this run.
    noise_level = None
    # Number of entries returned by get_deviating_predictions.
    predictions_incorrect = None
    # predictions_incorrect relative to total_predictions, in percent.
    predictions_incorrect_percent = None
    # Return value of model.evaluate on the test generator.
    model_evaluation = None

# Each entry is the parameter list handed to add_noise_randomly for one test run.
noise_levels = [[0, 1]]
# One plain dict per noise level; stored in the data log by a later cell.
result_array = []
for noise_level in noise_levels:

    # Same preprocessing chain as for training, except for the noise parameters.
    IMAGE_PREPROCESSORS_TEST = [
        {"func": cut_and_right_align, "params" : [IMAGE_CROPPED_WIDTH] },
        {"func": shift_randomly, "params" : [-10, 0]},
        {"func": add_noise_randomly, "params":  noise_level }
    ]

    test_batch_generator = Image_Generator(train_test, labels_test, BATCH_SIZE, IMAGE_TARGET_SIZE, IMAGE_PREPROCESSORS_TEST, labels_to_one_hot)
    # NOTE(review): predict, evaluate and get_deviating_predictions each walk the
    # generator separately. If the preprocessors are applied randomly on every
    # access (as their names suggest), or the generator reorders samples between
    # passes, the images and labels compared below are not exactly the ones that
    # produced `predictions` — confirm against Image_Generator.
    predictions = model.predict(test_batch_generator)
    evaluations = model.evaluate(test_batch_generator, verbose = 0)
    categorical_differences = get_deviating_predictions(test_batch_generator, predictions)

    results = Categorical_Results()
    results.image_preprocessors_test = print_name(IMAGE_PREPROCESSORS_TEST)
    results.total_predictions = len(predictions)
    results.noise_level = noise_level
    results.predictions_incorrect = len(categorical_differences)
    # Share of misclassified samples in percent, rounded to four decimals.
    # Raises ZeroDivisionError if predictions is empty.
    results.predictions_incorrect_percent = round( (len(categorical_differences) / len(predictions) * 100), 4)
    results.model_evaluation = evaluations

    # __dict__ holds exactly the instance attributes assigned above.
    result_array.append(results.__dict__)

In [None]:
# Attach the per-noise-level evaluation results to the training log.
data_log.results = result_array

In [None]:
# Write the collected training log to disk.
# NOTE(review): file extension and overwrite behaviour are decided inside
# training_log.json_to_file — confirm there.
json_to_file("categorical_log/categorical_data_log", data_log)

In [None]:
from morse_label_funcs import code_number
# Print and plot at most the first six misclassified samples of the last
# evaluated noise level.
print("Incorrect predictions:")
print("----------------------------------------------------------------------------------------")
for pred, correct, img, one_hot in categorical_differences[:6]:
    # Full score vector of the model for this sample, rounded to two decimals.
    print('All prediction scores:')
    print(np.round(one_hot, 2))

    # code_number is indexed with the class index to get a printable label.
    print('Prediction:', pred, code_number[pred])
    print('Correct:', correct, code_number[correct])
    show_image(img)
    print("----------------------------------------------------------------------------------------")