In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Reshape, Conv2D, Conv1D, MaxPooling2D, Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow import keras, config
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Print how many physical GPU devices TensorFlow reports on this machine.
print("GPUs Available: ", len(config.list_physical_devices('GPU')))

In [None]:
%load_ext autoreload
%autoreload 2
from image_process.image_preprocessors import add_noise, add_noise_randomly, shift_random_update_positions, shift_randomly_position_labels, cut_and_right_align_raw
from data_filters import tempo_interval, take_percent
# Global settings
# NOTE(review): the two split sizes are not referenced anywhere in the visible notebook.
TEST_SPLIT_SIZE = 0.80
VALIDATION_SPLIT_SIZE = 0.90
# Passed to the generators as `image_target_size`.
IMAGE_TARGET_SIZE = (5, 1400, 1)
# Samples per batch. This used to be assigned twice with the same value; it is now defined once.
BATCH_SIZE = 128
# Batches per epoch: used as `steps_per_epoch` and to size the training generator.
EPOCH_SIZE = 500
IMAGE_CROP_END_WIDTH = 1250
# 1400 - 1250 = 150, handed to cut_and_right_align_raw as its parameter.
IMAGE_CROPPED_WIDTH = IMAGE_TARGET_SIZE[1] - IMAGE_CROP_END_WIDTH
# Number of classes: width of the one-hot labels and of the softmax output layer.
CATEGORIES = 26
# CSV column read by get_position_labels.
LETTER_END_POSITION = "P1"
# Passed to the generators as `noise_range`.
ADD_NOISE_RANDOMLY = [-2, 30]
# Passed to the generators as `random_signal_indent`.
ADD_SIGNAL_INDENT_RANDOMLY = [12860, 12860]
# Parameter of shift_randomly_position_labels; also the source of `shift_max`.
SHIFT_MIN_MAX = [-200, -140]
# Preprocessing steps given to the generators, each as a {"func", "params"} entry.
IMAGE_PREPOCESSORS = [
    {"func": shift_randomly_position_labels, "params" : SHIFT_MIN_MAX},
    {"func": cut_and_right_align_raw, "params" : IMAGE_CROPPED_WIDTH },
]

In [None]:
def show_image(img, width, position = 0):
    """Show `img` in a wide figure with the x-axis limited to [0, width].

    Args:
        img: Image data accepted by `plt.imshow`.
        width: Upper limit of the x-axis.
        position: Optional x tick location(s), either a scalar or a sequence.
            The default scalar 0 leaves the automatic ticks untouched.
    """
    plt.figure(figsize=(30,5))
    plt.xlim(0, width)
    # Only a scalar 0 means "no custom ticks". Comparing an array to 0 inside
    # `if` raises for multi-element input, so test the rank first.
    keep_default_ticks = np.ndim(position) == 0 and position == 0
    if not keep_default_ticks:
        # xticks expects a list of locations; wrap scalars into a 1-element array.
        plt.xticks(np.atleast_1d(position))
    plt.imshow(img)
    plt.show()

In [None]:
from Image_Generator_helpers import  DataSets, set_paths, global_path, Random_Item
from data_filters import tempo_interval_raw
from morse_label_funcs import code_number
# Build the dataset object from the project paths. The third argument is a list
# holding tempo_interval_raw([18, 25]) -- presumably a tempo filter; confirm in data_filters.
set_obj: DataSets = DataSets(set_paths, global_path, [tempo_interval_raw([18, 25])])
# Not the last expression of the cell, so the notebook does not display this value.
set_obj.csv_files

# Width of the configured shift interval; read by position_labels_de_normalizer.
shift_max = abs(SHIFT_MIN_MAX[0] - SHIFT_MIN_MAX[1])
print(shift_max)

def labels_to_one_hot(labels, num_categories=None):
    """Convert integer class labels into a one-hot float matrix.

    Args:
        labels: Integer class indices (NumPy array, list or tuple).
        num_categories: Number of columns of the result. Defaults to the
            module-level CATEGORIES when omitted.

    Returns:
        Array of shape (number of labels, num_categories) with a single 1 per row.
    """
    if num_categories is None:
        num_categories = CATEGORIES
    # Accept plain sequences as well as arrays (the original required `.size`).
    labels = np.asarray(labels)
    labels_one_hot = np.zeros((labels.size, num_categories))
    if labels.size:
        # Row i gets a 1 in column labels[i].
        labels_one_hot[np.arange(labels.size), labels] = 1
    return labels_one_hot

def get_position_labels(random_items):
    """Return, per item, the first value of its LETTER_END_POSITION CSV column as float64."""
    position_labels = []
    for item in random_items:
        column_values = item.csv_row[LETTER_END_POSITION].values
        position_labels.append(column_values.astype(np.float64)[0])
    return position_labels

def position_labels_post_process(labels, set_obj: DataSets, random_items: list[Random_Item]):
    """Build one-hot letter labels from the first character of each item's "WORD" value.

    `labels` and `set_obj` are accepted but not read; the class index of each
    item is the position of its lower-cased first character in `code_number`.
    """
    letter_indices = []
    for item in random_items:
        first_word = item.csv_row["WORD"].apply(str).values[0]
        first_letter = first_word[0].lower()
        letter_indices.append(code_number.index(first_letter))
    label_letters = np.array(letter_indices)
    return labels_to_one_hot(label_letters)

def position_labels_de_normalizer(labels, set_obj: DataSets, random_items: list[Random_Item]):
    """Rescale each label by the first-letter position range of `set_obj`, widened by `shift_max`.

    Each label is multiplied by (max + shift_max - min) and offset by min.
    `random_items` is accepted but not read.
    """
    def _rescale(label):
        span = set_obj.max_first_letter_position + shift_max - set_obj.min_first_letter_position
        return label * span + set_obj.min_first_letter_position

    return np.array(list(map(_rescale, labels)))


In [None]:
from keras_generators.image_generator import Image_Generator_RAW
# Training data source: EPOCH_SIZE batches of BATCH_SIZE images per pass.
Train_Generator_RAW = Image_Generator_RAW(
    set_obj=set_obj,
    batch_size=BATCH_SIZE,
    image_amount=BATCH_SIZE * EPOCH_SIZE,
    image_target_size=IMAGE_TARGET_SIZE,
    FFT_JUMP=64,
    image_prepocessors=IMAGE_PREPOCESSORS,
    noise_range=ADD_NOISE_RANDOMLY,
    random_signal_indent=ADD_SIGNAL_INDENT_RANDOMLY,
    label_func=get_position_labels,
    label_post_process=position_labels_post_process,
)

# Fetch the batch at index 0 and preview every image next to its label.
t, l = Train_Generator_RAW[0]

for img, label in zip(t, l):
    print("label: ")
    print(label)
    show_image(img, 200)

In [None]:
# Residual block
def get_addblock(x, kernelsize, filters):
    """Apply conv -> batch norm -> conv, add the result to `x`, then ReLU and batch norm.

    Args:
        x: Input tensor; it is also the skip connection fed to the Add layer.
        kernelsize: Kernel size used by both convolutions.
        filters: Number of filters used by both convolutions.

    Returns:
        Output tensor of the final BatchNormalization layer.

    Note: relies on the name `layers`, which this notebook imports in a later cell.
    """
    branch = layers.Conv2D(filters, kernelsize, activation='relu', padding='same')(x)
    branch = layers.BatchNormalization()(branch)
    branch = layers.Conv2D(filters, kernelsize, padding='same')(branch)
    merged = layers.Add()([x, branch])
    activated = layers.ReLU()(merged)
    return layers.BatchNormalization()(activated)

In [None]:
def conv_model_catg(input_layer):
    """Stack four residual blocks, each followed by (1, 2) max pooling, then flatten.

    Args:
        input_layer: Tensor the convolutional stack is applied to.

    Returns:
        The flattened output tensor of the last pooling stage.
    """
    # x = keras.layers.Cropping2D(cropping=((0, 0), (0,IMAGE_CROP_END_WIDTH)), data_format=None)(input_layer)

    # Kernel size of each residual stage, in order; every stage uses 8 filters.
    stage_kernels = [(3,5), (3,7), (3,3), (3,3)]

    features = input_layer
    for kernel in stage_kernels:
        features = get_addblock(features, kernel, 8)
        features = MaxPooling2D(pool_size=(1,2),padding="same")(features)

    return Flatten()(features)

In [None]:
from tensorflow.keras import layers

# Input shape derived from the global settings; it evaluates to (5, 150, 1), the
# literal used before. Assumes the preprocessors leave IMAGE_CROPPED_WIDTH columns -- TODO confirm.
input_layer    = Input(shape=(IMAGE_TARGET_SIZE[0], IMAGE_CROPPED_WIDTH, IMAGE_TARGET_SIZE[2]))
conv_model_flattened = conv_model_catg(input_layer)
# One softmax unit per letter category.
output_layer_letter    = Dense(CATEGORIES, activation="softmax")(conv_model_flattened)

model = Model(inputs=input_layer, outputs=output_layer_letter)
model.compile(loss=keras.losses.CategoricalCrossentropy(), optimizer='adam', metrics=["accuracy"])

# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" line.
model.summary()

In [None]:
# Running count of epochs trained so far; fit_model reads and advances it.
# Re-running this cell resets the counter to 0.
init_epoch = 0

In [None]:

num_epochs = 10

def fit_model(epochs):
    """Train `model` for `epochs` more epochs, continuing from the global `init_epoch`.

    Args:
        epochs: Number of additional epochs to run.

    Returns:
        The History object returned by `model.fit`.
    """
    global init_epoch

    first_epoch = init_epoch
    last_epoch = epochs + first_epoch

    # NOTE(review): `workers` / `use_multiprocessing` are reportedly no longer accepted
    # by fit() in newer Keras releases -- confirm against the installed version.
    history = model.fit(
        Train_Generator_RAW,
        steps_per_epoch=EPOCH_SIZE,
        epochs=last_epoch,
        initial_epoch=first_epoch,
        verbose=1,
        # validation_data = validation_batch_generator,
        # validation_steps = int(len(train_validation) // BATCH_SIZE),
        workers=12,
        use_multiprocessing=True,
    )

    # Advance the counter only after fit() returned, so a failed run does not move it.
    init_epoch += epochs
    return history

history = fit_model(num_epochs)


In [None]:
from multiprocessing import Pool

In [None]:
pixel_difference = 3
def sum_up_to(generator):
    """Return the item stored at index 0 of `generator` (used as the worker task for Pool.map)."""
    first_batch = generator[0]
    return first_batch


def get_deviating_predictions(pixel_difference, generator):
    """Collect the samples whose predicted class differs from the labelled class.

    Batches are fetched in worker processes, then classified with the global `model`.

    Args:
        pixel_difference: Not used by this function; kept so existing calls keep working.
        generator: Batch source supporting len() and indexing; every fetched
            batch is unpacked as an (images, one-hot labels) pair.

    Returns:
        List of [predicted_class, labelled_class, image] entries, one per
        misclassified sample.
    """
    batch_count = len(generator)

    # The context manager shuts the pool down once map() has returned; the pool
    # was previously never closed, so its worker processes leaked on every call.
    # NOTE(review): every task fetches index 0, so this relies on the generator
    # producing different samples per call, also across worker processes -- confirm.
    with Pool() as a_pool:
        result = a_pool.map(sum_up_to, [generator] * batch_count)

    categorical_differences = []

    for imgs_batch, labels_batch in result:

        # TODO (from the original author): make the predictions before the loop,
        # then insert predictions into multiprocessing functions
        predictions = model.predict_on_batch(imgs_batch)

        # Class index = position of the largest score in each row.
        predicted_classes = np.argmax(predictions, axis=1)
        labelled_classes = np.argmax(labels_batch, axis=1)

        for catg_pred, catg_test_label, img in zip(predicted_classes, labelled_classes, imgs_batch):
            if catg_pred != catg_test_label:
                categorical_differences.append([catg_pred, catg_test_label, img])

    return categorical_differences

In [None]:
%%capture cap --no-stderr
from training_log import Training_Data_Log, print_name, json_to_file
import inspect

# Collect the model configuration, settings and training history in one log object.
data_log = Training_Data_Log()
data_log.model_config = model.to_json()
# data_log.model_config_method_string = [inspect.getsource(conv_model_position)]
# data_log.training_sets = get_sets()
# data_log.training_set_size = len(train)
# data_log.validation_set_size = len(train_validation)
# data_log.test_set_size = len(train_test)
data_log.image_pre_processors = print_name(IMAGE_PREPOCESSORS)
data_log.noise_added = ADD_NOISE_RANDOMLY
# data_log.training_data_masks = print_name(MASKS)
# model.summary() prints and returns None, so the log used to store None here.
# Collect the text through print_fn instead; **_ absorbs any extra keyword
# arguments some Keras versions pass to the callback.
summary_lines = []
model.summary(print_fn=lambda line, **_: summary_lines.append(line))
data_log.model_summary = "\n".join(summary_lines)
data_log.model_optimizer = str(type(model.optimizer))
data_log.model_history = history.history
# Last recorded value of every metric in the most recent fit() call.
data_log.model_history_final_epoch = {k: v[-1] for k, v in history.history.items()}
data_log.total_epochs = init_epoch


In [None]:
class Regression_Results:
    """Plain record of one evaluation run.

    All fields are class-level defaults of None. Only attributes assigned on an
    instance show up in its `__dict__`, which is what the notebook stores in the log.
    """
    image_preprocessors_test = None
    total_predictions = None
    noise_level = None
    difference_in_pixels = None
    predictions_off_by_more_than_difference = None
    # Count of misclassified samples; assigned by the evaluation cell but
    # previously missing from this declaration.
    predictions_incorrect = None
    # Attribute name (including its spelling) is kept because it becomes a key in the stored results.
    predictions_incorrect_prercent = None
    model_evaluation = None

noise_levels = [0.0]
result_array = []
batches = 200
# NOTE(review): `noise_level` is not applied anywhere below; every iteration
# builds the generator with the same ADD_NOISE_RANDOMLY range.
for noise_level in noise_levels:

    # Dedicated evaluation generator. The previous chained assignment also
    # rebound Train_Generator_RAW to this smaller generator, so re-running the
    # training cell afterwards silently trained on the evaluation generator.
    test_batch_generator = Image_Generator_RAW(
        image_amount=BATCH_SIZE * batches,
        set_obj= set_obj,
        FFT_JUMP=64,
        batch_size=BATCH_SIZE,
        image_target_size=IMAGE_TARGET_SIZE,
        image_prepocessors=IMAGE_PREPOCESSORS,
        noise_range=ADD_NOISE_RANDOMLY,
        random_signal_indent=ADD_SIGNAL_INDENT_RANDOMLY,
        label_func = get_position_labels,
        label_post_process=position_labels_post_process
    )

    categorical_differences = get_deviating_predictions(pixel_difference, test_batch_generator)
    evaluations = model.evaluate(test_batch_generator, verbose = 0)

    total_predictions = batches * BATCH_SIZE
    incorrect_count = len(categorical_differences)

    results = Regression_Results()
    # results.image_preprocessors_test = print_name(image_preprocessors_test)
    results.total_predictions = total_predictions
    # results.noise_level = noise_level
    results.predictions_incorrect = incorrect_count
    # Share of misclassified samples in percent, rounded to 4 decimals.
    results.predictions_incorrect_prercent = round((incorrect_count / total_predictions) * 100, 4)
    results.model_evaluation = evaluations

    # Only the attributes assigned above end up in the stored dict.
    result_array.append(results.__dict__)

    print(incorrect_count)


In [None]:
# Attach the per-run result dicts gathered by the evaluation cell to the training log.
data_log.results = result_array
print(data_log.results)

In [None]:
# Hand the log object and its target path to json_to_file (helper from
# training_log; presumably serialises the log to JSON -- confirm).
json_to_file("logs/categorical/categorical_data_log", data_log)

In [None]:
# model.save("saved_model_categorical_raw")

In [None]:
from morse_label_funcs import code_number
# Preview the first six misclassified samples from the last evaluation run.
print("Incorrect predictions:")
print("----------------------------------------------------------------------------------------")
for pred, correct, img in categorical_differences[:6]:
    # code_number maps a class index back to its symbol.
    print('Prediction:', pred, code_number[pred])
    print('Correct:', correct, code_number[correct])
    show_image(img, 150)
    print("----------------------------------------------------------------------------------------")