# Dependencies

In [None]:
#importing general packages and libraries
import numpy as np
import pandas as pd
import os
from PIL import Image
import matplotlib.pyplot as plt
import random
import ast
import math
import pickle

In [None]:
#importing packages and libraries of ML framework
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Input, Flatten, Dense, Activation, Concatenate, Dropout, BatchNormalization, GlobalAveragePooling2D, LeakyReLU, Conv2D, Add, MaxPooling2D
from tensorflow.keras import models, layers, regularizers
from tensorflow.keras.models import Model, save_model, Sequential
import tensorflow.keras.backend as K
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2, l1_l2
from tensorflow.keras.optimizers import Adam

In [None]:
# Mount Google Drive at /content/drive (Colab only); the data and model paths used below live there
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


# Function Definitions

In [None]:
def plot_training_validation_test_loss(history, test_loss, test_mae, file_path, name, title):
    """Plot MAE and loss curves of a training run, save the figure and show it.

    Two side-by-side panels with a logarithmic y-axis are drawn: the left one
    shows training/validation MAE per epoch, the right one training/validation
    loss per epoch. In each panel the test-set value is drawn as a single star
    at the last epoch.

    Args:
        history: Object whose ``history`` attribute is a dict holding the
            per-epoch lists 'mae', 'val_mae', 'loss' and 'val_loss'.
        test_loss: Loss value obtained on the test set.
        test_mae: MAE value obtained on the test set.
        file_path: Directory in which the figure is written.
        name: Prefix of the output file; the figure is saved as
            ``<name>_losscurves`` (no extension given, so matplotlib's
            default format is used).
        title: Title placed above both panels.
    """
    # Per-epoch curves recorded during training
    training_mae = history.history['mae']
    validation_mae = history.history['val_mae']
    training_loss_mse = history.history['loss']
    validation_loss_mse = history.history['val_loss']

    # Epochs are numbered from 1 on the x-axis
    epochs = range(1, len(training_loss_mse) + 1)
    nr_epochs = len(epochs)

    # One row, two panels: MAE on the left, loss on the right
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    panels = (
        (training_mae, validation_mae, test_mae, 'MAE', 'MAE', 'Model mean absolute error'),
        (training_loss_mse, validation_loss_mse, test_loss, 'loss', 'MSE loss', 'Loss curve'),
    )

    for ax, (train_curve, val_curve, test_value, tag, ylabel, panel_title) in zip(axes, panels):
        ax.plot(epochs, train_curve, label=f'Training {tag}', marker='o', color='#903C59', markersize=3)
        ax.plot(epochs, val_curve, label=f'Validation {tag}', marker='o', color='#084C61', markersize=3)
        # Test value as a single star at the last epoch. The colour is given only
        # through `color`; combining it with an 'r*' format string makes
        # matplotlib warn about a redundantly defined colour.
        ax.plot(nr_epochs, test_value, linestyle='None', marker='*', markersize=10,
                color='#F7D08A', label=f'Test {tag}')
        ax.set_xlabel('Epochs')
        ax.set_yscale('log')
        ax.set_ylabel(ylabel)
        ax.set_title(panel_title)
        ax.legend()
        ax.set_xlim(0, nr_epochs + 1)

    # Horizontal spacing between the panels and the overall title
    fig.subplots_adjust(wspace=0.3)
    fig.suptitle(title, fontsize=16)

    # Save the figure under the given folder and name, overwriting an existing file
    fig.savefig(os.path.join(file_path, f'{name}_losscurves'), bbox_inches='tight', dpi=300)
    plt.show()

# Data preparation

In [None]:
# Locations of the pickled train (two parts), test and validation splits
_SPLIT_DIR = '/content/drive/MyDrive/Thesis_LotteKat/ModelTraining'
file_path_splitvariables_train1 = _SPLIT_DIR + '/SplitVariables_train1.pkl'
file_path_splitvariables_train2 = _SPLIT_DIR + '/SplitVariables_train2.pkl'
file_path_splitvariables_test = _SPLIT_DIR + '/SplitVariables_test.pkl'
file_path_splitvariables_val = _SPLIT_DIR + '/SplitVariables_val.pkl'

In [None]:
# Read both halves of the training split from their pickle files
# NOTE(review): unpickling can execute arbitrary code - only load files you trust
with open(file_path_splitvariables_train1, 'rb') as pkl_file:
    loaded_variables1 = pickle.load(pkl_file)
with open(file_path_splitvariables_train2, 'rb') as pkl_file:
    loaded_variables2 = pickle.load(pkl_file)

# Stack part 1 and part 2 into single training arrays (images, features, labels)
X_train_img = np.concatenate([loaded_variables1['X_train1_img'], loaded_variables2['X_train2_img']])
X_train_feat = np.concatenate([loaded_variables1['X_train1_feat'], loaded_variables2['X_train2_feat']])
y_train = np.concatenate([loaded_variables1['y_train1'], loaded_variables2['y_train2']])

In [None]:
# Read the validation split from its pickle file
# NOTE(review): unpickling can execute arbitrary code - only load files you trust
with open(file_path_splitvariables_val, 'rb') as pkl_file:
    loaded_variables = pickle.load(pkl_file)

# Pull the images, numerical features and labels out of the loaded dict
X_val_img, X_val_feat, y_val = (
    loaded_variables[key] for key in ('X_val_img', 'X_val_feat', 'y_val')
)

In [None]:
# Read the test split from its pickle file
# NOTE(review): unpickling can execute arbitrary code - only load files you trust
with open(file_path_splitvariables_test, 'rb') as pkl_file:
    loaded_variables = pickle.load(pkl_file)

# Pull the images, numerical features and labels out of the loaded dict
X_test_img, X_test_feat, y_test = (
    loaded_variables[key] for key in ('X_test_img', 'X_test_feat', 'y_test')
)

In [None]:
# Number of samples in each split
train_len, val_len, test_len = (len(split) for split in (X_train_img, X_val_img, X_test_img))

# Overall number of samples, to cross-check against the image folder
total_set = train_len + val_len + test_len
print('total = ', total_set)
print('number of images in sets (train, val, test) =', train_len, val_len, test_len)

# Share of each split as a percentage of the whole data set
train_ratio = train_len / total_set * 100
val_ratio = val_len / total_set * 100
test_ratio = test_len / total_set * 100
print("Ratio of (train, val, test) = {}:{}:{}".format(round(train_ratio), round(val_ratio), round(test_ratio)))

total =  55662
number of images in sets (train, val, test) = 35623 8906 11133
Ratio of (train, val, test) = 64:16:20


In [None]:
# Separate the label columns: the first three are the daylight targets, the rest the view targets.
# The combined arrays are deleted afterwards, so re-running this cell requires reloading the data first.
y_train_daylight, y_train_view = y_train[:, :3], y_train[:, 3:]
del y_train

y_val_daylight, y_val_view = y_val[:, :3], y_val[:, 3:]
del y_val

y_test_daylight, y_test_view = y_test[:, :3], y_test[:, 3:]
del y_test

# ML settings

In [None]:
# Early stopping on the validation loss: patience of 25 epochs, best weights restored afterwards
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=25,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
# Define a learning rate annealing function with a minimum learning rate
def lr_schedule(epoch):
    """Step-decay learning rate: halve the rate every 5000 iterations.

    The iteration count is approximated as ``epoch * 557`` (the number of
    iterations per epoch is hard-coded). The rate starts at 0.001 and never
    drops below 1e-8.

    Args:
        epoch: Epoch index for which the learning rate is requested.

    Returns:
        The learning rate to use for that epoch.
    """
    steps_per_epoch = 557      # iterations in one epoch
    decay_every = 5000         # number of iterations between two reductions
    base_lr = 0.001            # initial learning rate
    decay = 0.5                # factor applied at every reduction
    floor_lr = 0.00000001      # minimum learning rate

    # How many reductions have happened by the start of this epoch
    n_reductions = math.floor(epoch * steps_per_epoch / decay_every)

    return max(base_lr * decay ** n_reductions, floor_lr)

# Callback that sets the learning rate from lr_schedule and logs the chosen value
lr_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1)

# Model 7 - hyperparameter adjustments

## Model 7.5 - low l2 rate & low dropout rate

In [None]:
# Name, plot title and output locations for this training run
modelname = 'Model16_daylight'
title_model = 'model 16 - daylight model'
folder_path_ResNets = '/content/drive/MyDrive/Thesis_LotteKat/ModelTraining/' + modelname
checkpoint_filepath = modelname + '_checkpoint.h5'

In [None]:
# Checkpoint callback: keep only the model with the lowest validation loss seen so far.
# NOTE(review): checkpoint_filepath is a relative path, so the checkpoint is written to the
# current working directory rather than to folder_path_ResNets - confirm this is intended.
model_checkpoint_callback = ModelCheckpoint(
    checkpoint_filepath,
    monitor='val_loss',   # quantity that decides which model is "best"
    mode='min',           # lower is better
    save_best_only=True,  # do not write a checkpoint unless the monitored value improved
    verbose=1,            # print a message whenever a checkpoint is saved
)

In [None]:
# Architecture version ResNet 4 - daylight model (3 regression outputs)
# Two inputs: a 224x224 RGB image and a vector of 2 numerical features
image_input = Input(shape=(224, 224, 3))
num_input = Input(shape=(2,))

# Build ResNet50 on the image input, without pretrained weights (weights=None)
# and without its classification head (include_top=False)
base_model = ResNet50(weights=None, include_top=False, input_tensor=image_input)

# Explicitly mark every ResNet50 layer as trainable
for layer in base_model.layers:
    layer.trainable = True

# Feature map produced by the ResNet backbone
resnet_output = base_model.output

# Add Batch Normalization for improved training stability
batchnorm1 = BatchNormalization()(resnet_output)

# Add a LeakyReLU activation function
leaky1 = LeakyReLU(alpha=0.1)(batchnorm1)

# Global average pooling, dropout (0.3) and a 256-unit dense layer with L2 regularisation (1e-4)
glob1 = GlobalAveragePooling2D()(leaky1)
dropout1 = Dropout(0.3)(glob1)
dense1 = Dense(256, activation='relu', kernel_regularizer=l2(0.0001))(dropout1)

# Concatenate the image branch with the numerical input
combined = Concatenate()([dense1, num_input])

# Add Batch Normalization and LeakyReLU activation
batchnorm2 = BatchNormalization()(combined)
leaky2 = LeakyReLU(alpha=0.1)(batchnorm2)

# 128-unit dense layer with L2 regularisation (1e-4), followed by dropout (0.3)
dense2 = Dense(128, activation='relu', kernel_regularizer=l2(0.0001))(leaky2)
dropout2 = Dropout(0.3)(dense2)

# Final dense layer for regression: 3 output units (one per daylight label column), linear activation
output_layer = Dense(3, activation='linear')(dropout2)

# Create the model and compile it with mean squared error loss and mean absolute error metric
model_v16d = Model(inputs=[image_input, num_input], outputs=output_layer)
model_v16d.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

In [None]:
# Fit on the training split (at most 100 epochs, batches of 64) and validate on the validation split
history_v16d = model_v16d.fit(
    x=[X_train_img, X_train_feat],
    y=y_train_daylight,
    batch_size=64,
    epochs=100,
    validation_data=([X_val_img, X_val_feat], y_val_daylight),
    callbacks=[model_checkpoint_callback, early_stopping, lr_scheduler],
)

In [None]:
# Evaluate on the test split; returns the loss (MSE) followed by the compiled metric (MAE).
# The two inputs are passed as a list, the same multi-input form used in the fit() call.
test_loss_v16d, test_mae_v16d = model_v16d.evaluate([X_test_img, X_test_feat], y_test_daylight, verbose=2)

In [None]:
# Save the trained model, its training history and the test metrics to the model folder
# Create the folder if needed (no error when it already exists)
os.makedirs(folder_path_ResNets, exist_ok=True)

# Save trained model
model_file = os.path.join(folder_path_ResNets, f'{modelname}_model.h5')
model_v16d.save(model_file)

# Check if the model file was saved
if os.path.exists(model_file):
    print('Model file saved successfully')
else:
    print('Model file not found')

# Save the per-epoch training history for traceback
with open(os.path.join(folder_path_ResNets, f'{modelname}_model_history.pkl'), 'wb') as file:
    pickle.dump(history_v16d.history, file)

# Create a dictionary to store the test metrics
test_metrics = {
    'Test Loss': test_loss_v16d,
    'Test MAE': test_mae_v16d
}

# Save the test metrics, one "name: value" line per metric
with open(os.path.join(folder_path_ResNets, f'{modelname}_model_test.txt'), 'w') as file:
    for metric, value in test_metrics.items():
        file.write(f'{metric}: {value}\n')

In [None]:
# Plot the training results (MAE and loss curves) and save the figure in the model folder
plot_training_validation_test_loss(
    history=history_v16d,
    test_loss=test_loss_v16d,
    test_mae=test_mae_v16d,
    file_path=folder_path_ResNets,
    name=modelname,
    title=title_model,
)

In [None]:
# Save training metric points for easier access
# Collect the per-epoch metrics from the training history in a DataFrame
history_v16d_data = {
    key: history_v16d.history[key] for key in ('loss', 'val_loss', 'mae', 'val_mae')
}
df_history_v16d = pd.DataFrame(history_v16d_data)

# Save training points. The file name is derived from the current model name
# (it was previously hard-coded to 'Model4_daylight' for every model).
path_training_pt = os.path.join(folder_path_ResNets, f'{modelname}_training_points.csv')
df_history_v16d.to_csv(path_training_pt, index=False)

## Model 7.5 - low l2 rate & low dropout rate - view

In [None]:
# Name, plot title and output locations for this training run
modelname = 'Model16_view'
title_model = 'model 16 - view model'
folder_path_ResNets = '/content/drive/MyDrive/Thesis_LotteKat/ModelTraining/' + modelname
checkpoint_filepath = modelname + '_checkpoint.h5'

In [None]:
# Checkpoint callback: keep only the model with the lowest validation loss seen so far.
# NOTE(review): checkpoint_filepath is a relative path, so the checkpoint is written to the
# current working directory rather than to folder_path_ResNets - confirm this is intended.
model_checkpoint_callback = ModelCheckpoint(
    checkpoint_filepath,
    monitor='val_loss',   # quantity that decides which model is "best"
    mode='min',           # lower is better
    save_best_only=True,  # do not write a checkpoint unless the monitored value improved
    verbose=1,            # print a message whenever a checkpoint is saved
)

In [None]:
# Architecture version ResNet 4 - view model (2 regression outputs)
# Two inputs: a 224x224 RGB image and a vector of 2 numerical features
image_input = Input(shape=(224, 224, 3))
num_input = Input(shape=(2,))

# Build ResNet50 on the image input, without pretrained weights (weights=None)
# and without its classification head (include_top=False)
base_model = ResNet50(weights=None, include_top=False, input_tensor=image_input)

# Explicitly mark every ResNet50 layer as trainable
for layer in base_model.layers:
    layer.trainable = True

# Feature map produced by the ResNet backbone
resnet_output = base_model.output

# Add Batch Normalization for improved training stability
batchnorm1 = BatchNormalization()(resnet_output)

# Add a LeakyReLU activation function
leaky1 = LeakyReLU(alpha=0.1)(batchnorm1)

# Global average pooling, dropout (0.3) and a 256-unit dense layer with L2 regularisation (1e-4)
glob1 = GlobalAveragePooling2D()(leaky1)
dropout1 = Dropout(0.3)(glob1)
dense1 = Dense(256, activation='relu', kernel_regularizer=l2(0.0001))(dropout1)

# Concatenate the image branch with the numerical input
combined = Concatenate()([dense1, num_input])

# Add Batch Normalization and LeakyReLU activation
batchnorm2 = BatchNormalization()(combined)
leaky2 = LeakyReLU(alpha=0.1)(batchnorm2)

# 128-unit dense layer with L2 regularisation (1e-4), followed by dropout (0.3)
dense2 = Dense(128, activation='relu', kernel_regularizer=l2(0.0001))(leaky2)
dropout2 = Dropout(0.3)(dense2)

# Final dense layer for regression: 2 output units, linear activation
output_layer = Dense(2, activation='linear')(dropout2)

# Create the model and compile it with mean squared error loss and mean absolute error metric
model_v16v = Model(inputs=[image_input, num_input], outputs=output_layer)
model_v16v.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

In [None]:
# Fit on the training split (at most 100 epochs, batches of 64) and validate on the validation split
history_v16v = model_v16v.fit(
    x=[X_train_img, X_train_feat],
    y=y_train_view,
    batch_size=64,
    epochs=100,
    validation_data=([X_val_img, X_val_feat], y_val_view),
    callbacks=[model_checkpoint_callback, early_stopping, lr_scheduler],
)

In [None]:
# Evaluate on the test split; returns the loss (MSE) followed by the compiled metric (MAE).
# The two inputs are passed as a list, the same multi-input form used in the fit() call.
test_loss_v16v, test_mae_v16v = model_v16v.evaluate([X_test_img, X_test_feat], y_test_view, verbose=2)

In [None]:
# Save the trained model, its training history and the test metrics to the model folder
# Create the folder if needed (no error when it already exists)
os.makedirs(folder_path_ResNets, exist_ok=True)

# Save trained model
model_file = os.path.join(folder_path_ResNets, f'{modelname}_model.h5')
model_v16v.save(model_file)

# Check if the model file was saved
if os.path.exists(model_file):
    print('Model file saved successfully')
else:
    print('Model file not found')

# Save the per-epoch training history for traceback
with open(os.path.join(folder_path_ResNets, f'{modelname}_model_history.pkl'), 'wb') as file:
    pickle.dump(history_v16v.history, file)

# Create a dictionary to store the test metrics
test_metrics = {
    'Test Loss': test_loss_v16v,
    'Test MAE': test_mae_v16v
}

# Save the test metrics, one "name: value" line per metric
with open(os.path.join(folder_path_ResNets, f'{modelname}_model_test.txt'), 'w') as file:
    for metric, value in test_metrics.items():
        file.write(f'{metric}: {value}\n')

In [None]:
# Plot the training results (MAE and loss curves) and save the figure in the model folder
plot_training_validation_test_loss(
    history=history_v16v,
    test_loss=test_loss_v16v,
    test_mae=test_mae_v16v,
    file_path=folder_path_ResNets,
    name=modelname,
    title=title_model,
)

In [None]:
# Save training metric points for easier access
# Collect the per-epoch metrics from the training history in a DataFrame
history_v16v_data = {
    key: history_v16v.history[key] for key in ('loss', 'val_loss', 'mae', 'val_mae')
}
df_history_v16v = pd.DataFrame(history_v16v_data)

# Save training points. The file name is derived from the current model name
# (it was previously hard-coded to 'Model4_daylight', even for this view model).
path_training_pt = os.path.join(folder_path_ResNets, f'{modelname}_training_points.csv')
df_history_v16v.to_csv(path_training_pt, index=False)

In [None]:
# NOTE(review): this import and the bare expression below are not used by any other cell
# shown here; this looks like a leftover (e.g. an accidental auto-import) - consider removing.
from pandas.core.dtypes.base import StorageExtensionDtype
StorageExtensionDtype

pandas.core.dtypes.base.StorageExtensionDtype

In [None]:
# Name, plot title and output locations for this training run
modelname = 'Model6_4_daylight'
title_model = 'model 6.4 - daylight model'
folder_path_ResNets = '/content/drive/MyDrive/Thesis_LotteKat/ModelTraining/' + modelname
checkpoint_filepath = modelname + '_checkpoint.h5'

In [None]:
# Checkpoint callback: keep only the model with the lowest validation loss seen so far.
# NOTE(review): checkpoint_filepath is a relative path, so the checkpoint is written to the
# current working directory rather than to folder_path_ResNets - confirm this is intended.
model_checkpoint_callback = ModelCheckpoint(
    checkpoint_filepath,
    monitor='val_loss',   # quantity that decides which model is "best"
    mode='min',           # lower is better
    save_best_only=True,  # do not write a checkpoint unless the monitored value improved
    verbose=1,            # print a message whenever a checkpoint is saved
)

In [None]:
# Architecture version ResNet 4 - daylight model (3 regression outputs)
# Two inputs: a 224x224 RGB image and a vector of 2 numerical features
image_input = Input(shape=(224, 224, 3))
num_input = Input(shape=(2,))

# Build ResNet50 on the image input, without pretrained weights (weights=None)
# and without its classification head (include_top=False)
base_model = ResNet50(weights=None, include_top=False, input_tensor=image_input)

# Explicitly mark every ResNet50 layer as trainable
for layer in base_model.layers:
    layer.trainable = True

# Feature map produced by the ResNet backbone
resnet_output = base_model.output

# Add Batch Normalization for improved training stability
batchnorm1 = BatchNormalization()(resnet_output)

# Add a LeakyReLU activation function
leaky1 = LeakyReLU(alpha=0.1)(batchnorm1)

# Global average pooling, dropout (0.3) and a 256-unit dense layer with L2 regularisation (0.01)
glob1 = GlobalAveragePooling2D()(leaky1)
dropout1 = Dropout(0.3)(glob1)
dense1 = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(dropout1)

# Concatenate the image branch with the numerical input
combined = Concatenate()([dense1, num_input])

# Add Batch Normalization and LeakyReLU activation
batchnorm2 = BatchNormalization()(combined)
leaky2 = LeakyReLU(alpha=0.1)(batchnorm2)

# 128-unit dense layer with L2 regularisation (0.1), followed by dropout (0.6)
dense2 = Dense(128, activation='relu', kernel_regularizer=l2(0.1))(leaky2)
dropout2 = Dropout(0.6)(dense2)

# Final dense layer for regression: 3 output units (one per daylight label column), linear activation
output_layer = Dense(3, activation='linear')(dropout2)

# Create the model and compile it with mean squared error loss and mean absolute error metric
model_v6Dd = Model(inputs=[image_input, num_input], outputs=output_layer)
model_v6Dd.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

In [None]:
# Fit on the training split (at most 100 epochs, batches of 64) and validate on the validation split
history_v6Dd = model_v6Dd.fit(
    x=[X_train_img, X_train_feat],
    y=y_train_daylight,
    batch_size=64,
    epochs=100,
    validation_data=([X_val_img, X_val_feat], y_val_daylight),
    callbacks=[model_checkpoint_callback, early_stopping, lr_scheduler],
)

In [None]:
# Evaluate on the test split; returns the loss (MSE) followed by the compiled metric (MAE).
# The two inputs are passed as a list, the same multi-input form used in the fit() call.
test_loss_v6Dd, test_mae_v6Dd = model_v6Dd.evaluate([X_test_img, X_test_feat], y_test_daylight, verbose=2)

In [None]:
# Save the trained model, its training history and the test metrics to the model folder
# Create the folder if needed (no error when it already exists)
os.makedirs(folder_path_ResNets, exist_ok=True)

# Save trained model
model_file = os.path.join(folder_path_ResNets, f'{modelname}_model.h5')
model_v6Dd.save(model_file)

# Check if the model file was saved
if os.path.exists(model_file):
    print('Model file saved successfully')
else:
    print('Model file not found')

# Save the per-epoch training history for traceback
with open(os.path.join(folder_path_ResNets, f'{modelname}_model_history.pkl'), 'wb') as file:
    pickle.dump(history_v6Dd.history, file)

# Create a dictionary to store the test metrics
test_metrics = {
    'Test Loss': test_loss_v6Dd,
    'Test MAE': test_mae_v6Dd
}

# Save the test metrics, one "name: value" line per metric
with open(os.path.join(folder_path_ResNets, f'{modelname}_model_test.txt'), 'w') as file:
    for metric, value in test_metrics.items():
        file.write(f'{metric}: {value}\n')

In [None]:
# Plot the training results (MAE and loss curves) and save the figure in the model folder
plot_training_validation_test_loss(
    history=history_v6Dd,
    test_loss=test_loss_v6Dd,
    test_mae=test_mae_v6Dd,
    file_path=folder_path_ResNets,
    name=modelname,
    title=title_model,
)

In [None]:
# Save training metric points for easier access
# Collect the per-epoch metrics from the training history in a DataFrame
history_v6Dd_data = {
    key: history_v6Dd.history[key] for key in ('loss', 'val_loss', 'mae', 'val_mae')
}
df_history_v6Dd = pd.DataFrame(history_v6Dd_data)

# Save training points. The file name is derived from the current model name
# (it was previously hard-coded to 'Model4_daylight' for every model).
path_training_pt = os.path.join(folder_path_ResNets, f'{modelname}_training_points.csv')
df_history_v6Dd.to_csv(path_training_pt, index=False)