# Import necessary libraries:

In [90]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input, Dense, Dropout, GaussianNoise
from keras.regularizers import l1_l2
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings("ignore")

# Load the data:

In [91]:
# Load the pickled pandas object used for training and cross-validation below
# (presumably the training split of the "combined-u" dataset — confirm).
# NOTE(review): unpickling can execute arbitrary code; only load this file
# when it comes from a trusted source.
data = pd.read_pickle("datasets/combined-u/x_train_split_alpha(0.005).pkl")

# Define the AE model:

In [92]:
def create_ae(input_dim, hidden_dim, encoding_dim, noise_factor,
              l1_reg=0.001, l2_reg=0.001):
    """Build a denoising autoencoder and the encoder that shares its layers.

    Architecture: input -> GaussianNoise -> Dense(hidden_dim) ->
    Dense(encoding_dim) ["latent"] -> Dense(hidden_dim) -> Dense(input_dim).

    Args:
        input_dim: Number of features per sample (width of input and output).
        hidden_dim: Units in the intermediate encoder and decoder layers.
        encoding_dim: Units in the latent (bottleneck) layer.
        noise_factor: Value passed to ``GaussianNoise`` for the noise applied
            to the input.
        l1_reg: L1 factor of the activity regularizer on the latent layer.
        l2_reg: L2 factor of the activity regularizer on the latent layer.

    Returns:
        A ``(autoencoder, encoder)`` tuple. ``autoencoder`` is compiled with
        the Adam optimizer and MSE loss. ``encoder`` maps the same input
        tensor to the latent layer's output, so it reuses the autoencoder's
        layers; it is not compiled.
    """
    input_layer = Input(shape=(input_dim,), name='input')
    # Corrupt the input so the network has to reconstruct the clean signal.
    noisy_input = GaussianNoise(noise_factor)(input_layer)

    encoder_hidden = Dense(hidden_dim, activation='relu',
                           name='encoding_layer')(noisy_input)
    # The activity regularizer penalises the latent activations themselves.
    latent = Dense(encoding_dim, activation='relu', name='latent',
                   activity_regularizer=l1_l2(l1=l1_reg, l2=l2_reg))(encoder_hidden)

    decoder_hidden = Dense(hidden_dim, activation='relu',
                           name='decoding_layer')(latent)
    # Sigmoid bounds every reconstructed feature to (0, 1).
    # NOTE(review): this assumes the input features are scaled to [0, 1] — confirm.
    output_layer = Dense(input_dim, activation='sigmoid',
                         name='decoder_output')(decoder_hidden)

    autoencoder = Model(input_layer, output_layer)
    encoder = Model(input_layer, latent)

    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder, encoder

# Train the model using 10-fold cross-validation:

In [95]:
# Hyper-parameters of the denoising autoencoder.
input_dim = 35
hidden_dim = 16
encoding_dim = 4
noise_factor = 0.5

# 10-fold cross-validation: a fresh autoencoder is trained on each training
# partition and scored on the held-out partition. The autoencoder AND its
# encoder from the lowest-loss fold are kept, so both can be used afterwards
# (after the loop `autoencoder` / `encoder` only refer to the last fold).
kf = KFold(n_splits=10)
best_mse = float('inf')
best_model = None
best_encoder = None
loss_scores = []

for fold_number, (train_index, test_index) in enumerate(kf.split(data), start=1):
    print(f"fold: {fold_number}")

    X_train, X_test = data.iloc[train_index], data.iloc[test_index]

    autoencoder, encoder = create_ae(input_dim, hidden_dim, encoding_dim, noise_factor)

    # Input and target are the same data: the model learns to reconstruct it.
    # 10% of the training partition is held back for per-epoch validation.
    history = autoencoder.fit(X_train, X_train, epochs=100, batch_size=32, shuffle=True, validation_split=0.1, verbose=0)

    # Plot the validation loss over 100 epochs
    # val_loss = history.history['val_loss']
    # epochs = range(1, len(val_loss) + 1)
    # plt.plot(epochs, val_loss, 'b', label='Validation loss')
    # best_epoch = val_loss.index(min(val_loss)) + 1
    # best_val_loss = round(val_loss[best_epoch-1], 4)
    # plt.plot(best_epoch, min(val_loss), 'ro', label='Best epoch')
    # plt.annotate(f'Best epoch: {best_epoch}\nVal loss: {best_val_loss}',
    #              xy=(best_epoch, min(val_loss)), xytext=(best_epoch, min(val_loss)),
    #              arrowprops=dict(facecolor='black', shrink=0.05))
    # plt.title('Validation loss')
    # plt.xlabel('Epochs')
    # plt.ylabel('Loss')
    # plt.legend()
    # plt.show()

    # Evaluate the denoising autoencoder on the testing data for this fold.
    # NOTE(review): evaluate() returns the compiled loss; with the activity
    # regularizer on the latent layer this may include the regularization
    # penalty on top of the reconstruction MSE — confirm before reporting it
    # as a pure MSE.
    loss_score = autoencoder.evaluate(X_test, X_test, verbose=0)
    loss_scores.append(loss_score)

    # Keep this fold's models if they beat the best held-out loss so far.
    if loss_score < best_mse:
        best_mse = loss_score
        best_model = autoencoder
        best_encoder = encoder
        print("Best model updated")
        print(f"Best MSE: {best_mse:.5f}\n")

# Plot the model
# plot_model(autoencoder, to_file='images/autoencoder.png', show_shapes=True, show_layer_names=True)
# plot_model(encoder, to_file='images/encoder.png', show_shapes=True, show_layer_names=True)

# Report the fold with the lowest held-out loss (reported 1-based, to match
# the "fold: N" lines printed above).
best_model_index = np.argmin(loss_scores)
print(f"The best model was found at index #{best_model_index + 1}")
print(f"Best MSE: {loss_scores[best_model_index]:.5f}")

# Calculate the average held-out loss across all folds
avg_loss_score = np.mean(loss_scores)

# Print the average performance metrics
print(f"Average loss score across all folds: {avg_loss_score:.5f}")

fold: 1
Best model updated
Best MSE: 0.03844

fold: 2
Best model updated
Best MSE: 0.03501

fold: 3
fold: 4
fold: 5
fold: 6
Best model updated
Best MSE: 0.03438

fold: 7
Best model updated
Best MSE: 0.03417

fold: 8
fold: 9
fold: 10
The best model was found at index #7
Best MSE: 0.03417
Average loss score across all folds: 0.03644


# Save the best model:

In [96]:
# Save the cross-validation winner, not the last fold's models: after the
# loop `autoencoder` / `encoder` refer to the final fold, whereas `best_model`
# holds the autoencoder with the lowest held-out loss.
best_model.save("models/combined-u/autoencoder.h5")

# Rebuild the matching encoder from the best autoencoder's own layers (input
# up to the layer named 'latent') so the saved encoder uses the same trained
# weights as the saved autoencoder.
best_model_encoder = Model(best_model.input, best_model.get_layer('latent').output)
best_model_encoder.save("models/combined-u/encoder.h5")

