### Import libraries

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GaussianNoise, Layer, Conv1D, Flatten, Input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib  # For saving the model
from tabulate import tabulate
import os
import joblib
import time

from customLayers import RandomFlip3D, RandomFlip3D_FlatInput, RandomRotation3D_FlatInput

2025-02-20 18:04:20.982751: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740067461.088911     805 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740067461.118508     805 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-20 18:04:21.346846: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Dataset overview

In [2]:
# Folder holding the CSV datasets and the base names (no extension) to load
dataset_folder = 'output_datasets'
file_names = ['final_robotic_hand_dataset']

# Load each CSV into a DataFrame, keyed by its base name
dataframes = {}
for name in file_names:
    csv_path = os.path.join(dataset_folder, f"{name}.csv")
    dataframes[name] = pd.read_csv(csv_path)

# Report the (rows, columns) shape of every loaded DataFrame
for name, df in dataframes.items():
    print(f"{name} - Shape: {df.shape}")

final_robotic_hand_dataset - Shape: (14926, 68)


### Separate dataframe columns into input and output

In [3]:
# Names of the target columns; every other column is treated as a model input
motor_columns = ["Motor_1", "Motor_2", "Motor_3", "Motor_4", "Motor_5"]


def preprocess_data(df):
    """Split a DataFrame into an input array and a motor-target array.

    Args:
        df: DataFrame containing the columns listed in ``motor_columns`` plus
            any number of feature columns.

    Returns:
        Tuple ``(X, y)`` of numpy arrays: ``X`` holds all non-motor columns,
        one row per DataFrame row; ``y`` holds the motor columns with shape
        ``(rows, len(motor_columns))``.
    """
    feature_values = df.drop(columns=motor_columns).values
    target_values = df[motor_columns].values

    # Stack the feature rows into one 2D array and force the targets to be
    # 2D with one column per motor.
    return np.vstack(feature_values), target_values.reshape(-1, len(motor_columns))

In [4]:
# Split every loaded DataFrame into model inputs (features) and outputs (motor targets).
# training_data maps dataset name -> {'input': ndarray, 'output': ndarray}.
training_data = {}
for name, df in dataframes.items():
    # Deliberately not named `input`/`output`: a module-level `input` would
    # shadow the Python builtin for the rest of the notebook session.
    features, targets = preprocess_data(df)
    training_data[name] = {'input': features, 'output': targets}

In [5]:
# Report value ranges per dataset: a single global min/max over all input
# values, and a per-column min/max for the outputs.
for name, data in training_data.items():
    inputs, outputs = data['input'], data['output']

    report_lines = [
        f"Dataset: {name}",
        f"  Input Min: {np.min(inputs)}",
        f"  Input Max: {np.max(inputs)}",
        f"  Output Min per column: {np.min(outputs, axis=0)}",
        f"  Output Max per column: {np.max(outputs, axis=0)}",
        "-" * 50,
    ]
    print("\n".join(report_lines))


Dataset: final_robotic_hand_dataset
  Input Min: -0.0960271159807841
  Input Max: 0.0990729369223117
  Output Min per column: [0. 0. 0. 0. 0.]
  Output Max per column: [160. 160. 160. 160. 100.]
--------------------------------------------------


### Normalize

In [None]:
# Normalize the motor targets: the first four motor columns are divided by 160
# and the fifth by 100 (the per-column maxima printed by the range report).
MOTOR_OUTPUT_SCALE = np.array([160.0, 160.0, 160.0, 160.0, 100.0])

for name, df in dataframes.items():
    # Re-derive the raw targets from the DataFrame instead of dividing the
    # stored array in place. This keeps the cell idempotent (re-running it can
    # no longer normalize twice) and also works when the raw array is
    # integer-typed or read-only.
    _, raw_output = preprocess_data(df)
    training_data[name]['output'] = raw_output / MOTOR_OUTPUT_SCALE

In [7]:
# Report the input/output array shapes of every entry in training_data
for name, value in training_data.items():
    in_shape, out_shape = value['input'].shape, value['output'].shape
    print(f"{name} - Input Shape: {in_shape}, Output Shape: {out_shape}")

final_robotic_hand_dataset - Input Shape: (14926, 63), Output Shape: (14926, 5)


In [8]:
# Add a 3D view (samples, points, 3) of every flat dataset in training_data
for key in list(training_data.keys()):  # Snapshot the keys: entries are added while looping
    flat_inputs = training_data[key]['input']
    targets = training_data[key]['output']  # Targets are shared unchanged with the 3D entry

    # Only flat (samples, features) inputs are converted. This skips the "_3d"
    # entries created by an earlier run of this cell, so re-executing it no
    # longer fails while trying to reshape an already-3D array.
    if flat_inputs.ndim != 2:
        continue

    # Step 1: drop the first three feature columns -> shape (a, b-3)
    trimmed = flat_inputs[:, 3:]

    # Step 2: group the remaining columns into triples -> shape (a, c, 3).
    # presumably each triple is one (x, y, z) point -- confirm against the dataset schema
    num_points, leftover = divmod(trimmed.shape[1], 3)
    if leftover:
        raise ValueError(
            f"Dataset '{key}': {trimmed.shape[1]} feature columns remain after dropping "
            "the first three, which is not a multiple of 3"
        )
    points = trimmed.reshape(trimmed.shape[0], num_points, 3)

    # Step 3: register the result under the original key plus a "_3d" suffix
    training_data[key + "_3d"] = {
        "input": points,
        "output": targets,
    }

In [9]:
# Report the array shapes again, now including the derived 3D entries
for name, value in training_data.items():
    in_shape, out_shape = value['input'].shape, value['output'].shape
    print(f"{name} - Input Shape: {in_shape}, Output Shape: {out_shape}")

final_robotic_hand_dataset - Input Shape: (14926, 63), Output Shape: (14926, 5)
final_robotic_hand_dataset_3d - Input Shape: (14926, 20, 3), Output Shape: (14926, 5)


## Model training on robotic hand data

In [16]:
# Global Variables for Dataset Splits
# split_data() holds out (1 - TRAIN_SPLIT) of the samples and then gives
# TEST_SPLIT / (VAL_SPLIT + TEST_SPLIT) of that hold-out to the test set.
TRAIN_SPLIT = 0.8
VAL_SPLIT = 0.1
TEST_SPLIT = 0.1

# Training epochs
# Used as the default `epochs` argument of train_mlp(); early stopping there
# may end training sooner.
EPOCHS = 1500

### Training

In [None]:
# List of datasets to use (set this dynamically)
datasets_to_use = list(training_data.keys())  # Default: All datasets
# Names of the model families to train.
# NOTE(review): the training cell in this notebook only has branches for
# "linear_regression", "svm", "gradient_boost", "mlp" and "mlp_rotation";
# "random_forest", "mlp_flipping" and "mlp_flipping_rotation" are listed here
# but are not acted on by that cell.
models_to_train = ["svm","linear_regression","random_forest","gradient_boost","mlp" ,"mlp_flipping", "mlp_rotation", "mlp_flipping_rotation"]
# Root folder for training plots (one sub-folder per dataset is created under it)
output_results_folder = 'training_plots'

In [18]:
def split_data(X, y):
    """Shuffle and partition (X, y) into train, validation and test subsets.

    The sizes follow the module-level TRAIN_SPLIT / VAL_SPLIT / TEST_SPLIT
    fractions. Both splitting steps use fixed random seeds, and the shape of
    each resulting input subset is printed.

    Returns:
        Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    """
    holdout_fraction = 1 - TRAIN_SPLIT
    test_share_of_holdout = TEST_SPLIT / (VAL_SPLIT + TEST_SPLIT)

    # First carve off the hold-out portion, then divide it into val and test
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        X, y, test_size=holdout_fraction, shuffle=True, random_state=5
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_holdout, y_holdout, test_size=test_share_of_holdout, shuffle=True, random_state=42
    )

    for subset in (X_train, X_val, X_test):
        print(subset.shape)
    return X_train, X_val, X_test, y_train, y_val, y_test

In [19]:
class RandomRotation3D(Layer):
    """Training-time augmentation that randomly rotates 3D coordinates.

    On every training call, one rotation matrix is drawn and applied to the
    whole input tensor (all samples in the call share the same rotation).
    For each of the x, y and z axes independently, a rotation about that axis
    is included with probability ``rotation_prob``; its angle is sampled
    uniformly from ``[-max_angle, max_angle)`` radians. Outside of training
    the inputs are returned unchanged.

    Args:
        rotation_prob: Probability (0..1) of rotating about each axis.
        max_angle: Largest rotation magnitude per axis, in radians.
            Defaults to pi/9 (20 degrees).
        **kwargs: Forwarded to the Keras ``Layer`` constructor.
    """

    def __init__(self, rotation_prob, max_angle=np.pi / 9, **kwargs):
        super().__init__(**kwargs)
        self.rotation_prob = rotation_prob
        self.max_angle = max_angle

    def get_config(self):
        """Return the layer configuration, including the constructor arguments.

        Serializing the constructor arguments explicitly lets a saved model
        that contains this layer be re-created with the same settings.
        """
        config = super().get_config()
        config.update({
            "rotation_prob": self.rotation_prob,
            "max_angle": self.max_angle,
        })
        return config

    def call(self, inputs, training=None):
        """Applies random rotations around x, y, and/or z axes (each with probability)
           only during training. During inference, returns inputs unchanged.

        The last axis of ``inputs`` must have size 3, because the inputs are
        right-multiplied by a 3x3 rotation matrix.
        """
        if not training:
            return inputs  # No rotation during inference

        # 1) Decide (individually) whether to rotate around each axis
        rotate_x = tf.random.uniform(()) < self.rotation_prob
        rotate_y = tf.random.uniform(()) < self.rotation_prob
        rotate_z = tf.random.uniform(()) < self.rotation_prob

        # 2) Generate a random angle for each axis
        angle_x = tf.random.uniform(shape=(), minval=-self.max_angle, maxval=self.max_angle)
        angle_y = tf.random.uniform(shape=(), minval=-self.max_angle, maxval=self.max_angle)
        angle_z = tf.random.uniform(shape=(), minval=-self.max_angle, maxval=self.max_angle)

        # 3) Rotation matrices for each axis
        def rotation_matrix_x(angle):
            c = tf.cos(angle)
            s = tf.sin(angle)
            return tf.convert_to_tensor([
                [1.0, 0.0, 0.0],
                [0.0,    c,   -s],
                [0.0,    s,    c]
            ], dtype=tf.float32)

        def rotation_matrix_y(angle):
            c = tf.cos(angle)
            s = tf.sin(angle)
            return tf.convert_to_tensor([
                [   c, 0.0,    s],
                [ 0.0, 1.0,  0.0],
                [  -s, 0.0,    c]
            ], dtype=tf.float32)

        def rotation_matrix_z(angle):
            c = tf.cos(angle)
            s = tf.sin(angle)
            return tf.convert_to_tensor([
                [   c,   -s, 0.0],
                [   s,    c, 0.0],
                [ 0.0,  0.0, 1.0]
            ], dtype=tf.float32)

        # 4) Conditionally build up the full rotation matrix, starting from
        #    the identity. If no axis is selected, R stays the identity and
        #    the inputs pass through unchanged.
        R = tf.eye(3, dtype=tf.float32)

        R = tf.cond(rotate_x,
                    true_fn=lambda: tf.linalg.matmul(R, rotation_matrix_x(angle_x)),
                    false_fn=lambda: R)
        R = tf.cond(rotate_y,
                    true_fn=lambda: tf.linalg.matmul(R, rotation_matrix_y(angle_y)),
                    false_fn=lambda: R)
        R = tf.cond(rotate_z,
                    true_fn=lambda: tf.linalg.matmul(R, rotation_matrix_z(angle_z)),
                    false_fn=lambda: R)

        # 5) Apply the final rotation matrix to the input
        return tf.linalg.matmul(inputs, R)


In [20]:
def build_cnn_mlp_model(input_shape, model_name, hidden_units, gaussian_noise_stddev=0.01,activation_function ='leaky_relu',dropout_rate=0.1, flip_prob=0.5, rotation_prob=0.5):
    """Builds and compiles a Conv1D + MLP regressor for 3D inputs of shape (batch, input_shape, 3).

    Args:
        input_shape: Size of the second input axis (length of the sequence
            of 3-vectors per sample).
        model_name: Selects the augmentation layers placed after the input:
            "mlp_flipping" (RandomFlip3D), "mlp_rotation" (RandomRotation3D),
            "mlp_flipping_rotation" (both, flip first). Any other name adds none.
        hidden_units: Dict with 'num_hidden_units' (2 or 3 builds that many
            Dense layers; any other value builds one) and 'hidden_units_size'
            (list of layer widths).
        gaussian_noise_stddev: Standard deviation of the GaussianNoise layer.
        activation_function: Activation for the Conv1D and hidden Dense layers.
        dropout_rate: Dropout rate applied between consecutive hidden Dense layers.
        flip_prob: Probability passed to RandomFlip3D.
        rotation_prob: Probability passed to RandomRotation3D.

    Returns:
        A compiled ``tf.keras.Model`` (Adam, lr=1e-4, MSE loss, MAE metric)
        with a 5-unit sigmoid output layer.
    """

    learning_rate = 1e-4
    num_hidden_units = hidden_units['num_hidden_units']
    hidden_units_size = hidden_units['hidden_units_size']

    input_layer = Input(shape=(input_shape, 3))
    x = input_layer

    # Optional training-time augmentation, selected by model name
    if model_name == "mlp_flipping":
        x = RandomFlip3D(flip_prob=flip_prob)(x)
    elif model_name == "mlp_rotation":
        x = RandomRotation3D(rotation_prob=rotation_prob)(x)
    elif model_name == "mlp_flipping_rotation":
        x = RandomFlip3D(flip_prob=flip_prob)(x)
        x = RandomRotation3D(rotation_prob=rotation_prob)(x)

    x = GaussianNoise(gaussian_noise_stddev)(x)
    x = Conv1D(filters=64, kernel_size=4, strides=4, activation=activation_function, padding='valid')(x)
    x = Flatten()(x)

    # Hidden stack: Dense layers separated by Dropout. Previously the 3-layer
    # case used `if ... if ... else`, so it also fell into the final `else`
    # branch and received an unintended fourth Dense layer; exactly one
    # configuration is built now.
    depth = num_hidden_units if num_hidden_units in (2, 3) else 1
    for index in range(depth):
        if index > 0:
            x = Dropout(dropout_rate)(x)
        x = Dense(hidden_units_size[index], activation=activation_function)(x)
    output_layer = Dense(5, activation='sigmoid')(x)

    # Build model
    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

    # Compile model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='mse',
                  metrics=['mae'])

    return model


In [21]:
def train_linear_regression(X_train, y_train):
    """Fit a linear regressor per output column and return the fitted multi-output wrapper."""
    print("Training Linear Regression Model...")
    regressor = MultiOutputRegressor(LinearRegression())
    regressor.fit(X_train, y_train)
    return regressor

def train_svm(X_train, y_train, kernel='rbf', C=1.0, epsilon=0.1):
    """Fit one support-vector regressor per output column and return the fitted wrapper.

    ``kernel``, ``C`` and ``epsilon`` are forwarded to each underlying ``SVR``.
    """
    print(f"Training SVM with kernel='{kernel}', C={C}, epsilon={epsilon}...")
    base_estimator = SVR(kernel=kernel, C=C, epsilon=epsilon)
    regressor = MultiOutputRegressor(base_estimator)
    regressor.fit(X_train, y_train)
    return regressor

def build_svm(kernel='rbf', C=1.0, epsilon=0.1):
    """Return an unfitted multi-output wrapper around an ``SVR`` with the given settings."""
    base_estimator = SVR(kernel=kernel, C=C, epsilon=epsilon)
    return MultiOutputRegressor(base_estimator)

def train_gradient_boosting(X_train, y_train, n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42):
    """Fit one gradient-boosting regressor per output column and return the fitted wrapper.

    The keyword arguments are forwarded to each ``GradientBoostingRegressor``.
    """
    print(f"Training Gradient Boosting with n_estimators={n_estimators}, learning_rate={learning_rate}, max_depth={max_depth}...")
    base_estimator = GradientBoostingRegressor(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        random_state=random_state,
    )
    regressor = MultiOutputRegressor(base_estimator)
    regressor.fit(X_train, y_train)
    return regressor

def build_gradient_boosting(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42):
    """Return an unfitted multi-output wrapper around a ``GradientBoostingRegressor``."""
    base_estimator = GradientBoostingRegressor(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        random_state=random_state,
    )
    return MultiOutputRegressor(base_estimator)

In [22]:
def build_mlp(input_shape, model_name, hidden_units, gaussian_noise_stddev=0.01 ,activation_function ='leaky_relu',dropout_rate=0.1, flip_prob=0.5, rotation_prob=0.5):
    """Assemble and compile a Sequential MLP regressor for flat inputs.

    Layer order: input, optional augmentation layers chosen by ``model_name``
    ("mlp_flipping", "mlp_rotation" or "mlp_flipping_rotation"; flip before
    rotation), Gaussian noise, the hidden Dense layers with Dropout between
    consecutive ones, and a 5-unit sigmoid output.

    ``hidden_units`` is a dict with 'num_hidden_units' (2 or 3 builds that
    many hidden layers, any other value builds one) and 'hidden_units_size'
    (list of layer widths). The model is compiled with Adam (lr=1e-4),
    MSE loss and an MAE metric.
    """
    learning_rate = 1e-4
    num_hidden_units = hidden_units['num_hidden_units']
    hidden_units_size = hidden_units['hidden_units_size']

    use_flip = model_name in ("mlp_flipping", "mlp_flipping_rotation")
    use_rotation = model_name in ("mlp_rotation", "mlp_flipping_rotation")

    layers = [Input(shape=(input_shape,))]
    if use_flip:
        layers.append(RandomFlip3D_FlatInput(flip_prob=flip_prob))
    if use_rotation:
        layers.append(RandomRotation3D_FlatInput(rotation_prob=rotation_prob))

    layers.append(GaussianNoise(gaussian_noise_stddev))

    # Hidden stack: Dense layers with a Dropout layer between consecutive ones
    depth = num_hidden_units if num_hidden_units in (2, 3) else 1
    for index in range(depth):
        if index > 0:
            layers.append(Dropout(dropout_rate))
        layers.append(Dense(hidden_units_size[index], activation=activation_function))

    layers.append(Dense(5, activation= 'sigmoid'))

    # Build and compile model
    model = Sequential(layers)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    return model


In [23]:
# Store history of all MLP models for combined plotting
# (intended layout: dataset name -> model name -> Keras history dict).
# NOTE(review): the training cell further down re-creates this dict before
# filling it, so this initialisation only matters if that cell is not run.
mlp_training_histories = {}

In [24]:
def train_mlp(X_train, y_train, X_val, y_val, dataset_name, model_name, batch_size, gaussian_noise_stddev, hidden_units,activation_function, dropout_rate, rotation_prob=0.0, epochs=EPOCHS):
    """Trains an MLP (or CNN+MLP) model, saves its loss curve and returns model and history.

    Args:
        X_train, y_train: Training inputs and targets.
        X_val, y_val: Validation inputs and targets (monitored via 'val_loss').
        dataset_name: Dataset identifier. If it contains "3d" the CNN+MLP
            builder is used, otherwise the flat MLP builder. Also used for the
            plot title and output folder.
        model_name: Model variant name forwarded to the builder.
        batch_size: Mini-batch size for both training and validation datasets.
        gaussian_noise_stddev, hidden_units, activation_function, dropout_rate,
        rotation_prob: Forwarded to the model builder.
        epochs: Maximum number of epochs; early stopping may end training sooner.

    Returns:
        Tuple ``(model, history)`` with the trained Keras model and the
        ``History`` object returned by ``model.fit``.

    Side effects:
        Writes a PNG of the train/validation loss curves to
        ``<output_results_folder>/<dataset_name>/`` and prints its path.
    """

    # Convert data into TensorFlow datasets for efficiency
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(X_train.shape[0]).batch(batch_size)
    val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size)

    input_shape = X_train.shape[1]

    # Automatically select CNN-MLP for 3D datasets
    build_model = build_cnn_mlp_model if "3d" in dataset_name else build_mlp
    model = build_model(
        input_shape,
        model_name,
        gaussian_noise_stddev=gaussian_noise_stddev,
        hidden_units=hidden_units,
        activation_function=activation_function,
        rotation_prob=rotation_prob,
        dropout_rate=dropout_rate,
    )

    # Stop after 30 epochs without val_loss improvement (keeping the best
    # weights); halve the learning rate after 20 such epochs, down to 1e-6.
    early_stopping = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)
    lr_decay = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=20, min_lr=1e-6)

    history = model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=epochs,
        callbacks=[early_stopping, lr_decay]
    )

    # Plot training loss with dataset and model name in the title
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(history.history['loss'], label='Train Loss')
    ax.plot(history.history['val_loss'], label='Validation Loss')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.legend()
    ax.set_title(f"Training Progress - {model_name} on {dataset_name}")

    # Create directory for saving plots if it doesn't exist
    plots_folder = os.path.join(output_results_folder, dataset_name)
    os.makedirs(plots_folder, exist_ok=True)

    # Define filename and save plot as PNG. (The file-name stem previously
    # ended with a stray ")" that leaked into every saved file name.)
    results_model_name = f"{model_name}_noise_{gaussian_noise_stddev}_layers_{hidden_units['num_hidden_units']}_hidden{hidden_units['hidden_units_size']}_activation_{activation_function}_dropout_{dropout_rate}"
    filename = os.path.join(plots_folder, f"{results_model_name}_{dataset_name}.png")
    fig.savefig(filename, dpi=300)
    # This function runs once per trained model; close the figure so open
    # figures do not accumulate in memory over a long training session.
    plt.close(fig)
    print(f"Saved training plot: {filename}")

    return model, history


In [25]:
def evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test, model_name, dataset_name):
    """Evaluates the model and returns the metrics including average inference time.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        X_train, y_train, X_val, y_val, X_test, y_test: Data splits; targets
            are expected in the normalized scale used for training.
        model_name: Label stored under the "Model" key.
        dataset_name: Label stored under the "Dataset" key.

    Returns:
        Dict with "Dataset", "Model", and for each of Train/Val/Test the MSE,
        the MAE and "MAE (deg)", plus "Avg Inference Time (ms/sample)".
        "MAE (deg)" is the mean absolute error after multiplying the five
        output columns by [160, 160, 160, 160, 100]
        (presumably degrees -- the unit is not established in this notebook).
    """

    # Measure wall-clock time of predicting the test set. perf_counter is a
    # monotonic, high-resolution clock intended for measuring short intervals.
    start_time = time.perf_counter()
    y_pred_test = model.predict(X_test)
    inference_time = time.perf_counter() - start_time
    avg_inference_time = (inference_time / len(X_test)) * 1000  # seconds -> milliseconds per sample

    # Predict on train and validation sets
    y_pred_train = model.predict(X_train)
    y_pred_val = model.predict(X_val)

    # Per-column factors that undo the output normalization
    scaling_factors = np.array([160, 160, 160, 160, 100])

    def compute_scaled_mae(y_true, y_pred):
        """Mean absolute error over all elements, in the original output scale."""
        return np.mean(np.abs(y_true - y_pred) * scaling_factors)

    # Compute each scaled MAE once and reuse it for both the dict and the printout
    train_mae_deg = compute_scaled_mae(y_train, y_pred_train)
    val_mae_deg = compute_scaled_mae(y_val, y_pred_val)
    test_mae_deg = compute_scaled_mae(y_test, y_pred_test)

    metrics = {
        "Dataset": dataset_name,
        "Model": model_name,
        "Train MSE": mean_squared_error(y_train, y_pred_train),
        "Train MAE": mean_absolute_error(y_train, y_pred_train),
        "Train MAE (deg)": train_mae_deg,
        "Val MSE": mean_squared_error(y_val, y_pred_val),
        "Val MAE": mean_absolute_error(y_val, y_pred_val),
        "Val MAE (deg)": val_mae_deg,
        "Test MSE": mean_squared_error(y_test, y_pred_test),
        "Test MAE": mean_absolute_error(y_test, y_pred_test),
        "Test MAE (deg)": test_mae_deg,
        "Avg Inference Time (ms/sample)": avg_inference_time,
    }

    print(f"Dataset: {dataset_name}")  # a stray ")" used to be printed after the name
    print(f"Train MAE (deg): {train_mae_deg}")
    print(f"Val MAE (deg): {val_mae_deg}")
    print(f"Test MAE (deg): {test_mae_deg}")
    return metrics


In [None]:
# Training Process
# For every selected dataset: split the data, train the selected model
# families, evaluate each model, save a combined loss plot for the MLP runs,
# and finally write all collected metrics to a CSV file.
results = []                 # one metrics dict per trained model
mlp_training_histories = {}  # dataset name -> results model name -> Keras history dict
trained_models = {}          # dataset name -> results model name -> fitted model (or placeholder)

# Hyperparameter grid shared by all MLP variants. Every combination is trained
# once per variant. Each hidden_units entry gives the number of hidden layers
# and their widths, e.g. {'num_hidden_units': 2, 'hidden_units_size': [512, 256]}.
# NOTE(review): the result/model file names below do not encode these
# hyperparameters, so adding more than one option per list makes later runs
# overwrite earlier ones.
gaussian_stdev_options = [0]
hidden_units_options = [
    {'num_hidden_units': 3, 'hidden_units_size': [1024, 1024, 512]},
]
dropout_rate_options = [0]
activation_function_options = ['leaky_relu']
batch_size_options = [32]
rotation_prob_options = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

for dataset_name in datasets_to_use:
    mlp_training_histories[dataset_name] = {}
    trained_models[dataset_name] = {}
    print(f"\nProcessing Dataset: {dataset_name}")
    X, y = training_data[dataset_name]['input'], training_data[dataset_name]['output']

    # Split the data
    X_train, X_val, X_test, y_train, y_val, y_test = split_data(X, y)

    # The classical regressors are skipped for datasets whose name contains
    # "3d" or "4".
    # NOTE(review): the "3d" entries hold 3D input arrays; the reason for the
    # "4" exclusion is not visible in this notebook -- confirm.
    runs_classical_models = ("3d" not in dataset_name) and ("4" not in dataset_name)

    if runs_classical_models and "linear_regression" in models_to_train:
        print("linear_regression")
        start_time = time.perf_counter()
        model = train_linear_regression(X_train, y_train)
        training_time = time.perf_counter() - start_time
        trained_models[dataset_name]["linear_regression"] = model
        model_results = evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test, "Linear Regression", dataset_name)
        model_results["Training Time (s)"] = training_time
        results.append(model_results)

    if runs_classical_models and "svm" in models_to_train:
        C_options = [10]
        epsilon_options = [0.1]
        for C in C_options:
            for epsilon in epsilon_options:
                results_model_name = f"SVM - C: {C}, epsilon: {epsilon}"
                print(results_model_name)
                start_time = time.perf_counter()
                model = train_svm(X_train, y_train, C=C, epsilon=epsilon)
                training_time = time.perf_counter() - start_time
                trained_models[dataset_name][results_model_name] = model
                model_results = evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test, results_model_name, dataset_name)
                model_results["Training Time (s)"] = training_time
                results.append(model_results)

    if runs_classical_models and "gradient_boost" in models_to_train:
        n_estimators_options = [200]
        learning_rate_options = [0.2]
        for n_estimators in n_estimators_options:
            for learning_rate in learning_rate_options:
                results_model_name = f"Gradient Boost - n_estimators: {n_estimators}, learning_rate: {learning_rate}"
                print(results_model_name)
                start_time = time.perf_counter()
                model = train_gradient_boosting(X_train, y_train, n_estimators=n_estimators, learning_rate=learning_rate)
                training_time = time.perf_counter() - start_time
                trained_models[dataset_name][results_model_name] = model
                joblib.dump(model, "gradient_boost.pkl")
                model_results = evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test, results_model_name, dataset_name)
                model_results["Training Time (s)"] = training_time
                results.append(model_results)

    # MLP variants to train on this dataset. Each entry pairs the builder's
    # model name with the (results name, rotation probability) runs to perform.
    # The plain "mlp" variant has no rotation layer, so its probability is the
    # train_mlp default of 0.0.
    mlp_variants = []
    if "mlp" in models_to_train:
        mlp_variants.append(("mlp", [("final_model_3d", 0.0)]))
    if "mlp_rotation" in models_to_train:
        mlp_variants.append((
            "mlp_rotation",
            [(f"MLP Rotation Prob {rotation_prob}_new", rotation_prob) for rotation_prob in rotation_prob_options],
        ))

    for model_name, named_rotation_probs in mlp_variants:
        for gaussian_std in gaussian_stdev_options:
            for hidden_units in hidden_units_options:
                for dropout_rate in dropout_rate_options:
                    for activation_function in activation_function_options:
                        for batch_size in batch_size_options:
                            for results_model_name, rotation_prob in named_rotation_probs:
                                print(results_model_name)
                                start_time = time.perf_counter()
                                model, history = train_mlp(
                                    X_train, y_train, X_val, y_val, dataset_name,
                                    model_name=model_name,
                                    batch_size=batch_size,
                                    gaussian_noise_stddev=gaussian_std,
                                    hidden_units=hidden_units,
                                    activation_function=activation_function,
                                    dropout_rate=dropout_rate,
                                    rotation_prob=rotation_prob,
                                )
                                training_time = time.perf_counter() - start_time
                                # Only the name is recorded (placeholder 0); the model itself
                                # is persisted to disk instead of being kept in this dict.
                                trained_models[dataset_name][results_model_name] = 0
                                # NOTE(review): the file name does not include dataset_name, so
                                # each dataset overwrites the file written for the previous one.
                                model.save(f'{results_model_name}.keras')
                                model_results = evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test, results_model_name, dataset_name)
                                model_results["Training Time (s)"] = training_time
                                results.append(model_results)
                                mlp_training_histories[dataset_name][results_model_name] = history.history

    # Generate combined training plot -- only when at least one MLP was trained
    # on this dataset (otherwise there is nothing to draw).
    dataset_histories = mlp_training_histories[dataset_name]
    if dataset_histories:
        colors = ["blue", "red", "green", "orange", "purple", "pink", "olive", "cyan", "gray", "brown"]  # Define colors for different models

        fig, ax = plt.subplots(figsize=(12, 6))
        for i, (trained_model_name, his) in enumerate(dataset_histories.items()):
            color = colors[i % len(colors)]  # Cycle through colors

            # Plot train (solid) and validation (dashed) loss in the same color
            ax.plot(his['loss'], color=color, linestyle='-', label=f"{trained_model_name} Train Loss")
            ax.plot(his['val_loss'], color=color, linestyle='--', label=f"{trained_model_name} Validation Loss")

        ax.set_xlabel('Epochs')
        ax.set_ylabel('Loss')
        ax.legend()
        ax.set_title(f"Training Progress of All MLP Models on {dataset_name}")

        # Save combined plot. A dedicated variable is used so the global
        # `dataset_folder` (the CSV input folder) is not overwritten.
        plots_dir = os.path.join(output_results_folder, dataset_name)
        os.makedirs(plots_dir, exist_ok=True)
        combined_filename = os.path.join(plots_dir, f"combined_{dataset_name}.png")
        fig.savefig(combined_filename, dpi=300)
        plt.close(fig)  # release the figure; one is created per dataset
        print(f"Saved combined training plot: {combined_filename}")

df_results = pd.DataFrame(results)
csv_filename = "mlp_dropout_6.csv"
df_results.to_csv(csv_filename, index=False)
print(f"Results saved to {csv_filename}")


In [None]:
# Render the collected metrics as a grid table, one row per trained model
results_table = tabulate(results, headers="keys", tablefmt="grid")
print(results_table)

In [None]:
# Write the collected per-model metrics (one row per result dict) to disk
csv_filename = "training_results"
df_results = pd.DataFrame(results)
df_results.to_csv(path_or_buf=csv_filename, index=False)
print(f"Results saved to {csv_filename}")