In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import mlflow
import mlflow.tensorflow
from sklearn.model_selection import train_test_split
import io, os, time



In [2]:
# Load the dataset
def load_dataset(filename='robot_arm_dataset_10M.npz'):
    """Load the input/output arrays from an ``.npz`` archive under ``./Data``.

    Args:
        filename: Name of the archive inside the ``./Data`` directory.

    Returns:
        A ``(inputs, outputs)`` tuple with the arrays stored under the
        ``'inputs'`` and ``'outputs'`` keys of the archive.

    Raises:
        FileNotFoundError: If the archive does not exist.
        KeyError: If the archive has no ``'inputs'`` or ``'outputs'`` entry.
    """
    # The object returned by np.load for an .npz keeps the archive open until
    # it is closed, so manage it with a context manager. Indexing reads each
    # array fully into memory, so the results stay valid after the file closes.
    with np.load(f'./Data/{filename}') as data:
        inputs = data['inputs']
        outputs = data['outputs']
    return inputs, outputs

In [9]:
# Define the model
def create_model(input_shape, output_shape):
    """Build the fully connected network used for training.

    The stack is an input layer, two hidden stages of 128 and 64 units (each
    Dense -> BatchNormalization -> ReLU activation), and a final Dense layer
    with no explicit activation.

    Args:
        input_shape: Shape tuple passed to ``keras.layers.Input``.
        output_shape: Number of units in the final ``Dense`` layer.

    Returns:
        An uncompiled ``keras.Sequential`` model.
    """
    stack = [keras.layers.Input(shape=input_shape)]
    for hidden_units in (128, 64):
        stack.append(keras.layers.Dense(hidden_units))
        stack.append(keras.layers.BatchNormalization())
        stack.append(keras.layers.Activation('relu'))
    stack.append(keras.layers.Dense(output_shape))
    return keras.Sequential(stack)

In [10]:
class VerboseLoggingCallback(keras.callbacks.Callback):
    """Keras callback that prints one summary line per epoch and keeps a copy.

    Every printed line is also appended to an in-memory buffer that can be
    retrieved afterwards with ``get_output()``.
    """

    def __init__(self):
        super().__init__()
        # Transcript of all lines emitted by on_epoch_end.
        self.output = io.StringIO()

    def on_epoch_end(self, epoch, logs=None):
        """Print and record ``Epoch i/N - name: value - ...`` for this epoch.

        Args:
            epoch: Zero-based epoch index supplied by Keras.
            logs: Mapping of metric name to value; values are formatted with
                four decimal places. Treated as empty when falsy.
        """
        metrics = logs if logs else {}
        prefix = f"Epoch {epoch+1}/{self.params['epochs']} - "
        formatted = [f"{name}: {value:.4f}" for name, value in metrics.items()]
        line = prefix + " - ".join(formatted)
        print(line)
        self.output.write(line + "\n")

    def get_output(self):
        """Return everything recorded so far as a single string."""
        return self.output.getvalue()
    
class LearningRateLogger(keras.callbacks.Callback):
    """Log the optimizer's learning rate at the end of every epoch.

    The value is written as a ``learning_rate`` scalar to the supplied
    TensorBoard writer and as a ``learning_rate`` metric to the active MLflow
    run, both using the epoch index as the step.

    Args:
        tensorboard_writer: Summary writer whose ``as_default()`` context is
            used for the TensorBoard scalar.
    """

    def __init__(self, tensorboard_writer):
        super().__init__()
        self.tensorboard_writer = tensorboard_writer

    def _current_learning_rate(self):
        """Return the optimizer's current learning rate as a Python float."""
        optimizer = self.model.optimizer
        # `lr` is only a legacy alias; prefer the canonical attribute and fall
        # back to the alias for optimizers that expose nothing else.
        lr = getattr(optimizer, 'learning_rate', None)
        if lr is None:
            lr = optimizer.lr
        # A schedule object has to be evaluated at the current step before it
        # can be logged as a number.
        if isinstance(lr, keras.optimizers.schedules.LearningRateSchedule):
            lr = lr(optimizer.iterations)
        # Variables and eager tensors convert through NumPy; plain numbers pass
        # straight through. Loggers then always receive a plain float.
        return float(np.asarray(lr))

    def on_epoch_end(self, epoch, logs=None):
        """Write the current learning rate to TensorBoard and MLflow."""
        lr = self._current_learning_rate()
        with self.tensorboard_writer.as_default():
            tf.summary.scalar('learning_rate', data=lr, step=epoch)
        mlflow.log_metric("learning_rate", lr, step=epoch)
        
class CosineDecayWithWarmupCallback(tf.keras.callbacks.Callback):
    """Per-batch learning-rate schedule: linear warmup, then cosine decay.

    Before each training batch the optimizer's learning rate is set to

    * ``initial_learning_rate * step / warmup_steps`` while
      ``step < warmup_steps`` (so the very first batch runs at 0), and
    * ``0.5 * initial_learning_rate * (1 + cos(pi * progress))`` afterwards,
      where ``progress`` is the fraction of the post-warmup steps completed.

    Args:
        initial_learning_rate: Peak learning rate reached at the end of warmup.
        warmup_steps: Number of batches spent in the linear warmup.
        total_steps: Total number of batches the schedule is planned for.
    """

    def __init__(self, initial_learning_rate, warmup_steps, total_steps):
        super().__init__()
        self.initial_learning_rate = initial_learning_rate
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        # Number of batches seen so far across all epochs.
        self.current_step = 0

    def _scheduled_learning_rate(self, step):
        """Return the learning rate the schedule prescribes for ``step``."""
        if step < self.warmup_steps:
            return self.initial_learning_rate * (step / self.warmup_steps)

        decay_steps = self.total_steps - self.warmup_steps
        if decay_steps <= 0:
            # No decay phase was planned; hold the peak rate instead of
            # dividing by zero.
            progress = 0.0
        else:
            # Clamp so that batches beyond total_steps stay at the end of the
            # curve instead of following the cosine back upwards.
            progress = min((step - self.warmup_steps) / decay_steps, 1.0)
        return 0.5 * self.initial_learning_rate * (1 + np.cos(np.pi * progress))

    def on_train_batch_begin(self, batch, logs=None):
        """Apply the scheduled learning rate, then advance the step counter."""
        lr = self._scheduled_learning_rate(self.current_step)
        # Assign through the canonical attribute rather than the legacy `lr`
        # alias; float() normalises NumPy scalars from the cosine branch.
        self.model.optimizer.learning_rate = float(lr)
        self.current_step += 1

In [11]:
def train_model(batch_size, epochs, initial_learning_rate, test_size=0.2,
                experiment_name="Inverse Kinematics NN", run_name=None,
                warmup_fraction=0.1):
    """Train the network once and record the run in MLflow and TensorBoard.

    The dataset comes from ``load_dataset()`` and is split with a fixed
    ``random_state`` of 42. A model from ``create_model`` (3 inputs, 3
    outputs) is compiled with Adam and MSE loss and fitted under a per-batch
    warmup + cosine-decay learning-rate schedule. Parameters, the model
    summary, per-epoch losses and the trained model are logged to MLflow;
    TensorBoard event files go to ``logs/<experiment_name>/<run name>``.

    Args:
        batch_size: Mini-batch size passed to ``model.fit``.
        epochs: Number of training epochs.
        initial_learning_rate: Learning rate given to the optimizer and used
            as the peak of the schedule.
        test_size: Fraction of the data held out by ``train_test_split``; the
            held-out part is used as validation data during fitting.
        experiment_name: MLflow experiment name, also used as a directory
            level under ``logs/``.
        run_name: Optional base run name; a Unix timestamp is appended to it.
            When omitted, the name MLflow assigned to the run (or the run id)
            is used for the log directory.
        warmup_fraction: Fraction of all training steps spent in warmup.

    Returns:
        None. Results are reported through MLflow, TensorBoard and stdout.
    """
    # Set up MLflow
    mlflow.set_experiment(experiment_name)

    # Append a timestamp so repeated runs of one configuration stay distinct.
    unique_run_name = f"{run_name}_{int(time.time())}" if run_name else None

    experiment_log_root = os.path.join("logs", experiment_name)

    with mlflow.start_run(run_name=unique_run_name) as run:
        run_id = run.info.run_id
        # Fall back to the name MLflow reports (or the run id) so the log
        # directory is valid even when no run_name was supplied.
        run_name = unique_run_name or run.data.tags.get('mlflow.runName', run_id)
        log_dir = os.path.join(experiment_log_root, run_name)
        os.makedirs(log_dir, exist_ok=True)

        # Load and split the data
        X, y = load_dataset()
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42
        )

        # Log parameters
        run_params = {
            "batch_size": batch_size,
            "epochs": epochs,
            "initial_learning_rate": initial_learning_rate,
            "test_size": test_size,
            "warmup_fraction": warmup_fraction,
        }
        for param_name, param_value in run_params.items():
            mlflow.log_param(param_name, param_value)

        # Keras also runs the final partial batch, so round the number of
        # batches per epoch up; otherwise the schedule ends too early.
        steps_per_epoch = (len(X_train) + batch_size - 1) // batch_size
        total_steps = steps_per_epoch * epochs
        warmup_steps = int(warmup_fraction * total_steps)

        # Create and compile the model
        model = create_model(input_shape=(3,), output_shape=3)
        optimizer = keras.optimizers.Adam(learning_rate=initial_learning_rate)
        model.compile(optimizer=optimizer, loss='mse')

        # Log model summary
        model_summary = io.StringIO()
        model.summary(print_fn=lambda x: model_summary.write(x + '\n'))
        mlflow.log_text(model_summary.getvalue(), "model_summary.txt")

        # Set up TensorBoard callback and writer
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        tensorboard_writer = tf.summary.create_file_writer(log_dir)

        # Set up other callbacks
        # NOTE(review): an EarlyStopping(patience=5, restore_best_weights=True)
        # instance used to be created here but was never passed to fit();
        # confirm whether early stopping is wanted alongside the LR schedule.
        lr_scheduler = CosineDecayWithWarmupCallback(initial_learning_rate, warmup_steps, total_steps)
        lr_logger = LearningRateLogger(tensorboard_writer)

        callbacks = [tensorboard_callback, lr_scheduler, lr_logger]

        # Log callback names
        callback_names = [callback.__class__.__name__ for callback in callbacks]
        mlflow.log_param("callbacks", ", ".join(callback_names))

        # Train the model; always release the summary writer afterwards.
        try:
            history = model.fit(
                X_train, y_train,
                epochs=epochs,
                batch_size=batch_size,
                validation_data=(X_test, y_test),
                callbacks=callbacks,
                verbose=1
            )
        finally:
            tensorboard_writer.close()

        # Log metrics
        for epoch, (loss, val_loss) in enumerate(zip(
            history.history['loss'],
            history.history['val_loss']
        )):
            mlflow.log_metric("train_loss", loss, step=epoch)
            mlflow.log_metric("val_loss", val_loss, step=epoch)

        # Log the TensorBoard log directory
        mlflow.log_param("tensorboard_log_dir", log_dir)

        # Log the model
        mlflow.tensorflow.log_model(model, "model")

    print("Training completed and logged with MLflow and TensorBoard.")
    print(f"Experiment name: {experiment_name}")
    print(f"Run name: {run_name}")
    print(f"Run ID: {run_id}")
    print(f"TensorBoard logs saved to: {log_dir}")
    print("To view in TensorBoard, run:")
    # Quote the path: experiment names may contain spaces.
    print(f'tensorboard --logdir "{experiment_log_root}"')

In [15]:
# Hyperparameter sets for train_model. Every entry is a dict of keyword
# arguments; only the list named in the loop at the bottom is executed.

# Mixed sweep over batch size, epoch count, learning rate and test split.
configurations = [
    dict(batch_size=2**15, epochs=50, initial_learning_rate=0.1, test_size=0.2,
         run_name="CosineWithWarmUp_baseline"),
    dict(batch_size=2**14, epochs=100, initial_learning_rate=0.01, test_size=0.2,
         run_name="CosineWithWarmUp_smaller_batch_lower_lr"),
    dict(batch_size=2**16, epochs=30, initial_learning_rate=0.001, test_size=0.2,
         run_name="CosineWithWarmUp_larger_batch_lowest_lr"),
    dict(batch_size=2**15, epochs=75, initial_learning_rate=0.05, test_size=0.15,
         run_name="CosineWithWarmUp_medium_lr_smaller_test"),
    dict(batch_size=2**13, epochs=150, initial_learning_rate=0.1, test_size=0.25,
         run_name="CosineWithWarmUp_smallest_batch_high_epochs"),
    dict(batch_size=2**17, epochs=25, initial_learning_rate=0.2, test_size=0.2,
         run_name="CosineWithWarmUp_largest_batch_highest_lr"),
    dict(batch_size=2**14, epochs=60, initial_learning_rate=0.075, test_size=0.18,
         run_name="CosineWithWarmUp_balanced_approach"),
    dict(batch_size=2**15, epochs=40, initial_learning_rate=0.15, test_size=0.22,
         run_name="CosineWithWarmUp_higher_lr_more_test"),
]

# Learning-rate comparison at a fixed batch size and epoch count.
# NOTE(review): create_model uses ReLU activations; the "SELU_" prefix
# presumably refers to another model variant — confirm before reusing.
configurations2 = [
    dict(batch_size=2**15, epochs=50, initial_learning_rate=0.1, test_size=0.2,
         run_name="SELU_CosineWithWarmUp_baseline_lr0.1"),
    dict(batch_size=2**15, epochs=50, initial_learning_rate=0.3, test_size=0.2,
         run_name="SELU_CosineWithWarmUp_high_lr0.3"),
    dict(batch_size=2**15, epochs=50, initial_learning_rate=0.01, test_size=0.2,
         run_name="SELU_CosineWithWarmUp_low_lr0.01"),
]

# Single baseline run.
# NOTE(review): the saved output of this cell reports 75 epochs, so it appears
# to predate the epochs=50 setting below — re-run the cell to refresh it.
configurations3 = [
    dict(batch_size=2**15, epochs=50, initial_learning_rate=0.05, test_size=0.2,
         run_name="CosineWithWarmUp_baseline_lr0.05"),
]

# Train once per entry of the selected list.
for config in configurations3:
    train_model(**config)

Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75




INFO:tensorflow:Assets written to: /tmp/tmpmtyacjt_/model/data/model/assets




Training completed and logged with MLflow and TensorBoard.
Experiment name: Inverse Kinematics NN
Run name: CosineWithWarmUp_baseline_lr0.05_1723836916
Run ID: 4ea9f1f2bbdf4355855f4ea149ff374a
TensorBoard logs saved to: logs/Inverse Kinematics NN/CosineWithWarmUp_baseline_lr0.05_1723836916
To view in TensorBoard, run:
tensorboard --logdir logs/Inverse Kinematics NN


In [16]:
# Grid of runs at batch size 2**15 and a 0.2 test split: each epoch count is
# combined with each learning rate, and the run name encodes both values.
configurations = [
    {
        "batch_size": 2**15,
        "epochs": n_epochs,
        "initial_learning_rate": learning_rate,
        "test_size": 0.2,
        "run_name": f"CosineWithWarmUp_e{n_epochs}_lr{learning_rate}",
    }
    for n_epochs in (25, 50)
    for learning_rate in (0.1, 0.3, 0.05, 0.15)
]

# Train once per grid entry, in the order listed above.
for config in configurations:
    train_model(**config)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25




INFO:tensorflow:Assets written to: /tmp/tmp4ucvgb0_/model/data/model/assets




Training completed and logged with MLflow and TensorBoard.
Experiment name: Inverse Kinematics NN
Run name: CosineWithWarmUp_e25_lr0.1_1723837197
Run ID: 32e7dc1ce3dc4669813c0be2b768fa9b
TensorBoard logs saved to: logs/Inverse Kinematics NN/CosineWithWarmUp_e25_lr0.1_1723837197
To view in TensorBoard, run:
tensorboard --logdir logs/Inverse Kinematics NN
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25




INFO:tensorflow:Assets written to: /tmp/tmpmz3y4ity/model/data/model/assets




Training completed and logged with MLflow and TensorBoard.
Experiment name: Inverse Kinematics NN
Run name: CosineWithWarmUp_e25_lr0.3_1723837254
Run ID: 5552288cfe6a4fbda1f0a49730dbdccc
TensorBoard logs saved to: logs/Inverse Kinematics NN/CosineWithWarmUp_e25_lr0.3_1723837254
To view in TensorBoard, run:
tensorboard --logdir logs/Inverse Kinematics NN
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25




INFO:tensorflow:Assets written to: /tmp/tmp05lun0qr/model/data/model/assets




Training completed and logged with MLflow and TensorBoard.
Experiment name: Inverse Kinematics NN
Run name: CosineWithWarmUp_e25_lr0.05_1723837313
Run ID: 8a009349725747b0b424dbe5f7dbef80
TensorBoard logs saved to: logs/Inverse Kinematics NN/CosineWithWarmUp_e25_lr0.05_1723837313
To view in TensorBoard, run:
tensorboard --logdir logs/Inverse Kinematics NN
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25




INFO:tensorflow:Assets written to: /tmp/tmp3imar7qc/model/data/model/assets




Training completed and logged with MLflow and TensorBoard.
Experiment name: Inverse Kinematics NN
Run name: CosineWithWarmUp_e25_lr0.15_1723837370
Run ID: ff65065f04f14c419ee62fe624e38a44
TensorBoard logs saved to: logs/Inverse Kinematics NN/CosineWithWarmUp_e25_lr0.15_1723837370
To view in TensorBoard, run:
tensorboard --logdir logs/Inverse Kinematics NN
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




INFO:tensorflow:Assets written to: /tmp/tmpkyrlzugb/model/data/model/assets




Training completed and logged with MLflow and TensorBoard.
Experiment name: Inverse Kinematics NN
Run name: CosineWithWarmUp_e50_lr0.1_1723837428
Run ID: 82294e08013546c0897b69a08010c959
TensorBoard logs saved to: logs/Inverse Kinematics NN/CosineWithWarmUp_e50_lr0.1_1723837428
To view in TensorBoard, run:
tensorboard --logdir logs/Inverse Kinematics NN
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




INFO:tensorflow:Assets written to: /tmp/tmpj72fil8e/model/data/model/assets




Training completed and logged with MLflow and TensorBoard.
Experiment name: Inverse Kinematics NN
Run name: CosineWithWarmUp_e50_lr0.3_1723837534
Run ID: c1793ba4ddcf4cdbae6c98d478e016e4
TensorBoard logs saved to: logs/Inverse Kinematics NN/CosineWithWarmUp_e50_lr0.3_1723837534
To view in TensorBoard, run:
tensorboard --logdir logs/Inverse Kinematics NN
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




INFO:tensorflow:Assets written to: /tmp/tmpcrmeq8qp/model/data/model/assets




Training completed and logged with MLflow and TensorBoard.
Experiment name: Inverse Kinematics NN
Run name: CosineWithWarmUp_e50_lr0.05_1723837644
Run ID: e3b64d48c9374b02b6e9f4ae6f18292d
TensorBoard logs saved to: logs/Inverse Kinematics NN/CosineWithWarmUp_e50_lr0.05_1723837644
To view in TensorBoard, run:
tensorboard --logdir logs/Inverse Kinematics NN
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




INFO:tensorflow:Assets written to: /tmp/tmpi0i20ln6/model/data/model/assets




Training completed and logged with MLflow and TensorBoard.
Experiment name: Inverse Kinematics NN
Run name: CosineWithWarmUp_e50_lr0.15_1723837750
Run ID: 62ef56344f1a440485a56d457022dcbe
TensorBoard logs saved to: logs/Inverse Kinematics NN/CosineWithWarmUp_e50_lr0.15_1723837750
To view in TensorBoard, run:
tensorboard --logdir logs/Inverse Kinematics NN
