In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import mlflow
import mlflow.tensorflow
from sklearn.model_selection import train_test_split
import io, os, time
from sklearn.metrics import mean_squared_error



In [2]:
# Load the dataset
def load_dataset(filename='robot_arm_dataset_10M.npz'):
    """Load the input/output arrays from an ``.npz`` archive under ``./Data``.

    Args:
        filename: Name of the archive inside the ``./Data`` directory.

    Returns:
        Tuple ``(inputs, outputs)`` holding the arrays stored under the
        ``'inputs'`` and ``'outputs'`` keys of the archive.
    """
    # np.load on an .npz returns a lazy archive that keeps the file open;
    # the context manager closes it once both arrays have been read.
    with np.load(f'./Data/{filename}') as data:
        inputs = data['inputs']
        outputs = data['outputs']
    return inputs, outputs

In [3]:
# Define the model
def create_model(input_shape, output_shape):
    """Build the (uncompiled) fully connected network.

    The stack is: input -> [Dense -> BatchNormalization -> ReLU] for widths
    128 and 64 -> Dense(output_shape) with no activation.

    Args:
        input_shape: Shape passed to ``keras.layers.Input``.
        output_shape: Number of units in the final Dense layer.

    Returns:
        A ``keras.Sequential`` model.
    """
    hidden_widths = (128, 64)

    stack = [keras.layers.Input(shape=input_shape)]
    for width in hidden_widths:
        # The activation is a separate layer so that batch normalization is
        # applied before the non-linearity.
        stack.append(keras.layers.Dense(width))
        stack.append(keras.layers.BatchNormalization())
        stack.append(keras.layers.Activation('relu'))
    stack.append(keras.layers.Dense(output_shape))

    return keras.Sequential(stack)

In [4]:
class VerboseLoggingCallback(keras.callbacks.Callback):
    """Print one summary line per epoch and keep a transcript of those lines."""

    def __init__(self):
        super().__init__()
        # In-memory transcript of every line printed by on_epoch_end.
        self.output = io.StringIO()

    def on_epoch_end(self, epoch, logs=None):
        """Format the epoch's metrics to four decimals, print and record them."""
        metrics = logs if logs else {}
        prefix = f"Epoch {epoch+1}/{self.params['epochs']} - "
        formatted = [f"{k}: {v:.4f}" for k, v in metrics.items()]
        line = prefix + " - ".join(formatted)
        print(line)
        self.output.write(line + "\n")

    def get_output(self):
        """Return everything recorded so far as a single string."""
        return self.output.getvalue()
    
class LearningRateLogger(keras.callbacks.Callback):
    """Log the optimizer's learning rate to TensorBoard and MLflow each epoch.

    Args:
        tensorboard_writer: Summary writer whose ``as_default()`` context is
            used for the TensorBoard scalar.
    """

    def __init__(self, tensorboard_writer):
        super().__init__()
        self.tensorboard_writer = tensorboard_writer

    def on_epoch_end(self, epoch, logs=None):
        """Read the current learning rate and record it under step ``epoch``."""
        optimizer = self.model.optimizer

        # Prefer the canonical `learning_rate` attribute and fall back to the
        # `lr` alias only when the former is missing.
        lr = getattr(optimizer, 'learning_rate', None)
        if lr is None:
            lr = optimizer.lr

        # A schedule object has to be evaluated at the current iteration to
        # yield a number.
        if isinstance(lr, keras.optimizers.schedules.LearningRateSchedule):
            lr = lr(optimizer.iterations)

        # Convert the variable/tensor to a plain Python float: MLflow metric
        # values are floats, and both sinks then record the same number.
        lr = float(np.asarray(lr))

        with self.tensorboard_writer.as_default():
            tf.summary.scalar('learning_rate', data=lr, step=epoch)
        mlflow.log_metric("learning_rate", lr, step=epoch)
        
class CosineDecayWithWarmupCallback(tf.keras.callbacks.Callback):
    """Per-batch learning-rate schedule: linear warmup, then cosine decay.

    For the first ``warmup_steps`` batches the rate rises linearly from 0 to
    ``initial_learning_rate``; afterwards it follows half a cosine period
    down to 0 at ``total_steps`` and stays at 0 for any further batch.

    Args:
        initial_learning_rate: Peak learning rate reached at the end of warmup.
        warmup_steps: Number of batches in the linear warmup phase.
        total_steps: Number of batches after which the rate has decayed to 0.
    """

    def __init__(self, initial_learning_rate, warmup_steps, total_steps):
        super().__init__()
        self.initial_learning_rate = initial_learning_rate
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        # Counts batches across all epochs (never reset at epoch boundaries).
        self.current_step = 0

    def _scheduled_learning_rate(self, step):
        """Return the learning rate the schedule prescribes for batch ``step``."""
        if step < self.warmup_steps:
            return self.initial_learning_rate * (step / self.warmup_steps)

        decay_steps = self.total_steps - self.warmup_steps
        if decay_steps <= 0:
            # No decay window at all: hold the peak rate instead of dividing
            # by zero.
            progress = 0.0
        else:
            # Clamp to 1.0 so that batches beyond `total_steps` keep the final
            # rate; unclamped, the cosine would turn upward again.
            progress = min(1.0, (step - self.warmup_steps) / decay_steps)
        return 0.5 * self.initial_learning_rate * (1 + np.cos(np.pi * progress))

    def on_train_batch_begin(self, batch, logs=None):
        """Set the optimizer's learning rate for the batch about to run."""
        lr = self._scheduled_learning_rate(self.current_step)
        tf.keras.backend.set_value(self.model.optimizer.learning_rate, lr)
        self.current_step += 1
        

def forward_kinematics_3dof_vectorized(thetas, l1, l2, l3):
    """Map rows of three joint angles to end-effector positions.

    Args:
        thetas: 2-D array whose columns 0, 1 and 2 hold theta1, theta2 and
            theta3 (radians, as consumed by ``np.cos`` / ``np.sin``).
        l1, l2, l3: Link-length coefficients of the kinematic model.

    Returns:
        Array with one ``(x, y, z)`` row per input row.
    """
    theta1 = thetas[:, 0]
    theta2 = thetas[:, 1]
    theta3 = thetas[:, 2]

    # Trig terms that appear more than once in the position equations.
    cos_t1, sin_t1 = np.cos(theta1), np.sin(theta1)
    sin_t2 = np.sin(theta2)
    sin_t23 = np.sin(theta2 + theta3)

    x = l1 * cos_t1 * sin_t2 + l2 * cos_t1 * sin_t23
    y = l1 * sin_t1 * sin_t2 + l2 * sin_t1 * sin_t23
    z = l1 * np.cos(theta2) + l2 * np.cos(theta2 + theta3) + l3

    return np.column_stack((x, y, z))

def evaluate_inverse_kinematics(model, X_test, y_test, l1, l2, l3):
    """Score predicted joint angles by the positions they actually reach.

    The model's predictions for ``X_test`` are pushed through the forward
    kinematics and the resulting positions are compared with ``X_test``.

    Args:
        model: Object whose ``predict(X_test)`` returns joint angles.
        X_test: Target positions, one row per sample.
        y_test: Accepted for interface compatibility; not used here.
        l1, l2, l3: Link-length coefficients passed to the forward kinematics.

    Returns:
        Dict with keys ``"mse"``, ``"rmse"`` and ``"avg_euclidean_distance"``.
    """
    joint_angles = model.predict(X_test)
    reached_positions = forward_kinematics_3dof_vectorized(joint_angles, l1, l2, l3)

    # Mean squared error between requested and reached positions.
    position_mse = mean_squared_error(X_test, reached_positions)

    # Per-sample straight-line distance between requested and reached position.
    squared_offsets = (X_test - reached_positions)**2
    per_sample_distance = np.sqrt(np.sum(squared_offsets, axis=1))

    return {
        "mse": position_mse,
        "rmse": np.sqrt(position_mse),
        "avg_euclidean_distance": np.mean(per_sample_distance),
    }

In [5]:
def train_model(batch_size, epochs, initial_learning_rate, test_size=0.2, experiment_name="Inverse Kinematics NN", run_name=None):
    """Train the network once and log the run to MLflow and TensorBoard.

    Args:
        batch_size: Mini-batch size passed to ``model.fit``.
        epochs: Number of training epochs.
        initial_learning_rate: Peak learning rate of the warmup/cosine schedule.
        test_size: Fraction of the dataset held out by ``train_test_split``.
        experiment_name: MLflow experiment name; also used as a directory
            level under ``logs/`` for TensorBoard.
        run_name: Optional base name for the run. A Unix timestamp is appended
            to make it unique. When omitted, the name MLflow assigns to the run
            (or the run id if there is none) is used instead.

    Returns:
        None. Results are logged to MLflow/TensorBoard and a summary is printed.
    """
    mlflow.set_experiment(experiment_name)

    # The timestamp suffix keeps repeated runs of one configuration apart.
    unique_run_name = f"{run_name}_{int(time.time())}" if run_name else None

    with mlflow.start_run(run_name=unique_run_name) as run:
        run_id = run.info.run_id
        # Resolve a usable name even when the caller gave none, so that the
        # TensorBoard directory below can always be built.
        run_name = unique_run_name or run.data.tags.get('mlflow.runName', run_id)
        log_dir = os.path.join("logs", experiment_name, run_name)
        os.makedirs(log_dir, exist_ok=True)

        # Load and split the data
        X, y = load_dataset()
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
        print("X_train shape:", X_train.shape)
        print("X_test shape:", X_test.shape)
        print("y_train shape:", y_train.shape)
        print("y_test shape:", y_test.shape)

        # Log parameters
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("initial_learning_rate", initial_learning_rate)
        mlflow.log_param("test_size", test_size)

        # Ceiling division: fit() also runs the final, partially filled batch
        # of each epoch, and the per-batch schedule must count it.
        steps_per_epoch = -(-len(X_train) // batch_size)
        total_steps = steps_per_epoch * epochs
        warmup_steps = int(0.1 * total_steps)  # 10% of total steps for warmup

        # Create and compile the model
        model = create_model(input_shape=(3,), output_shape=3)
        optimizer = keras.optimizers.Adam(learning_rate=initial_learning_rate)
        model.compile(optimizer=optimizer, loss='mse')

        # Log model summary
        model_summary = io.StringIO()
        model.summary(print_fn=lambda x: model_summary.write(x + '\n'))
        mlflow.log_text(model_summary.getvalue(), "model_summary.txt")

        # The TensorBoard callback and the learning-rate logger write into the
        # same directory; the latter needs its own writer.
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        tensorboard_writer = tf.summary.create_file_writer(log_dir)
        try:
            lr_scheduler = CosineDecayWithWarmupCallback(initial_learning_rate, warmup_steps, total_steps)
            lr_logger = LearningRateLogger(tensorboard_writer)

            callbacks = [tensorboard_callback, lr_scheduler, lr_logger]

            # Log callback names
            callback_names = [callback.__class__.__name__ for callback in callbacks]
            mlflow.log_param("callbacks", ", ".join(callback_names))

            # Train the model; the held-out split doubles as validation data.
            history = model.fit(
                X_train, y_train,
                epochs=epochs,
                batch_size=batch_size,
                validation_data=(X_test, y_test),
                callbacks=callbacks,
                verbose=1
            )
        finally:
            # Flush and release the writer even if training fails.
            tensorboard_writer.close()

        # Log per-epoch losses
        for epoch, (loss, val_loss) in enumerate(zip(
            history.history['loss'],
            history.history['val_loss']
        )):
            mlflow.log_metric("train_loss", loss, step=epoch)
            mlflow.log_metric("val_loss", val_loss, step=epoch)

        # Log the TensorBoard log directory
        mlflow.log_param("tensorboard_log_dir", log_dir)

        # Log the model
        mlflow.tensorflow.log_model(model, "model")

        # NOTE(review): the link lengths are hard-coded here; presumably they
        # must match the ones used to generate the dataset -- confirm.
        evaluation_results = evaluate_inverse_kinematics(model, X_test, y_test, l1=1.0, l2=1.5, l3=0.5)

        # Log the evaluation results
        mlflow.log_metric("test_mse", evaluation_results["mse"])
        mlflow.log_metric("test_rmse", evaluation_results["rmse"])
        mlflow.log_metric("test_avg_euclidean_distance", evaluation_results["avg_euclidean_distance"])

    print("Training completed and logged with MLflow and TensorBoard.")
    print(f"Experiment name: {experiment_name}")
    print(f"Run name: {run_name}")
    print(f"Run ID: {run_id}")
    print(f"TensorBoard logs saved to: {log_dir}")
    print("To view in TensorBoard, run:")
    # Quoted because experiment names may contain spaces.
    print(f'tensorboard --logdir "logs/{experiment_name}"')

In [None]:
# Sweep over batch size, epoch count, learning rate and held-out fraction.
configurations = [
    dict(batch_size=2**15, epochs=50, initial_learning_rate=0.1, test_size=0.2,
         run_name="CosineWithWarmUp_baseline"),
    dict(batch_size=2**14, epochs=100, initial_learning_rate=0.01, test_size=0.2,
         run_name="CosineWithWarmUp_smaller_batch_lower_lr"),
    dict(batch_size=2**16, epochs=30, initial_learning_rate=0.001, test_size=0.2,
         run_name="CosineWithWarmUp_larger_batch_lowest_lr"),
    dict(batch_size=2**15, epochs=75, initial_learning_rate=0.05, test_size=0.15,
         run_name="CosineWithWarmUp_medium_lr_smaller_test"),
    dict(batch_size=2**13, epochs=150, initial_learning_rate=0.1, test_size=0.25,
         run_name="CosineWithWarmUp_smallest_batch_high_epochs"),
    dict(batch_size=2**17, epochs=25, initial_learning_rate=0.2, test_size=0.2,
         run_name="CosineWithWarmUp_largest_batch_highest_lr"),
    dict(batch_size=2**14, epochs=60, initial_learning_rate=0.075, test_size=0.18,
         run_name="CosineWithWarmUp_balanced_approach"),
    dict(batch_size=2**15, epochs=40, initial_learning_rate=0.15, test_size=0.22,
         run_name="CosineWithWarmUp_higher_lr_more_test"),
]

# Learning-rate comparison at a fixed batch size and epoch count.
configurations2 = [
    dict(batch_size=2**15, epochs=50, initial_learning_rate=0.1, test_size=0.2,
         run_name="SELU_CosineWithWarmUp_baseline_lr0.1"),
    dict(batch_size=2**15, epochs=50, initial_learning_rate=0.3, test_size=0.2,
         run_name="SELU_CosineWithWarmUp_high_lr0.3"),
    dict(batch_size=2**15, epochs=50, initial_learning_rate=0.01, test_size=0.2,
         run_name="SELU_CosineWithWarmUp_low_lr0.01"),
]

# Single baseline run at learning rate 0.05.
configurations3 = [
    dict(batch_size=2**15, epochs=50, initial_learning_rate=0.05, test_size=0.2,
         run_name="CosineWithWarmUp_baseline_lr0.05"),
]

# Only the third list is executed by this cell.
for config in configurations3:
    train_model(**config)

In [6]:
# Two 25-epoch runs that differ only in the peak learning rate; both are
# logged under the "Inverse Kinematics NN_IKEval" experiment.
configurations = [
    dict(
        batch_size=2**15,
        epochs=25,
        initial_learning_rate=peak_lr,
        test_size=0.2,
        run_name=f"CosineWithWarmUp_e25_lr{peak_lr}",
        experiment_name="Inverse Kinematics NN_IKEval",
    )
    for peak_lr in (0.1, 0.15)
]

# Train one model per configuration.
for config in configurations:
    train_model(**config)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25




INFO:tensorflow:Assets written to: /tmp/tmp8_ce1jw0/model/data/model/assets




Training completed and logged with MLflow and TensorBoard.
Experiment name: Inverse Kinematics NN_IKEval
Run name: CosineWithWarmUp_e25_lr0.1_1723838729
Run ID: 1599f6498d2449a78c34403cdb5a0cba
TensorBoard logs saved to: logs/Inverse Kinematics NN_IKEval/CosineWithWarmUp_e25_lr0.1_1723838729
To view in TensorBoard, run:
tensorboard --logdir logs/Inverse Kinematics NN_IKEval
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25




INFO:tensorflow:Assets written to: /tmp/tmp8y97hf92/model/data/model/assets




Training completed and logged with MLflow and TensorBoard.
Experiment name: Inverse Kinematics NN_IKEval
Run name: CosineWithWarmUp_e25_lr0.15_1723838980
Run ID: 8e73510f57fa408b85756fac094b5723
TensorBoard logs saved to: logs/Inverse Kinematics NN_IKEval/CosineWithWarmUp_e25_lr0.15_1723838980
To view in TensorBoard, run:
tensorboard --logdir logs/Inverse Kinematics NN_IKEval
