In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import mlflow
import mlflow.tensorflow
from sklearn.model_selection import train_test_split
import io
import time
import matplotlib.pyplot as plt

In [2]:
# Constants
L1, L2, L3 = 1.0, 1.5, 0.5  # link lengths

@tf.function
def forward_kinematics_tf(theta):
    theta1, theta2, theta3 = tf.unstack(theta, axis=1)
    
    x = L1 * tf.cos(theta1) * tf.sin(theta2) + L2 * tf.cos(theta1) * tf.sin(theta2 + theta3)
    y = L1 * tf.sin(theta1) * tf.sin(theta2) + L2 * tf.sin(theta1) * tf.sin(theta2 + theta3)
    z = L1 * tf.cos(theta2) + L2 * tf.cos(theta2 + theta3) + L3
    
    return tf.stack([x, y, z], axis=1)

def evaluate_model(model, test_inputs, test_outputs, input_mean, input_std, batch_size=2**16):
    test_inputs = tf.convert_to_tensor(test_inputs, dtype=tf.float32)
    input_mean = tf.convert_to_tensor(input_mean, dtype=tf.float32)
    input_std = tf.convert_to_tensor(input_std, dtype=tf.float32)
    predicted_angles_normalized = model.predict(test_inputs, batch_size=batch_size)
    predicted_angles = predicted_angles_normalized * (np.pi/2)
    true_xyz = test_inputs * input_std + input_mean
    predicted_xyz = forward_kinematics_tf(predicted_angles)
    errors = tf.norm(true_xyz - predicted_xyz, axis=1)
    return errors.numpy(), true_xyz.numpy(), predicted_xyz.numpy()


def create_model(config):
    return create_model_with_activations(config['activation_config'], config['fk_weight'])

def plot_error_distribution(errors, title, save_path=None):
    plt.figure(figsize=(12, 6))
    plt.hist(errors, bins=400, alpha=0.5)
    plt.title(title)
    plt.xlabel('Error (Euclidean distance)')
    plt.ylabel('Frequency')
    plt.xlim([0, 0.5])
    
    if save_path:
        plt.savefig(save_path)
        plt.close()
    else:
        plt.show()

def plot_true_vs_predicted(true_xyz, predicted_xyz, title, save_path=None):
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    fig.suptitle(title)
    
    for i, coord in enumerate(['X', 'Y', 'Z']):
        ax = axes[i]
        ax.scatter(true_xyz[:, i], predicted_xyz[:, i], alpha=0.1)
        ax.plot([true_xyz[:, i].min(), true_xyz[:, i].max()], [true_xyz[:, i].min(), true_xyz[:, i].max()], 'r--')
        ax.set_xlabel(f'True {coord}')
        ax.set_ylabel(f'Predicted {coord}')
        ax.set_title(f'{coord} Coordinate: True vs Predicted')
    
    plt.tight_layout()
    
    if save_path:
        plt.savefig(save_path)
        plt.close()
    else:
        plt.show()

def load_and_preprocess_data(filename='robot_arm_dataset_10M.npz'):
    data = np.load(f'./Data/{filename}')
    inputs, outputs = data['inputs'], data['outputs']
    
    input_mean = np.mean(inputs, axis=0)
    input_std = np.std(inputs, axis=0)
    inputs_normalized = (inputs - input_mean) / input_std

    outputs_normalized = outputs / (np.pi/2)

    split_index = int(0.9 * len(inputs))
    train_inputs, test_inputs = inputs_normalized[:split_index], inputs_normalized[split_index:]
    train_outputs, test_outputs = outputs_normalized[:split_index], outputs_normalized[split_index:]

    return (train_inputs, train_outputs), (test_inputs, test_outputs), input_mean, input_std

class VerboseLoggingCallback(keras.callbacks.Callback):
    def __init__(self):
        super().__init__()
        self.output = io.StringIO()

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        output = f"Epoch {epoch+1}/{self.params['epochs']} - "
        output += " - ".join(f"{k}: {v:.4f}" for k, v in logs.items())
        print(output)
        self.output.write(output + "\n")

    def get_output(self):
        return self.output.getvalue()

class LearningRateLogger(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        lr = self.model.optimizer.lr
        if hasattr(lr, 'value'):
            lr = lr.value()
        mlflow.log_metric("learning_rate", lr, step=epoch)
        
class CosineDecayWithWarmupCallback(tf.keras.callbacks.Callback):
    def __init__(self, initial_learning_rate, warmup_steps, total_steps):
        super(CosineDecayWithWarmupCallback, self).__init__()
        self.initial_learning_rate = initial_learning_rate
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        self.current_step = 0

    def on_train_batch_begin(self, batch, logs=None):
        if self.current_step < self.warmup_steps:
            lr = self.initial_learning_rate * (self.current_step / self.warmup_steps)
        else:
            progress = (self.current_step - self.warmup_steps) / (self.total_steps - self.warmup_steps)
            lr = 0.5 * self.initial_learning_rate * (1 + np.cos(np.pi * progress))

        tf.keras.backend.set_value(self.model.optimizer.lr, lr)
        self.current_step += 1

def mish(x):
    return x * tf.math.tanh(tf.math.softplus(x))

def custom_loss(fk_weight):
    def loss_fn(y_true, y_pred):
        # Huber loss for joint angles
        angle_loss = tf.keras.losses.Huber(delta=0.1)(y_true, y_pred)
        
        # Forward kinematics loss (using Huber loss)
        fk_true = forward_kinematics_tf(y_true)
        fk_pred = forward_kinematics_tf(y_pred)
        fk_loss = tf.keras.losses.Huber(delta=0.1)(fk_true, fk_pred)
        
        # Combine losses
        total_loss = angle_loss + fk_weight * fk_loss
        return total_loss
    return loss_fn

def create_model_with_activations(activation_config, fk_weight):
    model = keras.Sequential([
        keras.layers.Dense(128, input_shape=(3,)),
        keras.layers.BatchNormalization(),
        keras.layers.Activation(activation_functions[activation_config[0]]),
        
        keras.layers.Dense(256),
        keras.layers.BatchNormalization(),
        keras.layers.Activation(activation_functions[activation_config[1]]),
        
        keras.layers.Dense(256),
        keras.layers.BatchNormalization(),
        keras.layers.Activation(activation_functions[activation_config[2]]),
        
        keras.layers.Dense(128),
        keras.layers.BatchNormalization(),
        keras.layers.Activation(activation_functions[activation_config[3]]),
        
        keras.layers.Dense(3, activation='tanh')
    ])
    
    model.compile(optimizer='adam', loss=custom_loss(fk_weight))
    
    return model

def run_single_experiment(config):
    (train_inputs, train_outputs), (test_inputs, test_outputs), input_mean, input_std = load_and_preprocess_data()
    
    # Split test data into validation and test sets
    valid_inputs, test_inputs, valid_outputs, test_outputs = train_test_split(
        test_inputs, test_outputs, test_size=0.5, random_state=42
    )
    
    mlflow.set_experiment(config['experiment_name'])
    
    with mlflow.start_run(run_name=config['model_name']):
        # Log parameters
        mlflow.log_params(config)
        
        # Calculate total steps
        steps_per_epoch = len(train_inputs) // config['batch_size']
        total_steps = steps_per_epoch * config['epochs']
        warmup_steps = int(0.1 * total_steps)  # 10% of total steps for warmup
        
        # Create and compile model
        model = create_model(config)
        
        # Log model summary
        model_summary = io.StringIO()
        model.summary(print_fn=lambda x: model_summary.write(x + '\n'))
        mlflow.log_text(model_summary.getvalue(), "model_summary.txt")
        
        # Set up callbacks
        early_stopping = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
        lr_scheduler = CosineDecayWithWarmupCallback(
            config['initial_learning_rate'],
            warmup_steps,
            total_steps
        )
        verbose_logging = VerboseLoggingCallback()
        lr_logger = LearningRateLogger()
        
        callbacks = [lr_scheduler, verbose_logging, lr_logger]
        
        # Log callback names
        callback_names = [callback.__class__.__name__ for callback in callbacks]
        mlflow.log_param("callbacks", ", ".join(callback_names))
        
        # Train the model
        start_time = time.time()
        history = model.fit(
            train_inputs, train_outputs,
            epochs=config['epochs'],
            batch_size=config['batch_size'],
            validation_data=(valid_inputs, valid_outputs),
            callbacks=callbacks
        )
        training_time = time.time() - start_time
        
        # Log training metrics
        for epoch, (loss, val_loss) in enumerate(zip(history.history['loss'], history.history['val_loss'])):
            mlflow.log_metric("train_loss", loss, step=epoch)
            mlflow.log_metric("val_loss", val_loss, step=epoch)
        
        mlflow.log_metric("training_time", training_time)
        
        # Log model
        mlflow.tensorflow.log_model(model, "model")
        
        # Log training output
        mlflow.log_text(verbose_logging.get_output(), "training_output.txt")
        
        # Evaluate model
        errors, true_xyz, predicted_xyz = evaluate_model(model, test_inputs, test_outputs, input_mean, input_std)
        
        # Log evaluation metrics
        mlflow.log_metric("mean_error", np.mean(errors))
        mlflow.log_metric("median_error", np.median(errors))
        mlflow.log_metric("90th_percentile_error", np.percentile(errors, 90))
        mlflow.log_metric("max_error", np.max(errors))
        
        # Generate and log the true vs predicted plot
        true_vs_pred_plot_path = f"./Figures/ActivationFunction/{config['model_name']}_true_vs_predicted.png"
        plot_true_vs_predicted(true_xyz, predicted_xyz, f"{config['model_name']} Model: True vs Predicted", save_path=true_vs_pred_plot_path)
        mlflow.log_artifact(true_vs_pred_plot_path)
        
        # Generate and log the error distribution plot
        error_dist_plot_path = f"./Figures/ActivationFunction/{config['model_name']}_error_distribution.png"
        plot_error_distribution(errors, f"{config['model_name']} Model: Error Distribution", save_path=error_dist_plot_path)
        mlflow.log_artifact(error_dist_plot_path)
        
        print(f"\n{config['model_name']} Model:")
        print(f"Mean Error: {np.mean(errors):.4f}")
        print(f"Median Error: {np.median(errors):.4f}")
        print(f"90th Percentile Error: {np.percentile(errors, 90):.4f}")
        print(f"Max Error: {np.max(errors):.4f}")
        print(f"Training Time: {training_time:.2f} seconds")
        
        return {
            'model': model,
            'history': history,
            'errors': errors,
            'true_xyz': true_xyz,
            'predicted_xyz': predicted_xyz,
            'training_time': training_time
        }

In [3]:
# Create configurations
configs = []

# Define activation functions
activation_functions = {
    'relu': tf.nn.relu,
    'leaky_relu': tf.nn.leaky_relu,
    'swish': tf.nn.swish,
    'mish': mish,
    'elu': tf.nn.elu,
    'selu': tf.nn.selu  # Adding SELU
}

activation_combinations = [
    ('relu', 'relu', 'relu', 'relu'),
#     ('leaky_relu', 'leaky_relu', 'leaky_relu', 'leaky_relu'),
#     ('swish', 'swish', 'swish', 'swish'),
#     ('mish', 'mish', 'mish', 'mish'),
#     ('elu', 'elu', 'elu', 'elu'),
#     ('selu', 'selu', 'selu', 'selu'),  # Adding SELU combination
#     ('relu', 'leaky_relu', 'swish', 'mish'),
#     ('swish', 'mish', 'elu', 'relu'),
#     ('mish', 'elu', 'relu', 'leaky_relu'),
#     ('elu', 'relu', 'leaky_relu', 'swish'),
#     ('leaky_relu', 'swish', 'mish', 'elu'),
#     ('selu', 'relu', 'swish', 'elu'),  # Adding a mixed combination with SELU
#     ('mish', 'selu', 'leaky_relu', 'swish')  # Another mixed combination with SELU
]

learning_rates = [1e-2, 4e-2]

for idx, activation_combo in enumerate(activation_combinations):
    for lr in learning_rates:
        config = {
            "model_name": f"Model__e100_BN_{idx+1}_{'_'.join(activation_combo)}_lr_{lr}",
            "fk_weight": 10,
            "epochs": 100,
            "initial_learning_rate": lr,
            "batch_size": 65536,
            "experiment_name": "Inverse Kinematics Activation FN",
            "activation_config": activation_combo
        }
        configs.append(config)

# Print the total number of configurations
print(f"Total number of configurations: {len(configs)}")

# Optionally, print out all configurations to verify
for config in configs:
    print(f"Model: {config['model_name']}, LR: {config['initial_learning_rate']}")

Total number of configurations: 2
Model: Model__e100_BN_1_relu_relu_relu_relu_lr_0.01, LR: 0.01
Model: Model__e100_BN_1_relu_relu_relu_relu_lr_0.04, LR: 0.04


In [4]:
# Run experiments
results = {}
for config in configs:
    print(f"Running experiment: {config['model_name']}")
    results[config['model_name']] = run_single_experiment(config)

# Print summary of results
for model_name, result in results.items():
    print(f"\n{model_name}:")
    print(f"Mean Error: {np.mean(result['errors']):.4f}")
    print(f"Median Error: {np.median(result['errors']):.4f}")
    print(f"90th Percentile Error: {np.percentile(result['errors'], 90):.4f}")
    print(f"Max Error: {np.max(result['errors']):.4f}")
    print(f"Training Time: {result['training_time']:.2f} seconds")

Running experiment: Model__e100_BN_1_relu_relu_relu_relu_lr_0.01


Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/mlflow/store/tracking/file_store.py", line 317, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/usr/local/lib/python3.8/dist-packages/mlflow/store/tracking/file_store.py", line 410, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/usr/local/lib/python3.8/dist-packages/mlflow/store/tracking/file_store.py", line 1341, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/usr/local/lib/python3.8/dist-packages/mlflow/store/tracking/file_store.py", line 1334, in _read_helper
    result = read_yaml(root, file_name)
  File "/usr/local/lib/python3.8/dist-packages/mlflow/utils/file_utils.py", line 309, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/tf/workdir/mlruns/mlruns/meta.yaml' does not

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100


Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100


Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100




INFO:tensorflow:Assets written to: /tmp/tmp9libcnvh/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/tmp9libcnvh/model/data/model/assets



Model__e100_BN_1_relu_relu_relu_relu_lr_0.01 Model:
Mean Error: 0.0259
Median Error: 0.0263
90th Percentile Error: 0.0436
Max Error: 3.8259
Training Time: 294.08 seconds
Running experiment: Model__e100_BN_1_relu_relu_relu_relu_lr_0.04


Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/mlflow/store/tracking/file_store.py", line 317, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/usr/local/lib/python3.8/dist-packages/mlflow/store/tracking/file_store.py", line 410, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/usr/local/lib/python3.8/dist-packages/mlflow/store/tracking/file_store.py", line 1341, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/usr/local/lib/python3.8/dist-packages/mlflow/store/tracking/file_store.py", line 1334, in _read_helper
    result = read_yaml(root, file_name)
  File "/usr/local/lib/python3.8/dist-packages/mlflow/utils/file_utils.py", line 309, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/tf/workdir/mlruns/mlruns/meta.yaml' does not

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100


Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100


Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100




INFO:tensorflow:Assets written to: /tmp/tmpihufc10w/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/tmpihufc10w/model/data/model/assets



Model__e100_BN_1_relu_relu_relu_relu_lr_0.04 Model:
Mean Error: 0.0247
Median Error: 0.0265
90th Percentile Error: 0.0423
Max Error: 3.3450
Training Time: 291.40 seconds

Model__e100_BN_1_relu_relu_relu_relu_lr_0.01:
Mean Error: 0.0259
Median Error: 0.0263
90th Percentile Error: 0.0436
Max Error: 3.8259
Training Time: 294.08 seconds

Model__e100_BN_1_relu_relu_relu_relu_lr_0.04:
Mean Error: 0.0247
Median Error: 0.0265
90th Percentile Error: 0.0423
Max Error: 3.3450
Training Time: 291.40 seconds
