In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import seaborn as sns
import glob
import os
from itertools import product


In [4]:
width = 6.5  # in inches
height = width * 0.75  # 4:3 aspect ratio
fig_size = (width, height)

reds = sns.color_palette("Reds", 6)
blues = sns.color_palette("Blues", 6)

In [5]:
noise_multipliers = [0.5, 0.75, 1.0, 1.25, 1.5, 2.0]
experiments = list(noise_multipliers)

len(experiments)

figure_base_path = "figures/dp/"

In [10]:
losses = {}
losses[0.5] = torch.load('../losses/dp/17-07/losses_dp_1_epoch301.pt')

losses[0.75] = torch.load('../losses/dp/17-07/losses_dp_2_epoch600.pt')
# losses[1.0] = torch.load('../losses/dp/17-07/losses_dp_1.0_epoch??.pt')
losses[1.25] = torch.load('../losses/dp/17-07/losses_dp_1_epoch600.pt')
losses[1.5] = torch.load('../losses/dp/17-07/losses_dp_2_epoch420.pt')

for key in losses:
    print(f"Last loss for noise multiplier {key}: {losses[key]['train_loss'][-1]}")

Last loss for noise multiplier 0.5: 1.2524595657238662
Last loss for noise multiplier 0.75: 1.0897101980846169
Last loss for noise multiplier 1.25: 1.2661134333122226
Last loss for noise multiplier 1.5: 1.4236726948378826


In [14]:
metrics = {}

metrics[0.5] = torch.load('../metrics/dp/17-07/metrics_dp_1_epoch301.pt', weights_only=False)
metrics[0.75] = torch.load('../metrics/dp/17-07/metrics_dp_2_epoch600.pt', weights_only=False)
# metrics[1.0] = torch.load('../metrics/dp/17-07/metrics_dp_1.0_epoch??.pt', weights_only=False)
metrics[1.25] = torch.load('../metrics/dp/17-07/metrics_dp_1_epoch600.pt', weights_only=False)
metrics[1.5] = torch.load('../metrics/dp/17-07/metrics_dp_2_epoch420.pt', weights_only=False)

for key in metrics:
    print(f"Last metric for noise multiplier {key}: {metrics[key]}")

Last metric for noise multiplier 0.5: [{'epsilon': 0.5, 'delta': 1e-05, 'iterations': 5470, 'mse': 17.156686782836914, 'mae': 3.607133388519287, 'rmse': 4.142063107056303}, {'epsilon': 1.0, 'delta': 1e-05, 'iterations': 5470, 'mse': 17.156686782836914, 'mae': 3.607133388519287, 'rmse': 4.142063107056303}, {'epsilon': 1.5, 'delta': 1e-05, 'iterations': 5470, 'mse': 17.156686782836914, 'mae': 3.607133388519287, 'rmse': 4.142063107056303}, {'epsilon': 2.0, 'delta': 1e-05, 'iterations': 5470, 'mse': 17.156686782836914, 'mae': 3.607133388519287, 'rmse': 4.142063107056303}, {'epsilon': 2.5, 'delta': 1e-05, 'iterations': 5470, 'mse': 17.156686782836914, 'mae': 3.607133388519287, 'rmse': 4.142063107056303}, {'epsilon': 3.0, 'delta': 1e-05, 'iterations': 5470, 'mse': 17.156686782836914, 'mae': 3.607133388519287, 'rmse': 4.142063107056303}, {'epsilon': 3.5, 'delta': 1e-05, 'iterations': 32820, 'mse': 15.032976150512695, 'mae': 3.4394876956939697, 'rmse': 3.877238211731734}, {'epsilon': 4.0, 'del

In [None]:
# Find the latest epoch number in the filenames, this can depend on the experiment
latest_epochs = {}
for i in range(1, len(experiments) + 1):
    files = glob.glob(f'../losses/np/14-07/losses_dp_{i}_epoch*.pt')
    if files:
        latest_epoch = max(int(os.path.basename(f).split('_')[-1].replace('epoch', '').replace('.pt', '')) for f in files)
        latest_epochs[i] = latest_epoch

# Now we will load the losses for each experiment at the latest epoch
loss_files = []
for i in range(1, len(experiments) + 1):
    file_pattern = f'../losses/np/14-07/losses_dp_{i}_epoch{latest_epochs[i]}.pt'
    loss_files.extend(glob.glob(file_pattern))


# Load the losses from each file and store them in a dictionary, with the experiment number as key
losses = {}
for file in loss_files:
    experiment_nr = int(file.split('_')[2])  # Extract the experiment number from the filename
    losses[experiment_nr] = torch.load(file)

losses[1]['train_loss']


KeyError: 1

In [None]:
def plot_loss(train_loss, validation_loss, experiment_nr):
    plt.figure(figsize=fig_size)
    plt.plot(train_loss, label='Train Loss', color='blue')
    plt.plot(validation_loss, label='Validation Loss', color='orange')
    plt.title(f'Losses for Experiment {experiment_nr}: noise multiplier={experiments[experiment_nr-1]}')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(f'{figure_base_path}losses_experiment_{experiment_nr}.pdf')
    plt.close()


for i in range(1, 10):
    plot_loss(losses[i]['train_loss'], losses[i]['val_loss'], i)


In [None]:
metric_files = glob.glob('../metrics/np/14-07/metrics_dp_*_epoch*.pt')

# Load the metrics from each file and store them in a dictionary, with the experiment number as key 
metrics = {}
for file in metric_files:
    experiment_nr = int(file.split('_')[2])  # Extract the experiment number from the filename
    epoch = int(file.split('_')[-1].replace('epoch', '').replace('.pt', ''))  # Extract the epoch number
    if experiment_nr not in metrics:
        metrics[experiment_nr] = {}
    metrics[experiment_nr][epoch] = torch.load(file, weights_only=False)

metrics[1][100]

{'experiment': 1,
 'epoch': 100,
 'mse': 0.9895796775817871,
 'mae': 0.7962299585342407,
 'rmse': 0.9947761947200924}

In [None]:
def plot_metrics(metrics, experiment_nr):
    # Plot the metrics for a specific experiment
    # on the x-axis we have the epochs, on the y-axis we have the metric value
    # The metric values are mse, mae, and rmse
    for experiment_nr in metrics.keys():
        epochs = sorted(metrics[experiment_nr].keys())
        mse = [metrics[experiment_nr][epoch]['mse'] for epoch in epochs]
        mae = [metrics[experiment_nr][epoch]['mae'] for epoch in epochs]
        plt.figure(figsize=fig_size)
        plt.plot(epochs, mse, label='MSE', color='blue')
        plt.plot(epochs, mae, label='MAE', color='orange')
        plt.title(f'Metrics for Experiment {experiment_nr}: noise multiplier={experiments[experiment_nr-1]}')
        plt.xlabel('Epochs')
        plt.ylabel('Metric Value')
        plt.legend()
        plt.savefig(f'{figure_base_path}metrics_experiment_{experiment_nr}.pdf')
        plt.close()

    
for i in range(1, len(experiments) + 1):
    if i in metrics:
        plot_metrics(metrics, i)
    else:
        print(f"No metrics found for experiment {i}")


In [None]:
def plot_mse(metrics):
    # Plot the MSE for all experiments
    plt.figure(figsize=fig_size)
    for experiment_nr in metrics.keys():
        epochs = sorted(metrics[experiment_nr].keys())
        mse = [metrics[experiment_nr][epoch]['mse'] for epoch in epochs]
        plt.plot(epochs, mse, label=f'Experiment {experiment_nr}')
    plt.title('MSE for All Experiments')
    plt.xlabel('Epochs')
    plt.ylabel('MSE')
    plt.legend()
    plt.savefig(f'{figure_base_path}mse_all_experiments.pdf')
    plt.close()

plot_mse(metrics)

metrics.keys()

dict_keys([3, 1, 9, 4, 2, 7, 8, 6, 5])

In [None]:
# Experiment with the minimal metric values 
def minimal_metrics(metrics):
    # Find the experiment number with the minimal MSE, MAE, and RMSE at epoch 100
    min_mse = float('inf')
    min_mae = float('inf')
    min_rmse = float('inf')
    min_mse_experiment = None
    min_mae_experiment = None
    min_rmse_experiment = None
    for experiment_nr in metrics.keys():
        mse = metrics[experiment_nr][100]['mse']
        mae = metrics[experiment_nr][100]['mae']
        rmse = metrics[experiment_nr][100]['rmse']
        if mse < min_mse:
            min_mse = mse
            min_mse_experiment = experiment_nr
        if mae < min_mae:
            min_mae = mae
            min_mae_experiment = experiment_nr
        if rmse < min_rmse:
            min_rmse = rmse
            min_rmse_experiment = experiment_nr
    print(f'Minimal MSE: {min_mse} in Experiment {min_mse_experiment}')
    print(f'Minimal MAE: {min_mae} in Experiment {min_mae_experiment}')
    print(f'Minimal RMSE: {min_rmse} in Experiment {min_rmse_experiment}')

minimal_metrics(metrics)

Minimal MSE: 0.9895796775817871 in Experiment 1
Minimal MAE: 0.7962299585342407 in Experiment 1
Minimal RMSE: 0.9947761947200924 in Experiment 1


In [None]:
def minimal_loss(losses):
    # Find the experiment number with the minimal train and validation loss at epoch 100
    min_train_loss = float('inf')
    min_val_loss = float('inf')
    min_train_loss_experiment = None
    min_val_loss_experiment = None
    for experiment_nr in losses.keys():
        train_loss = losses[experiment_nr]['train_loss'][-1]  # Last value is at epoch 100
        val_loss = losses[experiment_nr]['val_loss'][-1]  # Last value is at epoch 100
        if train_loss < min_train_loss:
            min_train_loss = train_loss
            min_train_loss_experiment = experiment_nr
        if val_loss < min_val_loss:
            min_val_loss = val_loss
            min_val_loss_experiment = experiment_nr
    print(f'Minimal Train Loss: {min_train_loss} in Experiment {min_train_loss_experiment}')
    print(f'Minimal Validation Loss: {min_val_loss} in Experiment {min_val_loss_experiment}')

minimal_loss(losses)

Minimal Train Loss: 0.8843784033541316 in Experiment 1
Minimal Validation Loss: 0.8281576169557989 in Experiment 1


In [None]:
for i in range(1, len(experiments) + 1):
    print(f'Experiment {i} - MSE: {metrics[i][latest_epoch[i]]["mse"]}, MAE: {metrics[i][latest_epoch[i]]["mae"]}, RMSE: {metrics[i][latest_epoch[i]]["rmse"]}')
    print(f'Experiment {i} - Train Loss: {losses[i]["train_loss"][-1]}, Validation Loss: {losses[i]["val_loss"][-1]}')




Experiment 1 - MSE: 0.8298844695091248, MAE: 0.7222684621810913, RMSE: 0.910979950113681
Experiment 1 - Train Loss: 0.8843784033541316, Validation Loss: 0.8281576169557989
Experiment 2 - MSE: 0.974905788898468, MAE: 0.7888841032981873, RMSE: 0.9873731761084398
Experiment 2 - Train Loss: 1.132909424400521, Validation Loss: 0.9733529802933423
Experiment 3 - MSE: 1.0243383646011353, MAE: 0.8078224658966064, RMSE: 1.0120960253855042
Experiment 3 - Train Loss: 1.1839740049365244, Validation Loss: 1.0203383520390161
Experiment 4 - MSE: 0.9437381029129028, MAE: 0.7762559056282043, RMSE: 0.9714618381145513
Experiment 4 - Train Loss: 1.0875967270111482, Validation Loss: 0.9400128256197842
Experiment 5 - MSE: 1.0202438831329346, MAE: 0.8056095838546753, RMSE: 1.0100712267622192
Experiment 5 - Train Loss: 1.2501224155315667, Validation Loss: 1.0199100833413595
Experiment 6 - MSE: 1.0377110242843628, MAE: 0.8136131167411804, RMSE: 1.018681021853437
Experiment 6 - Train Loss: 1.2757621965246015, Va

In [None]:
# Function to extract the epoch with the minimal validation loss for each experiment
def minimal_val_loss_epoch(losses):
    min_val_loss_epochs = {}
    for experiment_nr in losses.keys():
        val_loss = losses[experiment_nr]['val_loss']
        min_epoch = np.argmin(val_loss) + 1  # +1 to convert from 0-indexed to 1-indexed epoch
        # round the epoch to nearest number % 10 == 0
        min_epoch = round(min_epoch / 10) * 10
        min_val_loss_epochs[experiment_nr] = min_epoch
    return min_val_loss_epochs

min_val_loss_epochs = minimal_val_loss_epoch(losses)

# Function to extract the metric values at the epoch with the minimal validation loss
def metrics_at_min_val_loss(metrics, min_val_loss_epochs):
    metrics_at_min_val = {}
    for experiment_nr, epoch in min_val_loss_epochs.items():
        if epoch in metrics[experiment_nr]:
            metrics_at_min_val[experiment_nr] = metrics[experiment_nr][epoch]
        else:
            print(f"Epoch {epoch} not found for Experiment {experiment_nr}")
    return metrics_at_min_val

metrics_min_val_loss = metrics_at_min_val_loss(metrics, min_val_loss_epochs)

print("Metrics at Minimal Validation Loss:")
for experiment_nr, metric in metrics_min_val_loss.items():
    print(f"Experiment {experiment_nr} metrics at epoch {min_val_loss_epochs[experiment_nr]}: MSE: {metric['mse']}, MAE: {metric['mae']}, RMSE: {metric['rmse']}")



Metrics at Minimal Validation Loss:
Experiment 2 metrics at epoch 300: MSE: 0.974905788898468, MAE: 0.7888841032981873, RMSE: 0.9873731761084398
Experiment 7 metrics at epoch 300: MSE: 1.1638725996017456, MAE: 0.8482017517089844, RMSE: 1.0788292726848607
Experiment 4 metrics at epoch 300: MSE: 0.9437381029129028, MAE: 0.7762559056282043, RMSE: 0.9714618381145513
Experiment 8 metrics at epoch 300: MSE: 1.2185795307159424, MAE: 0.8636190295219421, RMSE: 1.1038928982088536
Experiment 1 metrics at epoch 300: MSE: 0.8298844695091248, MAE: 0.7222684621810913, RMSE: 0.910979950113681
Experiment 6 metrics at epoch 300: MSE: 1.0377110242843628, MAE: 0.8136131167411804, RMSE: 1.018681021853437
Experiment 3 metrics at epoch 280: MSE: 1.0382260084152222, MAE: 0.8138803243637085, RMSE: 1.018933760563081
Experiment 9 metrics at epoch 300: MSE: 1.1995395421981812, MAE: 0.8572567105293274, RMSE: 1.0952349255745002
Experiment 5 metrics at epoch 300: MSE: 1.0202438831329346, MAE: 0.8056095838546753, RMS

In [None]:
# TODO: plot RMSE vs epsilon for each experiment, adding a horizontal line at the minimal RMSE value from the non-private experiment
def plot_rmse_vs_epsilon(metrics, min_rmse):
    plt.figure(figsize=fig_size)
    epsilons = [0.5, 0.75, 1.0, 1.25, 1.5, 2.0]
    rmse_values = [metrics[i][100]['rmse'] for i in range(1, len(epsilons) + 1)]
    
    plt.plot(epsilons, rmse_values, marker='o', label='RMSE vs Epsilon')
    plt.axhline(y=min_rmse, color='r', linestyle='--', label='Minimal RMSE (Non-Private)')
    
    plt.title('RMSE vs Epsilon for Different Noise Multipliers')
    plt.xlabel('Epsilon (Noise Multiplier)')
    plt.ylabel('RMSE')
    plt.xticks(epsilons)
    plt.legend()
    plt.savefig(f'{figure_base_path}rmse_vs_epsilon.pdf')
    plt.close()

# Calculate the minimal RMSE from the non-private experiment (experiment 1)
min_rmse_np = 0.911
plot_rmse_vs_epsilon(metrics, min_rmse_np)

In [None]:
# TODO: loss vs epsilon for each experiment, adding a horizontal line at the minimal validation loss value from the non-private experiment
def plot_loss_vs_epsilon(losses, min_val_loss_np):
    plt.figure(figsize=fig_size)
    epsilons = [0.5, 0.75, 1.0, 1.25, 1.5, 2.0]
    val_loss_values = [losses[i]['val_loss'][-1] for i in range(1, len(epsilons) + 1)]
    
    plt.plot(epsilons, val_loss_values, marker='o', label='Validation Loss vs Epsilon')
    plt.axhline(y=min_val_loss_np, color='r', linestyle='--', label='Minimal Validation Loss (Non-Private)')
    
    plt.title('Validation Loss vs Epsilon for Different Noise Multipliers')
    plt.xlabel('Epsilon (Noise Multiplier)')
    plt.ylabel('Validation Loss')
    plt.xticks(epsilons)
    plt.legend()
    plt.savefig(f'{figure_base_path}loss_vs_epsilon.pdf')
    plt.close()

# Calculate the minimal validation loss from the non-private experiment (experiment 1)
min_val_loss_np = 0.828
plot_loss_vs_epsilon(losses, min_val_loss_np)