## Centralized training

- Explore hyperparameters 
- Check that the model can learn in a centralized setting (setup sanity check)
- Get approximately the global centralized loss value it can reach. To be compared with federated setting.

In [104]:
import os
os.chdir("/Users/luisa/Desktop/nygc/cluster/projects/fl4tsf")
import torch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import numpy as np
import json
from plot_utils import plot_n_outputs
from flower.task import Net


centralized_training_loss = glob.glob(f"results_centralized_hyperparam_new/**/centralized_training/**/loss_per_epoch.csv", recursive=True)
print(f"Found {len(centralized_training_loss)} files")


2025-05-30 11:55:46,055	INFO util.py:154 -- Outdated packages:
  ipywidgets==7.8.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


Found 60 files


In [90]:
# For each file in the list, read the meta.csv file and extract lr  and read in the loss_per_epoch.csv file
def read_loss_file(file):
    # Read the meta.csv file
    meta_file = file.replace("loss_per_epoch.csv", "meta.csv")
    meta_data = pd.read_csv(meta_file)
    lr = meta_data['lr'].item()
    clipping = meta_data['gradientclipping'].item()
    bs = meta_data['batch_size'].item()
    lrdecay = meta_data['lrdecay'].item()
    
    # Read the loss_per_epoch.csv file
    loss_df = pd.read_csv(file)
    
    # Add the learning rate to the DataFrame
    loss_df['lr'] = lr
    loss_df['clipping'] = clipping
    loss_df['batch_size'] = bs
    loss_df['lrdecay'] = lrdecay
    
    return loss_df
# Read all the loss files and concatenate them into a single DataFrame
loss_dfs = []
for file in centralized_training_loss:
    loss_df = read_loss_file(file)
    loss_dfs.append(loss_df)
loss_df = pd.concat(loss_dfs, ignore_index=True)
# Convert the epoch column to a numeric type
loss_df["epoch"] = pd.to_numeric(loss_df["epoch"], errors='coerce')
# add 1
loss_df["epoch"] = loss_df["epoch"] + 1
df_centralized_training_loss = loss_df

# combine all but loss into hyperparam column
def combine_hyperparams(row):
    return f"lr: {row['lr']}, clipping: {row['clipping']}, batch_size: {row['batch_size']}, lrdecay: {row['lrdecay']}"
# apply the function to each row
df_centralized_training_loss['hyperparams'] = df_centralized_training_loss.apply(combine_hyperparams, axis=1)

# plot uniqure values of lr and clipping
unique_lrs = df_centralized_training_loss['lr'].unique()
unique_clippings = df_centralized_training_loss['clipping'].unique()
unique_batch_sizes = df_centralized_training_loss['batch_size'].unique()
print("Unique learning rates:", unique_lrs)
print("Unique clipping values:", unique_clippings)
print("Unique batch sizes:", unique_batch_sizes)


# prepare the data for plotting
df_loss_long = pd.concat([
    df_centralized_training_loss[['epoch', 'lr', 'batch_size','lrdecay', 'hyperparams', 'train_loss']].rename(columns={'train_loss': 'loss'}).assign(type='train'),
    df_centralized_training_loss[['epoch', 'lr', 'batch_size','lrdecay', 'hyperparams', 'val_loss']].rename(columns={'val_loss': 'loss'}).assign(type='val')
], ignore_index=True)


training = df_loss_long[df_loss_long['type'] == 'train']
validation = df_loss_long[df_loss_long['type'] == 'val']

Unique learning rates: [0.1   0.001 0.01 ]
Unique clipping values: [False]
Unique batch sizes: [ 64  50  16 100  32]


### Check which batchsize

In [91]:

g = sns.FacetGrid(df_loss_long, col="type", height=4, aspect=1.5, sharey=True)

g.map_dataframe(
    sns.lineplot,
    x="epoch",
    y="loss",
    hue="batch_size",
    palette="Set1"
)

g.set_axis_labels("Epoch", "Loss")
g.set_titles(col_template="{col_name} loss")
g.add_legend(title="batch size")
g._legend.set_bbox_to_anchor((1.05, 0.5))
g.fig.suptitle("training vs validation loss by batch size \n\n the smallest the batchsize, the better \n\n", fontsize=12)
plt.tight_layout()
# log scale y axis

plt.show()


  plt.show()


### Check which learning rate

In [92]:
g = sns.FacetGrid(df_loss_long, col="type", height=4, aspect=1.5, sharey=True)

g.map_dataframe(
    sns.lineplot,
    x="epoch",
    y="loss",
    hue="lr",
    palette="Set1"
)

g.set_axis_labels("Epoch", "Loss")
g.set_titles(col_template="{col_name} loss")
g.add_legend(title="lr")
g._legend.set_bbox_to_anchor((1.05, 0.5))
g.fig.suptitle("training vs validation loss by learning rate \n\n TODO \n\n", fontsize=12)
plt.tight_layout()
# plot log scale y axis
plt.yscale('log')
plt.show()


  plt.show()


In [93]:
g = sns.FacetGrid(df_loss_long, col="type", height=4, aspect=1.5, sharey=True)

g.map_dataframe(
    sns.lineplot,
    x="epoch",
    y="loss",
    hue="lrdecay",
    palette="Set1"
)

g.set_axis_labels("Epoch", "Loss")
g.set_titles(col_template="{col_name} loss")
g.add_legend(title="lrdecay")
g._legend.set_bbox_to_anchor((1.02, 0.5))
g.fig.suptitle("training vs validation loss by learning rate \n\n TODO \n\n", fontsize=12)
plt.tight_layout()
# plot log scale y axis
plt.yscale('log')
plt.show()

  plt.show()


### Go more in depth 

In [94]:
import seaborn as sns
import matplotlib.pyplot as plt

# Loop over each unique batch size
for batch_size in sorted(training['batch_size'].unique()):
    tr_batch = training[training['batch_size'] == batch_size]

    g = sns.FacetGrid(tr_batch, col='lr', col_wrap=3, height=3.5, sharey=True)

    g.map_dataframe(
        sns.lineplot,
        x='epoch',
        y='loss',
        hue='lrdecay',
        palette='Set1',
        legend='brief'
    )

    g.set_axis_labels('Epoch', 'Loss')
    g.set_titles(col_template='lr = {col_name}')
    g.fig.subplots_adjust(top=0.85)  # leave space for main title
    g.fig.suptitle(f"Batch Size = {batch_size}", fontsize=12)

    # move legend outside
    g.add_legend()
    g._legend.set_bbox_to_anchor((1.15, 0.6))
    # y log
   
    plt.tight_layout()
    plt.show()


  plt.show()
  plt.show()
  plt.show()
  plt.show()
  plt.show()


In [95]:
# Add title of type 

# add main title 
tr_16 = validation[validation['batch_size'] == 16]

g = sns.FacetGrid(tr_16, col='lr', col_wrap=3, height=3.5, sharey=True)

g.map_dataframe(
    sns.lineplot,
    x='epoch',
    y='loss',
    hue='lrdecay',
    palette='Set1',
    legend='brief'
)

g.set_axis_labels('Epoch', 'Loss')
g.add_legend()
# add legend outside
g._legend.set_bbox_to_anchor((1.15, 0.6))
g.set_titles(col_template='lr = {col_name}')
# log scale y axis


plt.tight_layout()
plt.show()


  plt.show()


# Plot best combination of hyperparameters 

In [96]:
training

Unnamed: 0,epoch,lr,batch_size,lrdecay,hyperparams,loss,type
0,1,0.1,64,1.00,"lr: 0.1, clipping: False, batch_size: 64, lrde...",5239.114258,train
1,2,0.1,64,1.00,"lr: 0.1, clipping: False, batch_size: 64, lrde...",17432.500000,train
2,3,0.1,64,1.00,"lr: 0.1, clipping: False, batch_size: 64, lrde...",6616.784668,train
3,4,0.1,64,1.00,"lr: 0.1, clipping: False, batch_size: 64, lrde...",3882.172363,train
4,5,0.1,64,1.00,"lr: 0.1, clipping: False, batch_size: 64, lrde...",3273.804688,train
...,...,...,...,...,...,...,...
17995,296,0.1,50,0.01,"lr: 0.1, clipping: False, batch_size: 50, lrde...",2631.057373,train
17996,297,0.1,50,0.01,"lr: 0.1, clipping: False, batch_size: 50, lrde...",2647.614502,train
17997,298,0.1,50,0.01,"lr: 0.1, clipping: False, batch_size: 50, lrde...",2614.049316,train
17998,299,0.1,50,0.01,"lr: 0.1, clipping: False, batch_size: 50, lrde...",2644.792725,train


In [97]:
best_bs =32
best_lr = 0.01
best_decay = 1.0

loss_type = "train"


df_plot = df_centralized_training_loss[
    (df_centralized_training_loss['batch_size'] == best_bs) &
    (df_centralized_training_loss['lr'] == best_lr) &
    (df_centralized_training_loss['lrdecay'] == best_decay)
]
# combine lr, clipping and batch size into a single column
df_plot['lr_decay_bs'] = df_plot['lr'].astype(str) + "_" + df_plot['lrdecay'].astype(str) + "_" + df_plot['batch_size'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_plot['lr_decay_bs'] = df_plot['lr'].astype(str) + "_" + df_plot['lrdecay'].astype(str) + "_" + df_plot['batch_size'].astype(str)


In [98]:
df_plot

Unnamed: 0,epoch,train_loss,train_mse,val_loss,val_mse,lr,clipping,batch_size,lrdecay,hyperparams,lr_decay_bs
12600,1,2576.834229,0.517930,2556.682792,0.513784,0.01,False,32,1.0,"lr: 0.01, clipping: False, batch_size: 32, lrd...",0.01_1.0_32
12601,2,2324.423584,0.466746,2385.601249,0.478795,0.01,False,32,1.0,"lr: 0.01, clipping: False, batch_size: 32, lrd...",0.01_1.0_32
12602,3,2370.773682,0.475651,2379.377651,0.477812,0.01,False,32,1.0,"lr: 0.01, clipping: False, batch_size: 32, lrd...",0.01_1.0_32
12603,4,2311.869873,0.464138,2365.524902,0.474919,0.01,False,32,1.0,"lr: 0.01, clipping: False, batch_size: 32, lrd...",0.01_1.0_32
12604,5,2305.396240,0.462834,2354.089425,0.472137,0.01,False,32,1.0,"lr: 0.01, clipping: False, batch_size: 32, lrd...",0.01_1.0_32
...,...,...,...,...,...,...,...,...,...,...,...
12895,296,37.334778,0.006690,39.505858,0.007195,0.01,False,32,1.0,"lr: 0.01, clipping: False, batch_size: 32, lrd...",0.01_1.0_32
12896,297,19.497768,0.003047,18.009737,0.002689,0.01,False,32,1.0,"lr: 0.01, clipping: False, batch_size: 32, lrd...",0.01_1.0_32
12897,298,51.506371,0.009707,56.613772,0.010795,0.01,False,32,1.0,"lr: 0.01, clipping: False, batch_size: 32, lrd...",0.01_1.0_32
12898,299,65.183487,0.012122,34.024928,0.006139,0.01,False,32,1.0,"lr: 0.01, clipping: False, batch_size: 32, lrd...",0.01_1.0_32


In [99]:
plt.figure(figsize=(5, 2.5))

# # Plot training loss
sns.lineplot(
    data=df_plot,
    x='epoch',
    y='train_loss',
    hue='lr_decay_bs',
    palette='tab10'
)
reds = sns.color_palette("Reds", n_colors=1)
plt.legend(title="lr, decay, batch size", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.ylim(bottom=0)
plt.xlim(left=0, right=20)
plt.xticks(ticks=range(0, df_plot['epoch'].max() + 1, 50))
plt.title('training loss ')
plt.show()
# # Plot validation loss
plt.figure(figsize=(5, 2.5))
sns.lineplot(
    data=df_plot,
    x='epoch',
    y='val_loss',
    hue='lr_decay_bs',
    palette=reds,
    linestyle='-',
)

plt.ylim(bottom=0)
plt.xlim(left=0, right=20)
plt.xticks(ticks=range(0, df_plot['epoch'].max() + 1, 50))
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('validation loss ')

plt.tight_layout()
plt.legend(title="lr, decay, batch size", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()


  plt.show()
  plt.show()


In [100]:
# Get the min loss
min_train_loss = df_plot['train_loss'].min()
min_val_loss = df_plot['val_loss'].min()
print(f"Minimum training loss: {min_train_loss}")
print(f"Minimum validation loss: {min_val_loss}")

Minimum training loss: 9.22089958190918
Minimum validation loss: 10.799880708966937


## Visualize the models predictions

In [None]:
#############################
# Load Model and Weigths
#############################

weights_file = "../results_federated_learningtest/periodic/federated_training/FedAvg/100_rounds/rep_1-alpha_0.5-lr_0.001-batchsize_32_clipping_False_lrdecay_1.0_localepochs_3/federated_outputs/model.pth"
weights = torch.load(weights_file, weights_only=True)
model = Net()
model.load_state_dict(weights)

#############################
# Load Dataset
#############################
test_dataset_filename = "/Users/luisa/Desktop/nygc/cluster/projects/fl4tsf/data/periodic/periodic_test.pt"
time_steps_filename = "/Users/luisa/Desktop/nygc/cluster/projects/fl4tsf/data/periodic/periodic_time_steps.pt"
timestamps = torch.load(time_steps_filename, weights_only=True)
dataset = torch.load(test_dataset_filename, weights_only=True)

plot_n_outputs(model, dataset, timestamps)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/luisa/Desktop/nygc/cluster/projects/fl4tsf/results_federated_learningtest/periodic/federated_training/FedAvg/100_rounds/rep_1-alpha_0.5-lr_0.01-batchsize_16_clipping_False_lrdecay_1.0_localepochs_10/federated_outputs/model.pth'

NameError: name 'plot_n_outputs' is not defined