In [1]:
# Directory holding the saved per-run training-history JSON files.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
TRAINING_HIST_PATH = "C:/Users/leona/Documents/Mestrado/nano-optics-ml/src/saved_training_history"

In [2]:
import glob, json, os, re
import pandas as pd

# File-name scheme of a saved run. lr and wgtdecay may be written in
# scientific notation (e.g. 1e-05).
pattern = re.compile(
    r'(?P<model>[A-Za-z0-9]+)_'
    r'lr(?P<lr>[\d\.]+(?:e[+-]?\d+)?)_'
    r'bs(?P<bs>\d+)_'
    r'epochs(?P<epochs>\d+)_'
    r'wgtdecay(?P<wgtdecay>[\d\.]+(?:e[+-]?\d+)?)_'
    r'hidden_size(?P<hidden_size>\d+)_'
    r'num_hidden_layers(?P<num_hidden_layers>\d+)_'
    r'(?P<date>\d{8})_(?P<time>\d{6})\.json$'
)

# Column order of the summary table; also used to build a well-formed
# (empty) frame when no run file could be summarised.
SUMMARY_COLUMNS = [
    'model', 'lr', 'bs', 'epochs', 'wgtdecay', 'hidden_size',
    'num_hidden_layers', 'date', 'time', 'run_datetime',
    'final_epoch', 'final_loss', 'final_test_loss', 'file_name',
]


def _as_float(value):
    """Convert a metric to float, mapping a missing/null value to NaN."""
    return float('nan') if value is None else float(value)


def parse_run_filename(fname):
    """Decode the hyperparameters encoded in a history file name.

    Parameters
    ----------
    fname : str
        Base name of the file (no directory part).

    Returns
    -------
    dict or None
        The named groups of ``pattern`` with ``lr``/``wgtdecay`` as float,
        ``bs``/``epochs``/``hidden_size``/``num_hidden_layers`` as int,
        ``date``/``time`` kept as strings, plus a ``run_datetime`` timestamp.
        ``None`` when the name does not follow the scheme.
    """
    m = pattern.match(fname)
    if m is None:
        return None

    params = m.groupdict()
    params['run_datetime'] = pd.to_datetime(
        params['date'] + params['time'], format="%Y%m%d%H%M%S"
    )
    params['lr'] = float(params['lr'])
    params['wgtdecay'] = float(params['wgtdecay'])
    for int_field in ('bs', 'epochs', 'hidden_size', 'num_hidden_layers'):
        params[int_field] = int(params[int_field])
    return params


def final_epoch_metrics(history):
    """Return ``(epoch, loss, test_loss)`` for the highest-numbered epoch.

    Parameters
    ----------
    history : dict
        Mapping of epoch key -> record with ``'loss'`` and ``'test_loss'``.
        Keys are compared numerically (``int(key)``), so ``"10"`` ranks
        above ``"2"``.

    Returns
    -------
    tuple or None
        ``None`` when the history holds no epochs.
    """
    if not history:
        return None
    last_key = max(history, key=int)
    record = history[last_key]
    return (
        int(last_key),
        _as_float(record.get('loss')),
        _as_float(record.get('test_loss')),
    )


def collect_run_rows(history_dir):
    """Build one summary dict per ``*.json`` run file found in ``history_dir``.

    Files whose name does not match ``pattern`` or whose history is empty are
    reported on stdout and skipped. Files are visited in sorted order so the
    result does not depend on the file-system listing order.
    """
    collected = []
    search = os.path.join(glob.escape(history_dir), "*.json")
    for filepath in sorted(glob.glob(search)):
        fname = os.path.basename(filepath)
        params = parse_run_filename(fname)
        if params is None:
            print(f"Skipping {fname}")
            continue

        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(filepath, encoding="utf-8") as f:
            history = json.load(f)

        final = final_epoch_metrics(history)
        if final is None:
            print(f"Skipping {fname} (empty history)")
            continue

        final_epoch, final_loss, final_test_loss = final
        collected.append({
            **params,
            'final_epoch': final_epoch,
            'final_loss': final_loss,
            'final_test_loss': final_test_loss,
            'file_name': fname,
        })
    return collected


def build_summary_frame(rows):
    """Assemble the per-run rows into a frame sorted by model and run time.

    Passing the column list explicitly keeps the frame well-formed (and
    sortable) even when ``rows`` is empty.
    """
    return (
        pd.DataFrame(rows, columns=SUMMARY_COLUMNS)
          .sort_values(['model', 'run_datetime'])
          .reset_index(drop=True)
    )


# NOTE(review): this is the same directory as TRAINING_HIST_PATH from the
# first cell; the literal is kept so this cell still runs on its own.
rows = collect_run_rows(
    "C:/Users/leona/Documents/Mestrado/nano-optics-ml/src/saved_training_history"
)

# assemble the summary DataFrame
df_summary = build_summary_frame(rows)

# Last expression of the cell -> rich (truncated) notebook display.
df_summary


    model      lr   bs  epochs  wgtdecay  hidden_size  num_hidden_layers  \
0     HNN  0.0010    8     500   0.00001            8                  3   
1     HNN  0.0010    8     500   0.00001           16                  3   
2     HNN  0.0010    8     500   0.00010            8                  3   
3     HNN  0.0010    8     500   0.00010           16                  3   
4     HNN  0.0010    8     500   0.00100            8                  3   
..    ...     ...  ...     ...       ...          ...                ...   
472    NN  0.0001  512    7000   0.00001           16                  3   
473    NN  0.0001  512    7000   0.00010            8                  3   
474    NN  0.0001  512    7000   0.00010           16                  3   
475    NN  0.0001  512    7000   0.00100            8                  3   
476    NN  0.0001  512    7000   0.00100           16                  3   

         date    time        run_datetime  final_epoch  final_loss  \
0    20250414  04

In [3]:
# NN runs ranked by final test loss (lowest first); show the 30 best.
(
    df_summary
    .query("""model == 'NN'""")
    .sort_values(['final_test_loss'])
    .head(30)
    .reset_index(drop=True)
)

Unnamed: 0,model,lr,bs,epochs,wgtdecay,hidden_size,num_hidden_layers,date,time,run_datetime,final_epoch,final_loss,final_test_loss,file_name
0,NN,0.001,8,7000,1e-05,16,3,20250412,230722,2025-04-12 23:07:22,7000,0.011369,0.017902,NN_lr0.001_bs8_epochs7000_wgtdecay1e-05_hidden...
1,NN,0.0005,8,7000,1e-05,16,3,20250413,94956,2025-04-13 09:49:56,7000,0.011869,0.020131,NN_lr0.0005_bs8_epochs7000_wgtdecay1e-05_hidde...
2,NN,0.001,8,5000,1e-05,16,3,20250412,210423,2025-04-12 21:04:23,5000,0.011736,0.022787,NN_lr0.001_bs8_epochs5000_wgtdecay1e-05_hidden...
3,NN,0.001,32,5000,1e-05,16,3,20250413,35536,2025-04-13 03:55:36,5000,0.013843,0.024358,NN_lr0.001_bs32_epochs5000_wgtdecay1e-05_hidde...
4,NN,0.001,8,7000,1e-05,8,3,20250412,224216,2025-04-12 22:42:16,7000,0.018938,0.025201,NN_lr0.001_bs8_epochs7000_wgtdecay1e-05_hidden...
5,NN,0.0005,8,5000,1e-05,16,3,20250413,74746,2025-04-13 07:47:46,5000,0.015321,0.026479,NN_lr0.0005_bs8_epochs5000_wgtdecay1e-05_hidde...
6,NN,0.0005,16,5000,1e-05,16,3,20250413,121642,2025-04-13 12:16:42,5000,0.01747,0.028196,NN_lr0.0005_bs16_epochs5000_wgtdecay1e-05_hidd...
7,NN,0.001,128,7000,1e-05,16,3,20250413,52247,2025-04-13 05:22:47,7000,0.023957,0.029287,NN_lr0.001_bs128_epochs7000_wgtdecay1e-05_hidd...
8,NN,0.0005,16,7000,1e-05,16,3,20250413,132046,2025-04-13 13:20:46,7000,0.019194,0.029805,NN_lr0.0005_bs16_epochs7000_wgtdecay1e-05_hidd...
9,NN,0.001,16,7000,1e-05,16,3,20250413,23722,2025-04-13 02:37:22,7000,0.009982,0.030205,NN_lr0.001_bs16_epochs7000_wgtdecay1e-05_hidde...


In [4]:
# All HNN runs ranked by final test loss (lowest first).
(
    df_summary
    .query("""model == 'HNN'""")
    .sort_values(['final_test_loss'])
    .reset_index(drop=True)
)


Unnamed: 0,model,lr,bs,epochs,wgtdecay,hidden_size,num_hidden_layers,date,time,run_datetime,final_epoch,final_loss,final_test_loss,file_name
0,HNN,0.001,8,7000,1e-05,16,3,20250417,123531,2025-04-17 12:35:31,7000,0.007487,0.026053,HNN_lr0.001_bs8_epochs7000_wgtdecay1e-05_hidde...
1,HNN,0.001,8,5000,1e-05,16,3,20250417,102631,2025-04-17 10:26:31,5000,0.009985,0.031324,HNN_lr0.001_bs8_epochs5000_wgtdecay1e-05_hidde...
2,HNN,0.001,32,5000,1e-05,16,3,20250417,144157,2025-04-17 14:41:57,5000,0.009236,0.032221,HNN_lr0.001_bs32_epochs5000_wgtdecay1e-05_hidd...
3,HNN,0.001,32,7000,1e-05,16,3,20250417,153325,2025-04-17 15:33:25,7000,0.012272,0.034499,HNN_lr0.001_bs32_epochs7000_wgtdecay1e-05_hidd...
4,HNN,0.001,8,2000,1e-05,16,3,20250414,43028,2025-04-14 04:30:28,2000,0.016132,0.03464,HNN_lr0.001_bs8_epochs2000_wgtdecay1e-05_hidde...
5,HNN,0.001,8,5000,0.0001,16,3,20250417,110013,2025-04-17 11:00:13,5000,0.026501,0.040609,HNN_lr0.001_bs8_epochs5000_wgtdecay0.0001_hidd...
6,HNN,0.001,8,5000,1e-05,8,3,20250417,100920,2025-04-17 10:09:20,5000,0.019552,0.043488,HNN_lr0.001_bs8_epochs5000_wgtdecay1e-05_hidde...
7,HNN,0.001,32,7000,1e-05,8,3,20250417,152406,2025-04-17 15:24:06,7000,0.01989,0.049106,HNN_lr0.001_bs32_epochs7000_wgtdecay1e-05_hidd...
8,HNN,0.001,32,5000,0.0001,16,3,20250417,145720,2025-04-17 14:57:20,5000,0.036021,0.052979,HNN_lr0.001_bs32_epochs5000_wgtdecay0.0001_hid...
9,HNN,0.001,8,7000,0.0001,16,3,20250417,133155,2025-04-17 13:31:55,7000,0.029154,0.057214,HNN_lr0.001_bs8_epochs7000_wgtdecay0.0001_hidd...
