In [1]:
import pandas as pd
import tensorflow as tf

DATA_PATH = '/home/kshipra/work/major/ml experiments/data/manually_combined.xlsx'

eos = pd.read_excel(DATA_PATH, sheet_name='all_stacked_eos')
sentinel = pd.read_excel(DATA_PATH, sheet_name='all_stacked_sentinel')
combined = pd.read_excel(DATA_PATH, sheet_name='eos_sent_combined')

len(eos), len(sentinel), len(combined)

2025-09-09 21:50:56.590433: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


(747, 796, 747)

In [2]:
combined = combined[combined['SM (Combined)'] < 150]

X_cols = ['VH', 'VV', 'HH', 'HV', 'Angle']
y_col = ['SM (Combined)']

In [3]:
import pandas as pd
from model_experiments import PredictionIntervalEstimation
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [21]:
def fine_tune(param_grid, random_seed=42):
    all_results = []

    features = X_cols
    target = y_col[0]
    data = combined

    for idx, params in enumerate(param_grid):
        print(f"\n--- Testing Configuration: {idx}:{params} ---")

        tf.keras.backend.clear_session()
        
        model_layers = [Input(shape=(len(features),))]
        for units in params['layers']:
            model_layers.append(Dense(units, activation='relu'))
            if params['dropout'] > 0:
                model_layers.append(Dropout(params['dropout']))
        model_layers.append(Dense(1)) # Output layer
        print(model_layers)
        model = Sequential(model_layers)

        exp = PredictionIntervalEstimation(data, features=features, target=target, satellite='EOS+Sentinel', random_state=random_seed)

        y_lower_test, y_upper_test, y_lower_val, y_upper_val = exp.train_model(
            model, 
            learning_rate=params['lr'], 
            batch_size=params['batch_size'],
            epochs=500, # Use a sufficient number of epochs, rely on early stopping
        )
        
        metrics = exp.evaluate_model(exp.y_val, y_lower_val, y_upper_val)
        
        result_entry = params.copy()
        result_entry['PICP'] = metrics['PICP']
        result_entry['MPIW'] = metrics['MPIW']
        all_results.append(result_entry)
        print()

    all_results_df = pd.DataFrame(all_results)
    print("\n--- Experiment Results Summary ---")
    print(all_results_df)

    return all_results_df

In [None]:
# A highly focused parameter grid with 12 combinations

param_grid = [
    {'layers': [8], 'dropout': 0.0, 'lr': 0.001, 'batch_size': 32},
    {'layers': [8], 'dropout': 0.1, 'lr': 0.001, 'batch_size': 32},
    {'layers': [8], 'dropout': 0.2, 'lr': 0.001, 'batch_size': 32},

    {'layers': [16], 'dropout': 0.0, 'lr': 0.001, 'batch_size': 32},
    {'layers': [16], 'dropout': 0.1, 'lr': 0.001, 'batch_size': 32},
    {'layers': [16], 'dropout': 0.2, 'lr': 0.001, 'batch_size': 32},

    {'layers': [8, 4], 'dropout': 0.0, 'lr': 0.001, 'batch_size': 32},
    {'layers': [8, 4], 'dropout': 0.1, 'lr': 0.001, 'batch_size': 32},
    {'layers': [8, 4], 'dropout': 0.2, 'lr': 0.001, 'batch_size': 32},

    {'layers': [8],  'dropout': 0.1, 'lr': 0.0005, 'batch_size': 32}, 
    {'layers': [16], 'dropout': 0.1, 'lr': 0.0005, 'batch_size': 32}, 
    {'layers': [8],  'dropout': 0.1, 'lr': 0.001,  'batch_size': 16}, 
]

# Total experiments: 12
print(f"Generated {len(param_grid)} parameter combinations to test.")

Generated 12 parameter combinations to test.


In [None]:
results_df = fine_tune(param_grid)

In [19]:
results_df[results_df['PICP'] > 0.95]

Unnamed: 0,layers,dropout,lr,batch_size,PICP,MPIW
1,[8],0.1,0.001,32,0.964286,33.376377
2,[8],0.2,0.001,32,0.97619,35.415985
4,[16],0.1,0.001,32,0.952381,32.641106
5,[16],0.2,0.001,32,0.964286,33.399944
7,"[8, 4]",0.1,0.001,32,0.964286,33.898605
8,"[8, 4]",0.2,0.001,32,0.970238,36.210178
9,[8],0.1,0.0005,32,0.964286,34.380848
10,[16],0.1,0.0005,32,0.958333,32.548027
11,[8],0.1,0.001,16,0.958333,33.715019


## Focused Approach

In [None]:
param_grid = [
    {'layers': [16], 'dropout': 0.0, 'lr': 0.0005, 'batch_size': 32},   
    {'layers': [16], 'dropout': 0.05, 'lr': 0.0005, 'batch_size': 32},  
    {'layers': [16], 'dropout': 0.075, 'lr': 0.0005, 'batch_size': 32}, 

    {'layers': [8], 'dropout': 0.0, 'lr': 0.001, 'batch_size': 32},     
    {'layers': [8], 'dropout': 0.05, 'lr': 0.001, 'batch_size': 32},    
    {'layers': [8], 'dropout': 0.15, 'lr': 0.001, 'batch_size': 32},    
    
    {'layers': [16, 8], 'dropout': 0.0, 'lr': 0.0005, 'batch_size': 32},
    {'layers': [16, 8], 'dropout': 0.1, 'lr': 0.0005, 'batch_size': 32},
    {'layers': [16], 'dropout': 0.05, 'lr': 0.0001, 'batch_size': 32},
    {'layers': [8],  'dropout': 0.05, 'lr': 0.0001, 'batch_size': 32},
]

# Total experiments: 10
print(f"Generated {len(param_grid)} parameter combinations for fine-tuning.")

Generated 10 parameter combinations for fine-tuning.


In [23]:
results_df = fine_tune(param_grid)

Epochs:  36%|███▋      | 182/500 [26:18<45:58,  8.68s/epoch, loss=2.6443, val_loss=0.7142]



--- Testing Configuration: 0:{'layers': [16], 'dropout': 0.0, 'lr': 0.0005, 'batch_size': 32} ---
--------- TRAINING UPPER MODEL -----------



Epochs:  78%|███████▊  | 392/500 [00:35<00:09, 11.09epoch/s, loss=0.4941, val_loss=0.5876] 


--------- TRAINING LOWER MODEL -----------



Epochs:  46%|████▌     | 231/500 [00:20<00:24, 11.04epoch/s, loss=0.4290, val_loss=0.4056]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


--- Testing Configuration: 1:{'layers': [16], 'dropout': 0.05, 'lr': 0.0005, 'batch_size': 32} ---
--------- TRAINING UPPER MODEL -----------



Epochs:  71%|███████   | 354/500 [00:32<00:13, 10.99epoch/s, loss=0.5794, val_loss=0.5520] 


--------- TRAINING LOWER MODEL -----------



Epochs:  39%|███▉      | 194/500 [00:17<00:28, 10.81epoch/s, loss=0.4327, val_loss=0.4085]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


--- Testing Configuration: 2:{'layers': [16], 'dropout': 0.075, 'lr': 0.0005, 'batch_size': 32} ---
--------- TRAINING UPPER MODEL -----------



Epochs: 100%|██████████| 500/500 [00:45<00:00, 10.98epoch/s, loss=0.5308, val_loss=0.5435] 


--------- TRAINING LOWER MODEL -----------



Epochs:  30%|██▉       | 148/500 [00:13<00:32, 10.68epoch/s, loss=0.4376, val_loss=0.4079]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


--- Testing Configuration: 3:{'layers': [8], 'dropout': 0.0, 'lr': 0.001, 'batch_size': 32} ---
--------- TRAINING UPPER MODEL -----------



Epochs: 100%|██████████| 500/500 [00:45<00:00, 11.06epoch/s, loss=0.4816, val_loss=0.5841] 


--------- TRAINING LOWER MODEL -----------



Epochs: 100%|██████████| 500/500 [00:46<00:00, 10.71epoch/s, loss=0.4254, val_loss=0.4001]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


--- Testing Configuration: 4:{'layers': [8], 'dropout': 0.05, 'lr': 0.001, 'batch_size': 32} ---
--------- TRAINING UPPER MODEL -----------



Epochs: 100%|██████████| 500/500 [00:48<00:00, 10.39epoch/s, loss=1.4051, val_loss=0.6300] 


--------- TRAINING LOWER MODEL -----------



Epochs: 100%|██████████| 500/500 [00:46<00:00, 10.69epoch/s, loss=0.4308, val_loss=0.3984]

[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 33ms/step




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


--- Testing Configuration: 5:{'layers': [8], 'dropout': 0.15, 'lr': 0.001, 'batch_size': 32} ---
--------- TRAINING UPPER MODEL -----------



Epochs:  36%|███▌      | 180/500 [00:20<00:35,  8.98epoch/s, loss=0.9387, val_loss=0.5728] 


--------- TRAINING LOWER MODEL -----------



Epochs:  45%|████▌     | 226/500 [00:21<00:26, 10.49epoch/s, loss=0.4475, val_loss=0.4210]

[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 35ms/step




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


--- Testing Configuration: 6:{'layers': [16, 8], 'dropout': 0.0, 'lr': 0.0005, 'batch_size': 32} ---
--------- TRAINING UPPER MODEL -----------



Epochs:  88%|████████▊ | 439/500 [00:39<00:05, 11.01epoch/s, loss=0.4677, val_loss=0.5805] 


--------- TRAINING LOWER MODEL -----------



Epochs: 100%|██████████| 500/500 [00:44<00:00, 11.17epoch/s, loss=0.4096, val_loss=0.3939]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


--- Testing Configuration: 7:{'layers': [16, 8], 'dropout': 0.1, 'lr': 0.0005, 'batch_size': 32} ---
--------- TRAINING UPPER MODEL -----------



Epochs:  24%|██▍       | 119/500 [00:11<00:36, 10.43epoch/s, loss=0.7351, val_loss=0.5905] 


--------- TRAINING LOWER MODEL -----------



Epochs:  25%|██▌       | 125/500 [00:11<00:35, 10.53epoch/s, loss=0.4552, val_loss=0.4058]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


--- Testing Configuration: 8:{'layers': [16], 'dropout': 0.05, 'lr': 0.0001, 'batch_size': 32} ---
--------- TRAINING UPPER MODEL -----------



Epochs: 100%|██████████| 500/500 [00:48<00:00, 10.23epoch/s, loss=3.1940, val_loss=3.2893]  


--------- TRAINING LOWER MODEL -----------



Epochs: 100%|██████████| 500/500 [00:47<00:00, 10.54epoch/s, loss=0.4320, val_loss=0.4057]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


--- Testing Configuration: 9:{'layers': [8], 'dropout': 0.05, 'lr': 0.0001, 'batch_size': 32} ---
--------- TRAINING UPPER MODEL -----------



Epochs: 100%|██████████| 500/500 [00:45<00:00, 10.94epoch/s, loss=7.8451, val_loss=8.0070]  


--------- TRAINING LOWER MODEL -----------



Epochs: 100%|██████████| 500/500 [00:45<00:00, 11.10epoch/s, loss=0.4259, val_loss=0.4110]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


--- Experiment Results Summary ---
    layers  dropout      lr  batch_size      PICP       MPIW
0     [16]    0.000  0.0005          32  0.940476  30.762806
1     [16]    0.050  0.0005          32  0.958333  33.504383
2     [16]    0.075  0.0005          32  0.952381  33.355644
3      [8]    0.000  0.0010          32  0.934524  30.475615
4      [8]    0.050  0.0010          32  0.916667  27.909569
5      [8]    0.150  0.0010          32  0.958333  34.108868
6  [16, 8]    0.000  0.0005          32  0.922619  29.807980
7  [16, 8]    0.100  0.0005          32  0.964286  32.557167
8     [16]    0.050  0.0001          32  0.494048  15.547840
9      [8]    0.050  0.0001          32  0.166667   8.397850


In [25]:
results_df.to_csv('test.csv')

### More Focused

In [7]:
param_grid_final = [
    {'layers': [16, 8], 'dropout': 0.1,    'lr': 0.0005, 'batch_size': 32},

    {'layers': [16, 8], 'dropout': 0.09,   'lr': 0.0005, 'batch_size': 32},
    {'layers': [16, 8], 'dropout': 0.08,   'lr': 0.0005, 'batch_size': 32},
    {'layers': [16, 8], 'dropout': 0.07,   'lr': 0.0005, 'batch_size': 32},
]

In [8]:
results_df = fine_tune(param_grid_final)


--- Testing Configuration: 0:{'layers': [16, 8], 'dropout': 0.1, 'lr': 0.0005, 'batch_size': 32} ---


Epochs:   3%|▎         | 13/500 [00:02<01:06,  7.33epoch/s, loss=22.9319, val_loss=22.9340]

KeyboardInterrupt: 

## Final HP

In [15]:
random_seeds = [42, 123, 888, 1337, 2024]
configs = [
    {'name': 'Model 1', 'layers': [16, 8], 'dropout': 0.09, 'lr': 0.0005, 'batch_size': 32},
    {'name': 'Model 3', 'layers': [16, 8], 'dropout': 0.07, 'lr': 0.0005, 'batch_size': 32},
]

result_dfs = []

for seed in random_seeds:
    print(f"---------- SEED: {seed} ----------")
    result_dfs.append(fine_tune(param_grid=configs, random_seed=seed))

---------- SEED: 42 ----------

--- Testing Configuration: 0:{'name': 'Model 1', 'layers': [16, 8], 'dropout': 0.09, 'lr': 0.0005, 'batch_size': 32} ---


Epochs:  29%|██▉       | 144/500 [00:18<00:46,  7.71epoch/s, loss=1.2280, val_loss=0.6390] 
Epochs:  25%|██▍       | 123/500 [00:16<00:51,  7.29epoch/s, loss=0.4285, val_loss=0.4137]

[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 47ms/step




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


--- Testing Configuration: 1:{'name': 'Model 3', 'layers': [16, 8], 'dropout': 0.07, 'lr': 0.0005, 'batch_size': 32} ---


Epochs:  27%|██▋       | 137/500 [00:18<00:48,  7.45epoch/s, loss=0.9704, val_loss=0.5801] 
Epochs:  27%|██▋       | 136/500 [00:18<00:50,  7.20epoch/s, loss=0.4507, val_loss=0.4127]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


--- Experiment Results Summary ---
      name   layers  dropout      lr  batch_size      PICP       MPIW
0  Model 1  [16, 8]     0.09  0.0005          32  0.964286  33.965977
1  Model 3  [16, 8]     0.07  0.0005          32  0.958333  33.643951
---------- SEED: 123 ----------

--- Testing Configuration: 0:{'name': 'Model 1', 'layers': [16, 8], 'dropout': 0.09, 'lr': 0.0005, 'batch_size': 32} ---


Epochs:  23%|██▎       | 117/500 [00:16<00:53,  7.21epoch/s, loss=0.7339, val_loss=0.6134] 
Epochs:  15%|█▌        | 77/500 [00:11<01:00,  6.99epoch/s, loss=0.4287, val_loss=0.4487]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


--- Testing Configuration: 1:{'name': 'Model 3', 'layers': [16, 8], 'dropout': 0.07, 'lr': 0.0005, 'batch_size': 32} ---


Epochs:  27%|██▋       | 136/500 [00:18<00:49,  7.30epoch/s, loss=0.9493, val_loss=0.6383] 
Epochs:  15%|█▌        | 77/500 [00:11<01:00,  6.99epoch/s, loss=0.4066, val_loss=0.4680]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


--- Experiment Results Summary ---
      name   layers  dropout      lr  batch_size      PICP       MPIW
0  Model 1  [16, 8]     0.09  0.0005          32  0.940476  34.290947
1  Model 3  [16, 8]     0.07  0.0005          32  0.940476  35.771069
---------- SEED: 888 ----------

--- Testing Configuration: 0:{'name': 'Model 1', 'layers': [16, 8], 'dropout': 0.09, 'lr': 0.0005, 'batch_size': 32} ---


Epochs:  23%|██▎       | 116/500 [00:15<00:52,  7.31epoch/s, loss=0.7016, val_loss=0.6180] 
Epochs:  25%|██▌       | 127/500 [00:17<00:51,  7.28epoch/s, loss=0.4340, val_loss=0.3857]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


--- Testing Configuration: 1:{'name': 'Model 3', 'layers': [16, 8], 'dropout': 0.07, 'lr': 0.0005, 'batch_size': 32} ---


Epochs:  28%|██▊       | 140/500 [00:19<00:49,  7.26epoch/s, loss=1.2185, val_loss=0.6438] 
Epochs:  27%|██▋       | 133/500 [00:18<00:50,  7.28epoch/s, loss=0.4388, val_loss=0.3883]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


--- Experiment Results Summary ---
      name   layers  dropout      lr  batch_size      PICP       MPIW
0  Model 1  [16, 8]     0.09  0.0005          32  0.958333  32.857838
1  Model 3  [16, 8]     0.07  0.0005          32  0.952381  33.010201
---------- SEED: 1337 ----------

--- Testing Configuration: 0:{'name': 'Model 1', 'layers': [16, 8], 'dropout': 0.09, 'lr': 0.0005, 'batch_size': 32} ---


Epochs:  27%|██▋       | 136/500 [00:18<00:48,  7.46epoch/s, loss=1.5862, val_loss=0.6295] 
Epochs:  37%|███▋      | 184/500 [00:24<00:41,  7.64epoch/s, loss=0.4270, val_loss=0.4336]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


--- Testing Configuration: 1:{'name': 'Model 3', 'layers': [16, 8], 'dropout': 0.07, 'lr': 0.0005, 'batch_size': 32} ---


Epochs:  21%|██▏       | 107/500 [00:15<00:55,  7.03epoch/s, loss=0.7457, val_loss=0.5245] 
Epochs:  25%|██▍       | 123/500 [00:16<00:51,  7.31epoch/s, loss=0.4208, val_loss=0.4384]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


--- Experiment Results Summary ---
      name   layers  dropout      lr  batch_size      PICP       MPIW
0  Model 1  [16, 8]     0.09  0.0005          32  0.958333  30.155779
1  Model 3  [16, 8]     0.07  0.0005          32  0.952381  30.428223
---------- SEED: 2024 ----------

--- Testing Configuration: 0:{'name': 'Model 1', 'layers': [16, 8], 'dropout': 0.09, 'lr': 0.0005, 'batch_size': 32} ---


Epochs:  23%|██▎       | 115/500 [00:16<00:54,  7.05epoch/s, loss=0.6646, val_loss=0.6004] 
Epochs:  16%|█▌        | 80/500 [00:11<00:58,  7.14epoch/s, loss=0.4493, val_loss=0.4821]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 


--- Testing Configuration: 1:{'name': 'Model 3', 'layers': [16, 8], 'dropout': 0.07, 'lr': 0.0005, 'batch_size': 32} ---


Epochs:  23%|██▎       | 114/500 [00:16<00:54,  7.09epoch/s, loss=0.7292, val_loss=0.5824] 
Epochs:  14%|█▍        | 72/500 [00:10<01:01,  6.96epoch/s, loss=0.4164, val_loss=0.4695]


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


--- Experiment Results Summary ---
      name   layers  dropout      lr  batch_size      PICP       MPIW
0  Model 1  [16, 8]     0.09  0.0005          32  0.946429  35.182964
1  Model 3  [16, 8]     0.07  0.0005          32  0.946429  35.425026


In [16]:
for result_df, seed in zip(result_dfs, random_seeds):
    result_df['Seed'] = [seed] * len(result_df)

In [18]:
all_results = pd.concat(result_dfs, ignore_index=True)

# Group by the model's name to analyze its performance across seeds
summary_df = all_results.groupby('name').agg(
    avg_PICP=('PICP', 'mean'),
    std_PICP=('PICP', 'std'),
    avg_MPIW=('MPIW', 'mean'),
    std_MPIW=('MPIW', 'std')
).reset_index()

# Add a column to check if the average performance meets the constraint
summary_df['meets_constraint'] = summary_df['avg_PICP'] >= 0.95

print("\n\n--- Final Averaged Performance Summary ---")
print(summary_df.round(4))



--- Final Averaged Performance Summary ---
      name  avg_PICP  std_PICP   avg_MPIW  std_MPIW  meets_constraint
0  Model 1    0.9536    0.0098  33.290699    1.9399              True
1  Model 3    0.9500    0.0068  33.655701    2.1470              True


In [23]:
all_results

Unnamed: 0,name,layers,dropout,lr,batch_size,PICP,MPIW,Seed
0,Model 1,"[16, 8]",0.09,0.0005,32,0.964286,33.965977,42
1,Model 3,"[16, 8]",0.07,0.0005,32,0.958333,33.643951,42
2,Model 1,"[16, 8]",0.09,0.0005,32,0.940476,34.290947,123
3,Model 3,"[16, 8]",0.07,0.0005,32,0.940476,35.771069,123
4,Model 1,"[16, 8]",0.09,0.0005,32,0.958333,32.857838,888
5,Model 3,"[16, 8]",0.07,0.0005,32,0.952381,33.010201,888
6,Model 1,"[16, 8]",0.09,0.0005,32,0.958333,30.155779,1337
7,Model 3,"[16, 8]",0.07,0.0005,32,0.952381,30.428223,1337
8,Model 1,"[16, 8]",0.09,0.0005,32,0.946429,35.182964,2024
9,Model 3,"[16, 8]",0.07,0.0005,32,0.946429,35.425026,2024


# Exhaustive Search

In [7]:
import itertools

architectures = [
    [2], [4], [8], [16], [4, 2], [8, 4]
]

dropout_rates = [0.0, 0.1, 0.2]

learning_rates = [0.001, 0.0005]

batch_sizes = [16, 32]

param_grid = []
param_combinations = itertools.product(architectures, dropout_rates, learning_rates, batch_sizes)

for combo in param_combinations:
    param_grid.append({
        'layers': combo[0],
        'dropout': combo[1],
        'lr': combo[2],
        'batch_size': combo[3]
    })

# This will generate 6 * 3 * 2 * 2 = 72 experiments, which is a very thorough search.
print(f"Generated {len(param_grid)} parameter combinations to test.")

Generated 72 parameter combinations to test.
