In [1]:
"""
Hyperparameter selection
"""

import os
import geopandas as gpd
import pandas as pd
import rioxarray as rxr
import earthpy.plot as ep
import seaborn as sns
import matplotlib as mpl
import numpy as np

from matplotlib import pyplot as plt
%matplotlib inline

from scipy.signal import argrelextrema
from sklearn.preprocessing import MinMaxScaler

print(os.getcwd())

projdir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/earth-lab/opp-rooftop-mapping/'

/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/earth-lab/opp-rooftop-mapping/code


In [2]:
# Read in the tuning dataframe
fp = os.path.join(projdir,'results/resnet18/tuning/resnet18_grid_search_results.csv')
tuning_df = pd.read_csv(fp)
tuning_df.head(12)

Unnamed: 0,trial,epoch,batch_size,window_size,learning_rate,weight_decay,accuracy,f1_score,train_loss,val_loss,time_ep
0,0,1,32,78,0.01,1e-05,38.454576,0.454619,1.772893,1.466633,3.25
1,0,2,32,78,0.01,1e-05,43.240186,0.436734,1.482872,1.456673,3.35
2,0,3,32,78,0.01,1e-05,57.732775,0.583495,1.36779,1.228961,3.42
3,0,4,32,78,0.01,1e-05,50.367689,0.533238,1.230571,1.303825,3.38
4,0,5,32,78,0.01,1e-05,41.02274,0.461519,1.090379,1.758595,3.33
5,0,6,32,78,0.01,1e-05,38.443263,0.441192,0.905829,1.736805,3.38
6,0,7,32,78,0.01,1e-05,49.07795,0.5128,0.747294,1.414276,3.4
7,0,8,32,78,0.01,1e-05,61.217332,0.619434,0.452533,1.197468,3.4
8,0,9,32,78,0.01,1e-05,64.656635,0.645323,0.324811,1.208729,3.45
9,0,10,32,78,0.01,1e-05,64.645322,0.644943,0.273739,1.269782,3.25


In [3]:
# Local minima approach

In [13]:
# Function to find local minima in validation loss
def find_local_minima(df, col):
    return argrelextrema(df[col].values, np.less)[0]

# Apply this function to each trial
best_epochs = []
for trial in tuning_df['trial'].unique():
    
    trial_df = tuning_df[tuning_df['trial'] == trial]
    local_min_idx = find_local_minima(trial_df, 'val_loss')
    
    if len(local_min_idx) > 0:
        best_epoch_idx = local_min_idx[0]  # Choose the first local minimum
        best_epochs.append(trial_df.iloc[best_epoch_idx])

# Create a DataFrame of the best epochs based on local minima
best_epochs_df = pd.DataFrame(best_epochs)

# Now you can select the best overall parameters based on F1-score or other metrics
best_params = best_epochs_df.loc[best_epochs_df['f1_score'].idxmax()]
print(best_params)

trial             47.000000
epoch              3.000000
batch_size       128.000000
window_size      256.000000
learning_rate      0.001000
weight_decay       0.000010
accuracy          68.469284
f1_score           0.677425
train_loss         0.028514
val_loss           1.140532
time_ep           10.340000
val_loss_n         0.070483
time_ep_n          0.861702
f1_score_n         0.995521
val_loss_ni        0.929517
time_ep_ni         0.138298
score              0.804275
Name: 566, dtype: float64


In [5]:
# Weighted scoring

In [7]:
scaler = MinMaxScaler()

# Normalize val_loss, time_ep, and f1_score
tuning_df[['val_loss_n', 'time_ep_n', 'f1_score_n']] = scaler.fit_transform(
    tuning_df[['val_loss', 'time_ep', 'f1_score']]
)

# Define weights for each criterion
f1_weight = 0.5
val_loss_weight = 0.3
time_weight = 0.2

# Calculate the composite score
tuning_df['score'] = (
    tuning_df['f1_score_n'] * f1_weight + 
    (1 - tuning_df['val_loss_n']) * val_loss_weight + 
    (1 - tuning_df['time_ep_n']) * time_weight
)

# Select the best parameters based on the composite score
best_params_weight = tuning_df.loc[tuning_df['score'].idxmax()]
print(best_params_weight)

trial            24.000000
epoch            12.000000
batch_size       64.000000
window_size      78.000000
learning_rate     0.010000
weight_decay      0.010000
accuracy         65.697477
f1_score          0.648815
train_loss        0.053144
val_loss          1.180774
time_ep           3.420000
val_loss_n        0.095012
time_ep_n         0.043735
f1_score_n        0.924252
val_loss_ni       0.904988
time_ep_ni        0.956265
score             0.924875
Name: 299, dtype: float64


In [None]:
# Summarizing values across epochs

In [9]:
# 90th percentile metrics across epochs for each trial
tuning_pct90 = tuning_df.groupby('trial').agg({
    'f1_score': lambda x: np.percentile(x, 90),
    'val_loss_n': lambda x: np.percentile(x, 10),  # Use the 10th percentile for validation loss (lower is better)
    'time_ep_n': 'mean',
    'batch_size': 'first',
    'window_size': 'first',
    'learning_rate': 'first',
    'weight_decay': 'first'
}).reset_index()

In [10]:
# Define weights for each criterion
f1_weight = 0.5
val_loss_weight = 0.3
time_weight = 0.2

# Calculate the composite score
tuning_pct90['score'] = (
    tuning_pct90['f1_score'] * f1_weight + 
    (1 - tuning_pct90['val_loss_n']) * val_loss_weight + 
    (1 - tuning_pct90['time_ep_n']) * time_weight
)

# Select the best parameters based on the composite score
best_params_weight_p90 = tuning_pct90.loc[tuning_pct90['score'].idxmax()]
print(best_params_weight_p90)

trial            24.000000
f1_score          0.646643
val_loss_n        0.076282
time_ep_n         0.043243
batch_size       64.000000
window_size      78.000000
learning_rate     0.010000
weight_decay      0.010000
score             0.791788
Name: 24, dtype: float64
