In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Notebook-wide defaults: wide DataFrame display, whitegrid plot style,
# and a larger base font for figures.
pd.options.display.max_columns = 500
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['font.size'] = 14

# When True, plotting cells also write their figure to disk.
SAVE_FIGURES = False

In [None]:
# Read the runs table from disk and preview its first rows.
RUNS_CSV = 'runs.csv'
df = pd.read_csv(RUNS_CSV)
df.head()

In [None]:
# List every column name of the loaded table.
df.columns

In [None]:
# Column groups of the runs table.
#
# The names META_COLUMS and METRICS_COLUMS are misspelled ("COLUMS").
# The correctly spelled names are defined here and the old spellings are
# kept as aliases so that existing cells referring to them keep working.

# Bookkeeping columns of a run (timing, ids, source, user, status, ...).
META_COLUMNS = [
    'Start Time',
    'Duration',
    'Run ID',
    'Source Type',
    'Source Name',
    'User',
    'Status',
    'Dataset',
    'Commit ID',
    'Branch',
]
META_COLUMS = META_COLUMNS  # backward-compatible alias (original spelling)

# Training settings; presumably identical for all runs (hence "CONST")
# -- TODO confirm against the data.
CONST_COLUMNS = [
    'early_stopping',
    'image_size',
    'learning_rate',
    'max_epochs',
    'monitor',
    'patience',
]

# Evaluation metrics reported on the test and validation sets.
METRICS_COLUMNS = [
    'test_f1s',
    'test_jaccard',
    'val_f1s',
    'val_jaccard',
]
METRICS_COLUMS = METRICS_COLUMNS  # backward-compatible alias (original spelling)

In [None]:
# Remove the bookkeeping and constant-setting columns, then preview.
columns_to_drop = META_COLUMS + CONST_COLUMNS
df = df.drop(labels=columns_to_drop, axis='columns')
df.head()

In [None]:
# Replace missing settings with their default value:
# augmentation -> True, batch_size -> 8,
# encoder_weights -> imagenet, frozen_encoder -> True
NAN_DEFAULTS = {
    'augmentation': True,
    'batch_size': 8,
    'encoder_weights': 'imagenet',
    'frozen_encoder': True,
}

for column, default in NAN_DEFAULTS.items():
    df[column] = df[column].fillna(default)

In [None]:
# Number of missing values per column, columns with the most NaNs first.
nan_counts = df.isna().sum()
nan_counts.sort_values(ascending=False)

In [None]:
# The three runs with the highest test F1 score.
df.sort_values('test_f1s', ascending=False).iloc[:3]

In [None]:
# Select the ten best runs (by test F1 score) whose encoder starts with
# 'efficientnet' and whose decoder is 'unet' or 'unetplusplus'.
df_sel = (
    df[
        (df['encoder'].str.startswith('efficientnet'))
        & (df['decoder'].isin(['unet', 'unetplusplus']))
    ]
    .sort_values(by='test_f1s', ascending=False)
    .head(10)
)

# Order by encoder name for the x axis and switch to display names.
df_sorted = df_sel.sort_values('encoder')
df_sorted['encoder'] = df_sorted['encoder'].str.replace(
    'efficientnet-b', 'EfficientNet-B', regex=False)

# Legend label and line style per decoder. The label is attached to the line
# it describes (instead of handing a fixed list to legend()), so the legend
# stays correct whatever order the decoders appear in the data.
decoder_styles = {
    'unetplusplus': ('UNet++', '-'),
    'unet': ('U-Net', '--'),
}

# Give every encoder a fixed x position taken from the sorted names, so the
# tick order does not depend on which decoder is plotted first or on a
# decoder having no run for some encoder.
encoders = df_sorted['encoder'].unique().tolist()
x_positions = {encoder: pos for pos, encoder in enumerate(encoders)}

fig, ax = plt.subplots(figsize=(12, 7))

for decoder, (label, linestyle) in decoder_styles.items():
    subset = df_sorted[df_sorted['decoder'] == decoder]
    if subset.empty:
        continue  # no run with this decoder made it into the selection
    ax.plot(
        subset['encoder'].map(x_positions),
        subset['test_f1s'],
        marker='o',
        linestyle=linestyle,
        linewidth=2,
        label=label,
    )

ax.set_xticks(list(range(len(encoders))), labels=encoders)

# Annotate the highest score (nothing to annotate if the selection is empty).
if not df_sorted.empty:
    max_row = df_sorted.loc[df_sorted['test_f1s'].idxmax()]
    max_x = x_positions[max_row['encoder']]
    max_test_f1s = max_row['test_f1s']
    ax.annotate(
        f'{max_test_f1s:.3f}',
        xy=(max_x, max_test_f1s),
        # Put the text slightly above the annotated point
        xytext=(max_x, max_test_f1s + 0.01),
        arrowprops=dict(facecolor='black', shrink=0.1),
        ha='center',
    )

ax.legend(title='Decoder', frameon=True)
ax.set_xlabel('Encoder')
ax.set_ylabel('Test F1 Scores')

if SAVE_FIGURES:
    fig.savefig('top10_efficientnet_unet_unetplusplus.pdf')

plt.show()

In [None]:
# Format table for export: readable decoder names, short encoder names with
# their parameter count, and a two-level column header.
df_table = df_sel[['decoder', 'encoder'] + METRICS_COLUMS].copy()
df_table['decoder'] = df_table['decoder'].replace(
    {'unet': 'U-Net', 'unetplusplus': 'UNet++'}
)
encoder_short = df_table['encoder'].str.replace(
    'efficientnet-b', 'B', regex=False)

# Add number of parameters to encoder column e.g. 'B0 (5.3M)'.
# B5-B7 use the counts reported in the EfficientNet paper, so a selection
# that contains the larger variants is labelled as well.
n_params_m = {
    'B0': '5.3M',
    'B1': '7.8M',
    'B2': '9.2M',
    'B3': '12M',
    'B4': '19M',
    'B5': '30M',
    'B6': '43M',
    'B7': '66M',
}
n_params = encoder_short.str.extract(r'(B\d)', expand=False).map(n_params_m)
encoder_labelled = encoder_short + ' (' + n_params + ')'
# A variant missing from n_params_m yields NaN in the concatenation above;
# fall back to the bare short name instead of exporting an empty cell.
df_table['encoder'] = encoder_labelled.fillna(encoder_short)

# Two-level header; the order must match the column selection above:
# decoder, encoder, test_f1s, test_jaccard, val_f1s, val_jaccard.
# (Dice loss columns are currently not part of the table.)
df_table.columns = pd.MultiIndex.from_tuples(
    [
        ('Architecture', 'Decoder'),
        ('Architecture', 'EfficientNet (\\#params)'),
        ('Test set', 'F1 Score'),
        ('Test set', 'IoU'),
        ('Validation set', 'F1 Score'),
        ('Validation set', 'IoU'),
    ]
)
df_table.head()

In [None]:
# Write the formatted table to 'table.tex' as a LaTeX table with caption
# and label, without the index and with floats rounded to three decimals.
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_latex.html
df_table.to_latex(
    'table.tex',
    index=False,
    float_format='%.3f',
    multicolumn_format='c',
    # The column header already contains hand-escaped LaTeX ('\#params').
    # Disable pandas' own escaping explicitly so the result does not depend
    # on the installed pandas version's default and is never escaped twice.
    escape=False,
    caption='Best performing models with EfficientNet encoder and UNet or UNet++ decoder',
    label='tab:efficientnet_unet_unetplusplus',
)