In [1]:
import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
import seaborn as sns

In [2]:
sns.set_style("whitegrid")

# Tracking server and experiment name; both must match the values used by the
# training script (train.py).
TRACKING_URI = "http://localhost:32770"
EXPERIMENT_NAME = "CF_r1.0_0.5_0.1_e50_b1024_v3"

mlflow.set_tracking_uri(TRACKING_URI)
client: MlflowClient = MlflowClient()
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
# get_experiment_by_name returns None (it does not raise) when the name is
# unknown, so fail here with a readable message instead of an AttributeError below.
if experiment is None:
    raise ValueError(
        f"MLflow experiment {EXPERIMENT_NAME!r} not found at {TRACKING_URI}"
    )

# One row per run; params.* / metrics.* / tags.* become flat columns.
runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id])
print(runs.columns)

Index(['run_id', 'experiment_id', 'status', 'artifact_uri', 'start_time',
       'end_time', 'metrics.train_loss', 'metrics.val_loss', 'metrics.val_f1',
       'metrics.f1_score', 'metrics.accuracy', 'metrics.recall',
       'metrics.precision', 'params.num_majority', 'params.num_layers',
       'params.epochs', 'params.loss_params', 'params.batch_size',
       'params.hidden_size', 'params.ce_weight', 'params.subsample_ratio',
       'params.loss_function', 'params.num_minority', 'params.learning_rate',
       'params.imbalance_ratio', 'tags.mlflow.source.name',
       'tags.mlflow.source.type', 'tags.mlflow.runName',
       'tags.mlflow.source.git.commit', 'tags.mlflow.user'],
      dtype='object')


In [3]:
# Flatten the MLflow runs table into one record per run, keeping the
# loss-specific hyper-parameters and every logged metric.

# The metric columns are the same for every row, so discover them once.
metrics_cols = [col for col in runs.columns if col.startswith('metrics.')]

all_results = []

for _, row in runs.iterrows():
    loss_fn = row['params.loss_function']
    result = {
        'imbalance_ratio': float(row['params.imbalance_ratio']),
        'loss_fn': loss_fn,
        'f1_score': row['metrics.f1_score'],
        'run_id': row['run_id']
    }

    # Extract loss-specific parameters.
    # Series.get() yields None instead of raising KeyError when the column was
    # never logged (e.g. an experiment without any focal-loss run).
    # NOTE(review): the runs table printed above has 'params.loss_params' but
    # no 'params.alpha' / 'params.gamma' -- confirm where train.py logs them.
    if loss_fn == 'focal':
        result['alpha'] = row.get('params.alpha')
        result['gamma'] = row.get('params.gamma')
    elif loss_fn == 'wce':
        result['weight'] = row.get('params.ce_weight')

    # Add all metrics columns, stripping only the leading 'metrics.' prefix
    for metric_col in metrics_cols:
        metric_name = metric_col[len('metrics.'):]
        result[metric_name] = row[metric_col]

    all_results.append(result)

results_df = pd.DataFrame(all_results)
print(results_df)

   imbalance_ratio loss_fn  f1_score                            run_id weight  \
0       577.876016     wce       NaN  e1d1097aacb143aeaca9b6a902898d08    577   
1       577.876016     wce  0.793970  b9398de37f684915b1431246cb88455d     10   
2       577.876016     wce  0.810256  a3be7136113546d8bc8177cc5684b60d      5   
3       577.876016      ce  0.834225  252c70c83bba4c41a83f3c926b721006    NaN   

   train_loss  val_loss    val_f1  accuracy    recall  precision  
0    0.026621  1.462172  0.425743       NaN       NaN        NaN  
1    0.001242  0.046033  0.831683  0.999280  0.806122   0.782178  
2    0.001206  0.015256  0.826923  0.999350  0.806122   0.814433  
3    0.000559  0.003802  0.901099  0.999456  0.795918   0.876404  


In [None]:
# Plot final f1 score for every loss and imbalance ratio

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Plot defaults for the cells below: seaborn grid theme and default figure size.
sns.set_style(style="whitegrid")
plt.rcParams.update({'figure.figsize': (16, 12)})

# Create loss_variant label
def loss_variant_label(row):
    """Build a legend label for one result row.

    Focal-loss rows are labelled with their alpha/gamma values, weighted
    cross-entropy ('wce') rows with their weight, and any other loss function
    is returned as its bare name.
    """
    kind = row['loss_fn']
    if kind == 'focal':
        return f"focal (α={row['alpha']}, γ={row['gamma']})"
    if kind == 'wce':
        return f"wce (weight={row['weight']})"
    return kind

# Label every run with its loss function plus loss-specific hyper-parameters
results_df['loss_variant'] = results_df.apply(loss_variant_label, axis=1)

# Explicit figure/axes instead of the pyplot state machine; the former
# argument-less plt.xticks() call was only a getter and has been dropped.
fig, ax = plt.subplots(figsize=(16, 6))
sns.barplot(
    data=results_df,
    x='imbalance_ratio',
    y='f1_score',
    hue='loss_variant',
    ax=ax,
)
ax.set_title('F1 Score by Loss Function Variant')
ax.set_ylabel('F1 Score')
ax.set_xlabel('Imbalance Ratio')
# Anchor the legend outside the axes so it does not cover the bars
ax.legend(title='Loss Function Variant', bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()

In [None]:
# Plot Recall and Precision for each loss

sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (16, 6)

# Sorted so that group position k always means the k-th smallest imbalance
# ratio, matching the reindexed per-variant values below.
x_labels = np.sort(results_df['imbalance_ratio'].unique())
loss_variants = results_df['loss_variant'].unique()
metrics = ['precision', 'recall']
metric_colors = {'precision': 'blue', 'recall': 'red'}

n_lv = len(loss_variants)
n_metrics = len(metrics)
n_groups = len(x_labels)

group_width = 0.8
# max(..., 1) avoids a ZeroDivisionError when there are no runs to plot
width = group_width / max(n_lv * n_metrics, 1)

x = np.arange(n_groups)

# One bar artist per metric, used to build the legend
handles = []

plt.figure(figsize=(16, 6))
for i, lv in enumerate(loss_variants):
    # Exactly one value per imbalance ratio: duplicate runs are averaged and
    # ratios this variant was never run on become NaN (drawn as a gap), so the
    # heights always have the same length as the x positions.
    subset = (
        results_df[results_df['loss_variant'] == lv]
        .groupby('imbalance_ratio')[metrics]
        .mean()
        .reindex(x_labels)
    )
    for j, metric in enumerate(metrics):
        # Left edge of the group + offset of this variant + offset of this metric
        xpos = x - group_width/2 + i*width*n_metrics + j*width + width/2
        bar = plt.bar(
            xpos,
            subset[metric].to_numpy(),
            width=width,
            color=metric_colors[metric],
            alpha=1
        )
        if i == 0 and len(bar) > 0:
            handles.append(bar[0])

# Set x-ticks at the center of each group
plt.xticks(x, [str(val) for val in x_labels])
plt.ylim(0, 1)
plt.title('Precision& Recall by Loss Function Variant')
plt.ylabel('Score')
plt.xlabel('Imbalance Ratio')
# Pass the collected handles with explicit labels; without them the legend
# has no labelled artists and renders empty.
plt.legend(
    handles,
    metrics[:len(handles)],
    title='Recall & Precision',
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
)
plt.tight_layout()
plt.show()


In [4]:
# Plot convergence based on the best scores


def get_best_runs_per_config(all_runs: pd.DataFrame, num_layers: int) -> pd.DataFrame:
    """
    Extract the best run per configuration (imbalance_ratio + loss_function),
    restricted to runs with the given number of layers.

    Args:
        all_runs: Runs table with at least the columns 'params.num_layers',
            'params.imbalance_ratio', 'params.loss_function' and
            'metrics.val_f1'. It is not modified.
        num_layers: Only runs whose 'params.num_layers' equals this value
            are considered.

    Returns:
        One row per (imbalance_ratio, loss_function) pair -- the run with the
        highest 'metrics.val_f1' -- with the extra columns 'imbalance_ratio'
        (float) and 'loss_function', and a fresh 0..n-1 index. Runs that have
        no 'metrics.val_f1' value are ignored; the result is empty when no
        run qualifies.
    """
    # Coerce instead of astype(int): a run without the parameter becomes NaN
    # and is simply filtered out rather than crashing the conversion.
    layer_counts = pd.to_numeric(all_runs['params.num_layers'], errors='coerce')

    # Filter first, then copy, so the column assignments below write to an
    # independent frame (no SettingWithCopyWarning, caller's frame untouched).
    df = all_runs[layer_counts == num_layers].copy()

    # Extract relevant columns
    df['imbalance_ratio'] = df['params.imbalance_ratio'].astype(float)
    df['loss_function'] = df['params.loss_function']

    # A run without val_f1 cannot be ranked; keeping it would make idxmax
    # yield NaN (or raise) for a group in which every value is missing.
    df = df.dropna(subset=['metrics.val_f1'])
    if df.empty:
        return df.reset_index(drop=True)

    # Index label of the best val_f1 within each (imbalance ratio, loss) group
    group_cols = ['imbalance_ratio', 'loss_function']
    best_idx = df.groupby(group_cols)['metrics.val_f1'].idxmax()

    return df.loc[best_idx].reset_index(drop=True)

# Best 2-layer run for every (imbalance ratio, loss function) pair
best = get_best_runs_per_config(all_runs=runs, num_layers=2)
display(best.loc[:, ['run_id', 'imbalance_ratio', 'loss_function', 'metrics.val_f1']])

# Client that fetch_val_f1_history reads as a global
client = MlflowClient()

def fetch_val_f1_history(best_df: pd.DataFrame, num_layer: int) -> pd.DataFrame:
    """
    Collect the logged "val_f1" history of every run in ``best_df``.

    For each row the metric history is requested from the module-level
    ``client`` by the row's 'run_id'. Every history entry becomes one record
    with the columns 'step', 'val_f1', 'imbalance_ratio', 'loss_function' and
    'num_layer' (the latter is just the ``num_layer`` argument repeated).
    """
    records = [
        {
            "step": point.step,
            "val_f1": point.value,
            "imbalance_ratio": float(run['imbalance_ratio']),
            "loss_function": run['loss_function'],
            "num_layer": num_layer,
        }
        for _, run in best_df.iterrows()
        for point in client.get_metric_history(run['run_id'], "val_f1")
    ]
    return pd.DataFrame(records)

# Per-epoch val_f1 curves of the best 2-layer runs
hist_small = fetch_val_f1_history(best_df=best, num_layer=2)


def plot_convergence_grid(hist_df: pd.DataFrame, num_layer: int):
    """
    Draw val_f1 against step as a grid of line plots, one panel per
    imbalance ratio and one line colour per loss function.

    Args:
        hist_df: Frame with the columns 'step', 'val_f1', 'imbalance_ratio'
            and 'loss_function'. It is not modified.
        num_layer: Only used in the message printed when ``hist_df`` is empty.

    Returns:
        None. Prints a notice and returns early when ``hist_df`` is empty;
        otherwise shows the figure.
    """
    if hist_df.empty:
        print(f"No history found for {num_layer} layers")
        return

    df = hist_df.copy()
    # Use imbalance ratio only for config labeling (one facet per ratio)
    df['config'] = df['imbalance_ratio'].map("imb={:.2f}".format)

    g = sns.relplot(
        data=df,
        x="step",
        y="val_f1",
        hue="loss_function",
        kind="line",
        col="config",
        col_wrap=4,
        height=3,
        facet_kws={"sharex": True, "sharey": True},
    )
    g.set_axis_labels("Epoch", "Validation F1")
    # FacetGrid.fig is deprecated; .figure is the supported accessor.
    # Leave head-room above the panels for the overall title.
    g.figure.subplots_adjust(top=0.85)
    g.figure.suptitle("val_f1 Convergence")
    plt.show()

# Convergence curves for the best 2-layer runs
plot_convergence_grid(hist_df=hist_small, num_layer=2)