In [39]:
import mlflow
import pandas as pd
import os

# Set the tracking URI to the mlruns folder
mlflow.set_tracking_uri("file:///home/rafael/private/ufrj/CPE727-trabalho-final/mlruns")

# Get all runs
client = mlflow.tracking.MlflowClient()

# Get the experiment
experiment_id = "914356777944626268"
all_runs = mlflow.search_runs(
    experiment_ids=[experiment_id],
    filter_string="",
    order_by=["start_time DESC"],
    max_results=10000
)

print(f"Total runs found: {len(all_runs)}")

# Filter for completed runs only
completed_runs = all_runs[all_runs['status'] == 'FINISHED'].copy()
print(f"Completed runs: {len(completed_runs)}")

# Create a function to recursively get all parent parameters and metrics
# Use ALL runs (not just completed) for parent lookup
def get_all_parent_data(run_id, all_runs_df):
    """Recursively get parameters and metrics from all parent runs"""
    params = {}
    metrics = {}

    # Find this run in ALL runs
    run_data = all_runs_df[all_runs_df['run_id'] == run_id]
    if len(run_data) == 0:
        return params, metrics

    run_row = run_data.iloc[0]

    # Check if this run has a parent
    if 'tags.mlflow.parentRunId' in run_row and pd.notna(run_row['tags.mlflow.parentRunId']):
        parent_id = run_row['tags.mlflow.parentRunId']
        # Recursively get parent's data first (search in ALL runs, not just completed)
        parent_params, parent_metrics = get_all_parent_data(parent_id, all_runs_df)
        params.update(parent_params)
        metrics.update(parent_metrics)

    # Add this run's parameters (child overrides parent)
    for col in run_row.index:
        if col.startswith('params.') and pd.notna(run_row[col]):
            params[col] = run_row[col]
        elif col.startswith('metrics.') and pd.notna(run_row[col]):
            metrics[col] = run_row[col]

    return params, metrics

# Separate parent and child runs
if 'tags.mlflow.parentRunId' in completed_runs.columns:
    parent_runs_df = completed_runs[completed_runs['tags.mlflow.parentRunId'].isna()].copy()
    child_runs_raw = completed_runs[completed_runs['tags.mlflow.parentRunId'].notna()].copy()
    print(f"Parent runs (completed): {len(parent_runs_df)}")
    print(f"Child runs (completed): {len(child_runs_raw)}")

    # Enrich child runs with ALL ancestor parameters and metrics
    # Use ALL runs for parent lookup (including deleted/failed ones)
    print("\nEnriching child runs with all ancestor data (multi-level hierarchy)...")
    print("(Searching in ALL runs, including non-completed ones)")
    enriched_count = 0

    for idx, child_row in child_runs_raw.iterrows():
        run_id = child_row['run_id']

        # Get all parameters and metrics from the entire parent hierarchy
        # IMPORTANT: Use all_runs here, not completed_runs
        all_params, all_metrics = get_all_parent_data(run_id, all_runs)

        # Apply parameters
        for param_col, param_value in all_params.items():
            if param_col not in child_runs_raw.columns or pd.isna(child_row[param_col]):
                child_runs_raw.at[idx, param_col] = param_value
                enriched_count += 1

        # Apply metrics (only if child doesn't have them)
        for metric_col, metric_value in all_metrics.items():
            if metric_col not in child_runs_raw.columns or pd.isna(child_row[metric_col]):
                child_runs_raw.at[idx, metric_col] = metric_value

    print(f"✓ Enriched child runs with all ancestor parameters and metrics")
    print(f"  Total enrichments applied: {enriched_count}")

    # Verify enrichment
    print("\nVerification:")
    print(f"  - params.model_name populated: {child_runs_raw['params.model_name'].notna().sum()} / {len(child_runs_raw)}")
    if 'params.learning_rate' in child_runs_raw.columns:
        print(f"  - params.learning_rate populated: {child_runs_raw['params.learning_rate'].notna().sum()} / {len(child_runs_raw)}")
    if 'metrics.val_accuracy' in child_runs_raw.columns:
        print(f"  - metrics.val_accuracy populated: {child_runs_raw['metrics.val_accuracy'].notna().sum()} / {len(child_runs_raw)}")

    child_runs_raw

Total runs found: 136
Completed runs: 124
Parent runs (completed): 5
Child runs (completed): 119

Enriching child runs with all ancestor data (multi-level hierarchy)...
(Searching in ALL runs, including non-completed ones)
✓ Enriched child runs with all ancestor parameters and metrics
  Total enrichments applied: 716

Verification:
  - params.model_name populated: 119 / 119
  - params.learning_rate populated: 119 / 119
  - metrics.val_accuracy populated: 101 / 119


In [44]:
# Work with enriched child runs
child_runs_df = child_runs_raw.copy()

# Get all parameter columns
param_cols = [col for col in child_runs_df.columns if col.startswith('params.')]
metric_cols = [col for col in child_runs_df.columns if col.startswith('metrics.')]

print("Available parameters:")
print(param_cols)
print("\nAvailable metrics:")
print(metric_cols)

# Create a clean results table
base_cols = ['run_id', 'start_time', 'end_time', 'status']
if 'tags.mlflow.runName' in child_runs_df.columns:
    base_cols.insert(1, 'tags.mlflow.runName')

results_df = child_runs_df[base_cols + param_cols + metric_cols].copy()

# Rename run name column if it exists
if 'tags.mlflow.runName' in results_df.columns:
    results_df = results_df.rename(columns={'tags.mlflow.runName': 'run_name'})

# Convert times to datetime
results_df['start_time'] = pd.to_datetime(results_df['start_time'], unit='ms')
results_df['end_time'] = pd.to_datetime(results_df['end_time'], unit='ms')
results_df['duration_minutes'] = (results_df['end_time'] - results_df['start_time']).dt.total_seconds() / 60

# Sort by model and fold
sort_by = []
if 'params.model_name' in results_df.columns:
    sort_by.append('params.model_name')
if 'params.fold' in results_df.columns:
    sort_by.append('params.fold')

if sort_by:
    results_df = results_df.sort_values(sort_by)

print(f"\nTotal child runs: {len(results_df)}")
results_df.head(20)

Available parameters:
['params.fold', 'params.batch_size', 'params.freeze_backbone', 'params.learning_rate', 'params.cv_folds', 'params.total_combinations', 'params.model_name', 'params.dropout', 'params.hidden_layers']

Available metrics:
['metrics.val_loss', 'metrics.val_recall', 'metrics.best_train_loss', 'metrics.val_accuracy', 'metrics.best_val_loss', 'metrics.train_loss', 'metrics.val_f1', 'metrics.val_precision', 'metrics.avg_val_accuracy', 'metrics.avg_val_recall', 'metrics.std_best_val_loss', 'metrics.avg_best_train_loss', 'metrics.avg_val_precision', 'metrics.avg_val_f1', 'metrics.avg_best_val_loss', 'metrics.std_val_accuracy']

Total child runs: 119


Unnamed: 0,run_id,run_name,start_time,end_time,status,params.fold,params.batch_size,params.freeze_backbone,params.learning_rate,params.cv_folds,...,metrics.val_precision,metrics.avg_val_accuracy,metrics.avg_val_recall,metrics.std_best_val_loss,metrics.avg_best_train_loss,metrics.avg_val_precision,metrics.avg_val_f1,metrics.avg_best_val_loss,metrics.std_val_accuracy,duration_minutes
52,d802471bc71646aebff931b28d613a13,fold_0,2025-12-09 10:54:38.168000+00:00,2025-12-09 11:00:22.055000+00:00,FINISHED,0.0,64,,0.001,5,...,0.383139,0.333565,0.333565,0.014519,1.812103,0.336541,0.275797,1.704273,0.020169,5.73145
133,fd4b271020e14860bf5e43f0cb3a9346,fold_0,2025-12-09 04:23:47.736000+00:00,2025-12-09 04:24:43.682000+00:00,FINISHED,0.0,64,,0.001,5,...,0.502067,0.44338,0.44338,0.027726,1.536166,0.535131,0.431159,1.450308,0.019018,0.932433
51,ff92bdf779a6414b8cbabf59b27d586d,fold_1,2025-12-09 11:00:22.057000+00:00,2025-12-09 11:06:12.253000+00:00,FINISHED,1.0,64,,0.001,5,...,0.315983,0.333565,0.333565,0.014519,1.812103,0.336541,0.275797,1.704273,0.020169,5.8366
132,24bb957ea2864ed7ae37903889f6c812,fold_1,2025-12-09 04:24:43.684000+00:00,2025-12-09 04:25:49.603000+00:00,FINISHED,1.0,64,,0.001,5,...,0.540813,0.44338,0.44338,0.027726,1.536166,0.535131,0.431159,1.450308,0.019018,1.09865
50,4b033dbb8d5a4f35a0b18087045537fb,fold_2,2025-12-09 11:06:12.255000+00:00,2025-12-09 11:12:17.409000+00:00,FINISHED,2.0,64,,0.001,5,...,0.295635,0.333565,0.333565,0.014519,1.812103,0.336541,0.275797,1.704273,0.020169,6.0859
131,5c8a38b202194aaeae67995edff9ae88,fold_2,2025-12-09 04:25:49.604000+00:00,2025-12-09 04:26:45.305000+00:00,FINISHED,2.0,64,,0.001,5,...,0.529844,0.44338,0.44338,0.027726,1.536166,0.535131,0.431159,1.450308,0.019018,0.92835
49,736b2367d4ef46b1863021a0a813f51d,fold_3,2025-12-09 11:12:17.411000+00:00,2025-12-09 11:16:48.209000+00:00,FINISHED,3.0,64,,0.001,5,...,0.368546,0.333565,0.333565,0.014519,1.812103,0.336541,0.275797,1.704273,0.020169,4.5133
130,551bf6f2a78a4c50b91c18c55e6a5f51,fold_3,2025-12-09 04:26:45.308000+00:00,2025-12-09 04:27:57.695000+00:00,FINISHED,3.0,64,,0.001,5,...,0.547891,0.44338,0.44338,0.027726,1.536166,0.535131,0.431159,1.450308,0.019018,1.20645
48,bb894394cc0f47e3adc2e0177ae9e0e0,fold_4,2025-12-09 11:16:48.210000+00:00,2025-12-09 11:22:37.855000+00:00,FINISHED,4.0,64,,0.001,5,...,0.319401,0.333565,0.333565,0.014519,1.812103,0.336541,0.275797,1.704273,0.020169,5.827417
129,187b72ab754f466ea5fcc9c21ee00e0e,fold_4,2025-12-09 04:27:57.697000+00:00,2025-12-09 04:29:12.750000+00:00,FINISHED,4.0,64,,0.001,5,...,0.55504,0.44338,0.44338,0.027726,1.536166,0.535131,0.431159,1.450308,0.019018,1.250883


In [21]:
# Create a cleaner summary table focused on key metrics
summary_cols = ['run_name']

# Add key parameters (if they exist)
key_params = ['params.model_name', 'params.learning_rate', 'params.batch_size',
              'params.dropout', 'params.freeze_backbone', 'params.fold']
for col in key_params:
    if col in results_df.columns:
        summary_cols.append(col)

# Add key metrics (if they exist)
key_metrics = ['metrics.test_accuracy', 'metrics.test_loss', 'metrics.val_accuracy',
               'metrics.val_loss', 'metrics.train_accuracy', 'metrics.train_loss']
for col in key_metrics:
    if col in results_df.columns:
        summary_cols.append(col)

# Add duration
summary_cols.append('duration_minutes')

# Filter columns that exist
existing_cols = [col for col in summary_cols if col in results_df.columns]
summary_df = results_df[existing_cols].copy()

# Rename columns for better readability
rename_dict = {col: col.replace('params.', '').replace('metrics.', '') for col in existing_cols}
summary_df = summary_df.rename(columns=rename_dict)

# Round numeric columns
numeric_cols = summary_df.select_dtypes(include=['float64', 'float32']).columns
for col in numeric_cols:
    if 'accuracy' in col.lower():
        summary_df[col] = summary_df[col].round(4)
    elif 'loss' in col.lower():
        summary_df[col] = summary_df[col].round(4)
    elif 'duration' in col.lower():
        summary_df[col] = summary_df[col].round(2)

print(f"Summary table with {len(summary_df)} runs")
summary_df

Summary table with 119 runs


Unnamed: 0,run_name,model_name,learning_rate,batch_size,dropout,freeze_backbone,fold,val_accuracy,val_loss,train_loss,duration_minutes
53,config_0,mlp,0.001,64,0.3,,,,,,28.00
134,config_0,mlp,0.001,64,0.3,,,,,,5.42
28,config_3,resnet50,1e-05,128,,True,,,,,178.81
34,config_2,resnet50,1e-05,64,,True,,,,,184.90
40,config_1,resnet50,0.0001,128,,True,,,,,188.12
...,...,...,...,...,...,...,...,...,...,...,...
104,fold_4,,,,,,4,0.7975,0.8667,0.8411,3.73
110,fold_4,,,,,,4,0.8109,0.6913,0.6939,4.40
116,fold_4,,,,,,4,0.8590,0.4323,0.3124,3.81
122,fold_4,,,,,,4,0.8653,0.4285,0.3171,4.46


In [9]:
# Group by model and aggregate results
if 'model_name' in summary_df.columns:
    # Build aggregation dict dynamically based on available columns
    agg_dict = {}

    if 'val_accuracy' in summary_df.columns:
        agg_dict['val_accuracy'] = ['mean', 'std', 'max']
    if 'val_loss' in summary_df.columns:
        agg_dict['val_loss'] = ['mean', 'std', 'min']
    if 'val_f1' in summary_df.columns:
        agg_dict['val_f1'] = ['mean', 'std', 'max']
    if 'val_precision' in summary_df.columns:
        agg_dict['val_precision'] = ['mean', 'std']
    if 'val_recall' in summary_df.columns:
        agg_dict['val_recall'] = ['mean', 'std']
    if 'duration_minutes' in summary_df.columns:
        agg_dict['duration_minutes'] = ['mean', 'sum']

    if agg_dict:
        model_summary = summary_df.groupby('model_name').agg(agg_dict).round(4)
        print("\nModel Performance Summary:")
        display(model_summary)
    else:
        print("No metrics available for aggregation")
else:
    print("Model name not found in results")


Model Performance Summary:


Unnamed: 0_level_0,val_accuracy,val_accuracy,val_accuracy,val_loss,val_loss,val_loss,duration_minutes,duration_minutes
Unnamed: 0_level_1,mean,std,max,mean,std,min,mean,sum
model_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
mlp,,,,,,,16.71,33.42
resnet50,,,,,,,407.41,814.82
vgg16,,,,,,,256.54,256.54


In [None]:
# Export to CSV for further analysis
output_path = '/home/rafael/private/ufrj/CPE727-trabalho-final/data/08_reporting/mlflow_results.csv'
summary_df.to_csv(output_path, index=False)
print(f"Results exported to: {output_path}")

# Also create a model summary CSV
if 'model_name' in summary_df.columns:
    model_output_path = '/home/rafael/private/ufrj/CPE727-trabalho-final/data/08_reporting/model_summary.csv'
    model_summary.to_csv(model_output_path)
    print(f"Model summary exported to: {model_output_path}")

In [10]:
# Separate parent and child runs for better analysis
parent_runs = results_df[~results_df['run_id'].isin(
    completed_runs[completed_runs['tags.mlflow.parentRunId'].notna()]['run_id']
)].copy()

child_runs_df = results_df[results_df['run_id'].isin(
    completed_runs[completed_runs['tags.mlflow.parentRunId'].notna()]['run_id']
)].copy()

print(f"Parent runs (grid search results): {len(parent_runs)}")
print(f"Child runs (individual fold results): {len(child_runs_df)}")

# Display parent runs with aggregated metrics
if len(parent_runs) > 0:
    parent_cols = ['run_name', 'model_name', 'learning_rate', 'batch_size', 'dropout',
                   'freeze_backbone', 'avg_val_accuracy', 'std_val_accuracy',
                   'avg_best_val_loss', 'std_best_val_loss', 'duration_minutes']

    # Filter to existing columns
    parent_display_cols = [col for col in parent_cols if col in parent_runs.columns]
    parent_summary = parent_runs[parent_display_cols].copy()

    # Sort by validation accuracy
    if 'avg_val_accuracy' in parent_summary.columns:
        parent_summary = parent_summary.sort_values('avg_val_accuracy', ascending=False)

    print("\n=== PARENT RUNS (Aggregated Grid Search Results) ===")
    display(parent_summary)

Parent runs (grid search results): 5
Child runs (individual fold results): 119

=== PARENT RUNS (Aggregated Grid Search Results) ===


Unnamed: 0,run_name,duration_minutes
47,resnet50_cv_gridsearch,731.61015
54,mlp_cv_gridsearch,27.995317
103,vgg16_cv_gridsearch,256.541733
128,resnet50_cv_gridsearch,83.2121
135,mlp_cv_gridsearch,5.417433


In [11]:
# Display child runs (individual fold results)
if len(child_runs_df) > 0:
    child_cols = ['run_name', 'model_name', 'fold', 'learning_rate', 'batch_size',
                  'dropout', 'freeze_backbone', 'val_accuracy', 'val_loss',
                  'best_val_loss', 'train_loss', 'duration_minutes']

    # Filter to existing columns
    child_display_cols = [col for col in child_cols if col in child_runs_df.columns]
    child_summary = child_runs_df[child_display_cols].copy()

    # Sort by model and fold
    sort_cols = []
    if 'model_name' in child_summary.columns:
        sort_cols.append('model_name')
    if 'fold' in child_summary.columns:
        sort_cols.append('fold')
    if 'val_accuracy' in child_summary.columns:
        sort_cols.append('val_accuracy')

    if sort_cols:
        child_summary = child_summary.sort_values(sort_cols, ascending=[True, True, False] if len(sort_cols) == 3 else True)

    print("\n=== CHILD RUNS (Individual Fold Results) ===")
    print(f"Total: {len(child_summary)} runs")
    display(child_summary)


=== CHILD RUNS (Individual Fold Results) ===
Total: 119 runs


Unnamed: 0,run_name,duration_minutes
1,fold_3,116.264183
2,fold_2,118.641867
3,fold_1,119.036183
4,fold_0,116.928983
8,fold_0,115.196900
...,...,...
130,fold_3,1.206450
131,fold_2,0.928350
132,fold_1,1.098650
133,fold_0,0.932433


In [12]:
# Check what columns we actually have in child_runs_df
print("Available columns in child_runs_df:")
print(child_runs_df.columns.tolist())

# Create a comprehensive results table with all available data
all_cols = ['run_name'] + [col for col in child_runs_df.columns if col not in ['run_id', 'run_name', 'start_time', 'end_time', 'status']]
comprehensive_table = child_runs_df[all_cols].copy()

print(f"\n=== COMPREHENSIVE RESULTS TABLE ===")
print(f"Shape: {comprehensive_table.shape}")
display(comprehensive_table.head(20))

Available columns in child_runs_df:
['run_id', 'run_name', 'start_time', 'end_time', 'status', 'params.fold', 'params.batch_size', 'params.freeze_backbone', 'params.learning_rate', 'params.cv_folds', 'params.total_combinations', 'params.model_name', 'params.dropout', 'params.hidden_layers', 'metrics.val_loss', 'metrics.val_recall', 'metrics.best_train_loss', 'metrics.val_accuracy', 'metrics.best_val_loss', 'metrics.train_loss', 'metrics.val_f1', 'metrics.val_precision', 'metrics.avg_val_accuracy', 'metrics.avg_val_recall', 'metrics.std_best_val_loss', 'metrics.avg_best_train_loss', 'metrics.avg_val_precision', 'metrics.avg_val_f1', 'metrics.avg_best_val_loss', 'metrics.std_val_accuracy', 'duration_minutes']

=== COMPREHENSIVE RESULTS TABLE ===
Shape: (119, 27)


Unnamed: 0,run_name,params.fold,params.batch_size,params.freeze_backbone,params.learning_rate,params.cv_folds,params.total_combinations,params.model_name,params.dropout,params.hidden_layers,...,metrics.val_precision,metrics.avg_val_accuracy,metrics.avg_val_recall,metrics.std_best_val_loss,metrics.avg_best_train_loss,metrics.avg_val_precision,metrics.avg_val_f1,metrics.avg_best_val_loss,metrics.std_val_accuracy,duration_minutes
1,fold_3,3.0,,,,,,,,,...,0.930111,,,,,,,,,116.264183
2,fold_2,2.0,,,,,,,,,...,0.931021,,,,,,,,,118.641867
3,fold_1,1.0,,,,,,,,,...,0.928456,,,,,,,,,119.036183
4,fold_0,0.0,,,,,,,,,...,0.931751,,,,,,,,,116.928983
8,fold_0,0.0,,,,,,,,,...,0.931434,,,,,,,,,115.1969
12,fold_3,3.0,,,,,,,,,...,0.974413,,,,,,,,,73.145133
13,fold_2,2.0,,,,,,,,,...,0.971592,,,,,,,,,66.097883
14,fold_1,1.0,,,,,,,,,...,0.97635,,,,,,,,,72.5328
15,fold_0,0.0,,,,,,,,,...,0.978505,,,,,,,,,73.136667
19,fold_1,1.0,,,,,,,,,...,0.979039,,,,,,,,,72.836033


In [58]:
# Create final clean summary table for presentation
# Filter to only fold-level runs (those with a fold number)
fold_level_runs = child_runs_df[child_runs_df['params.fold'].notna()].copy()

print(f"Filtering to fold-level runs only: {len(fold_level_runs)} / {len(child_runs_df)}")

# Extract config number from run_name or parent run
# Config runs typically have names like "config_0", "config_1", etc.
def extract_config_number(row):
    """Extract config number from parent run name or tags"""
    # Check if the run_name contains 'config_'
    if pd.notna(row.get('run_name')) and 'config_' in str(row['run_name']):
        try:
            return int(str(row['run_name']).split('config_')[1])
        except:
            pass

    # Try to get it from the parent run
    if 'tags.mlflow.parentRunId' in row and pd.notna(row['tags.mlflow.parentRunId']):
        parent_id = row['tags.mlflow.parentRunId']
        parent_runs = all_runs[all_runs['run_id'] == parent_id]
        if len(parent_runs) > 0:
            parent_name = parent_runs.iloc[0].get('tags.mlflow.runName', '')
            if 'config_' in str(parent_name):
                try:
                    return int(str(parent_name).split('config_')[1])
                except:
                    pass

    return None

# Add config column
fold_level_runs['config'] = fold_level_runs.apply(extract_config_number, axis=1)

# Add config_run_id column (parent run ID)
if 'tags.mlflow.parentRunId' in fold_level_runs.columns:
    fold_level_runs['config_run_id'] = fold_level_runs['tags.mlflow.parentRunId']
    print(f"Config run IDs added: {fold_level_runs['config_run_id'].notna().sum()} / {len(fold_level_runs)}")

# Calculate duration_minutes from start_time and end_time
if 'start_time' in fold_level_runs.columns and 'end_time' in fold_level_runs.columns:
    fold_level_runs['duration_minutes'] = (fold_level_runs['end_time'] - fold_level_runs['start_time']).dt.total_seconds() / 60

print(f"Config numbers extracted: {fold_level_runs['config'].notna().sum()} / {len(fold_level_runs)}")

final_cols = [
    'run_id',
    'config_run_id',
    'run_name',
    'params.model_name',
    'config',
    'params.fold',
    'params.learning_rate',
    'params.batch_size',
    'params.dropout',
    'params.freeze_backbone',
    'params.hidden_layers',
    'metrics.val_accuracy',
    'metrics.val_loss',
    'metrics.val_f1',
    'metrics.val_precision',
    'metrics.val_recall',
    'metrics.best_val_loss',
    'start_time',
    'end_time',
    'duration_minutes'
]

# Filter to existing columns
final_display_cols = [col for col in final_cols if col in fold_level_runs.columns]
final_table = fold_level_runs[final_display_cols].copy()

# Rename columns for readability
rename_map = {
    'params.model_name': 'Model',
    'config': 'Config',
    'config_run_id': 'Config_Run_ID',
    'params.fold': 'Fold',
    'params.learning_rate': 'LR',
    'params.batch_size': 'Batch',
    'params.dropout': 'Dropout',
    'params.freeze_backbone': 'Freeze',
    'params.hidden_layers': 'Hidden',
    'metrics.val_accuracy': 'Val_Acc',
    'metrics.val_loss': 'Val_Loss',
    'metrics.val_f1': 'Val_F1',
    'metrics.val_precision': 'Val_Prec',
    'metrics.val_recall': 'Val_Rec',
    'metrics.best_val_loss': 'Best_Val_Loss',
    'start_time': 'Start_Time',
    'end_time': 'End_Time',
    'duration_minutes': 'Duration_min',
    'run_name': 'Run',
    'run_id': 'Run_ID'
}
final_table = final_table.rename(columns=rename_map)

# Round numeric columns (including Duration_min)
numeric_cols = final_table.select_dtypes(include=['float64', 'float32']).columns
for col in numeric_cols:
    final_table[col] = final_table[col].round(4)

# Reorder columns to put time-related columns at the end
if all(col in final_table.columns for col in ['Start_Time', 'End_Time', 'Duration_min']):
    # Get all columns except time columns
    other_cols = [col for col in final_table.columns if col not in ['Start_Time', 'End_Time', 'Duration_min']]
    # Reorder: other columns first, then time columns
    final_table = final_table[other_cols + ['Start_Time', 'End_Time', 'Duration_min']]

# Sort by Model, Config, and Fold
sort_by = []
if 'Model' in final_table.columns:
    sort_by.append('Model')
if 'Config' in final_table.columns:
    sort_by.append('Config')
if 'Fold' in final_table.columns:
    sort_by.append('Fold')
if sort_by:
    final_table = final_table.sort_values(sort_by)

print(f"\n{'='*80}")
print(f"FINAL MLFLOW RESULTS TABLE - {len(final_table)} fold-level runs")
print(f"{'='*80}")
display(final_table)

# Export to CSV
final_table.to_csv('/home/rafael/private/ufrj/CPE727-trabalho-final/data/08_reporting/mlflow_final_results.csv', index=False)
print(f"\nTable exported to: data/08_reporting/mlflow_final_results.csv")


Filtering to fold-level runs only: 101 / 119
Config run IDs added: 101 / 101
Config numbers extracted: 101 / 101

FINAL MLFLOW RESULTS TABLE - 101 fold-level runs


Unnamed: 0,Run_ID,Config_Run_ID,Model,Config,Fold,LR,Batch,Dropout,Freeze,Hidden,Val_Acc,Val_Loss,Val_F1,Val_Prec,Val_Rec,Best_Val_Loss,Start_Time,End_Time,Duration_min
52,d802471bc71646aebff931b28d613a13,60bc05e6a98c4cbd9e8db8dcb189e63c,mlp,0,0,0.001,64,0.3,,"[512, 256]",0.2958,1.8003,0.2312,0.3831,0.2958,1.6781,2025-12-09 10:54:38.168000+00:00,2025-12-09 11:00:22.055000+00:00,5.7314
133,fd4b271020e14860bf5e43f0cb3a9346,b9e6967599c648558771d4441f01b867,mlp,0,0,0.001,64,0.3,,"[512, 256]",0.4123,1.5577,0.3723,0.5021,0.4123,1.5027,2025-12-09 04:23:47.736000+00:00,2025-12-09 04:24:43.682000+00:00,0.9324
51,ff92bdf779a6414b8cbabf59b27d586d,60bc05e6a98c4cbd9e8db8dcb189e63c,mlp,0,1,0.001,64,0.3,,"[512, 256]",0.3456,1.7290,0.2912,0.3160,0.3456,1.7029,2025-12-09 11:00:22.057000+00:00,2025-12-09 11:06:12.253000+00:00,5.8366
132,24bb957ea2864ed7ae37903889f6c812,b9e6967599c648558771d4441f01b867,mlp,0,1,0.001,64,0.3,,"[512, 256]",0.4354,1.4856,0.4282,0.5408,0.4354,1.4418,2025-12-09 04:24:43.684000+00:00,2025-12-09 04:25:49.603000+00:00,1.0986
50,4b033dbb8d5a4f35a0b18087045537fb,60bc05e6a98c4cbd9e8db8dcb189e63c,mlp,0,2,0.001,64,0.3,,"[512, 256]",0.3475,1.7798,0.2746,0.2956,0.3475,1.7163,2025-12-09 11:06:12.255000+00:00,2025-12-09 11:12:17.409000+00:00,6.0859
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,b80536ced7074cdbb8d5dd58aa0645a6,63ef075bc53441918e2b2530bf2468f9,vit_b_16,0,0,1e-05,64,,True,,0.9308,0.2638,0.9310,0.9318,0.9308,0.2638,2025-12-10 14:30:56.396000+00:00,2025-12-10 16:27:52.135000+00:00,116.9290
8,9bf6c39575bf447b931cdf531d612d26,4ebdc4216ec14e408bbf5dfe545e53e3,vit_b_16,0,0,1e-05,64,,True,,0.9303,0.2634,0.9305,0.9314,0.9303,0.2634,2025-12-10 10:19:35.764000+00:00,2025-12-10 12:14:47.578000+00:00,115.1969
3,ab86d7b38fef4aa8beb9e19ea38f83e6,63ef075bc53441918e2b2530bf2468f9,vit_b_16,0,1,1e-05,64,,True,,0.9273,0.2658,0.9275,0.9285,0.9273,0.2658,2025-12-10 16:27:52.144000+00:00,2025-12-10 18:26:54.315000+00:00,119.0362
2,8a3733f9fa7e4fbd835d42813cfcdaf6,63ef075bc53441918e2b2530bf2468f9,vit_b_16,0,2,1e-05,64,,True,,0.9301,0.2668,0.9303,0.9310,0.9301,0.2668,2025-12-10 18:26:54.326000+00:00,2025-12-10 20:25:32.838000+00:00,118.6419



Table exported to: data/08_reporting/mlflow_final_results.csv


In [47]:
# Create model-wise summary with best configurations
if 'Model' in final_table.columns and 'Val_Acc' in final_table.columns:
    # Create a working copy
    summary_df = final_table.copy()

    # Group by model and hyperparameters to get average across folds
    group_cols = ['Model']

    # Add hyperparameter columns that are not None for each model
    if 'LR' in summary_df.columns:
        group_cols.append('LR')
    if 'Batch' in summary_df.columns:
        group_cols.append('Batch')
    if 'Dropout' in summary_df.columns and summary_df['Dropout'].notna().any():
        group_cols.append('Dropout')
    if 'Freeze' in summary_df.columns and summary_df['Freeze'].notna().any():
        group_cols.append('Freeze')

    # Aggregate metrics across folds
    agg_metrics = {}
    if 'Val_Acc' in summary_df.columns:
        agg_metrics['Val_Acc'] = ['mean', 'std', 'max']
    if 'Val_Loss' in summary_df.columns:
        agg_metrics['Val_Loss'] = ['mean', 'std', 'min']
    if 'Val_F1' in summary_df.columns:
        agg_metrics['Val_F1'] = ['mean', 'std']
    if 'Duration_min' in summary_df.columns:
        agg_metrics['Duration_min'] = ['sum', 'mean']

    if agg_metrics:
        model_config_summary = summary_df.groupby(group_cols, dropna=False).agg(agg_metrics).round(4)

        # Sort by validation accuracy
        if ('Val_Acc', 'mean') in model_config_summary.columns:
            model_config_summary = model_config_summary.sort_values(('Val_Acc', 'mean'), ascending=False)

        print(f"\n{'='*80}")
        print(f"MODEL CONFIGURATION SUMMARY (Averaged across folds)")
        print(f"{'='*80}")
        display(model_config_summary)

        # Export
        model_config_summary.to_csv('/home/rafael/private/ufrj/CPE727-trabalho-final/data/08_reporting/model_config_summary.csv')
        print(f"\nModel configuration summary exported to: data/08_reporting/model_config_summary.csv")


MODEL CONFIGURATION SUMMARY (Averaged across folds)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Val_Acc,Val_Acc,Val_Acc,Val_Loss,Val_Loss,Val_Loss,Val_F1,Val_F1
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,std,max,mean,std,min,mean,std
Model,LR,Batch,Dropout,Freeze,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
vgg16,0.0001,64,0.3,True,0.9402,0.0412,0.9789,0.2191,0.1766,0.061,0.9402,0.0413
vit_b_16,1e-05,64,,True,0.9295,0.0014,0.9308,0.2654,0.0017,0.2634,0.9297,0.0014
vgg16,1e-05,64,0.5,True,0.9064,0.0054,0.9102,0.3083,0.0148,0.2958,0.906,0.0053
vgg16,1e-05,128,0.5,True,0.9062,0.0049,0.91,0.2958,0.0159,0.284,0.9058,0.0049
vgg16,1e-05,64,0.3,True,0.9059,0.0054,0.909,0.3035,0.017,0.2891,0.9055,0.0054
vgg16,1e-05,128,0.3,True,0.9044,0.0063,0.9113,0.2994,0.0153,0.2837,0.904,0.0063
vgg16,0.0001,128,0.3,True,0.9008,0.0075,0.9086,0.3774,0.0278,0.3409,0.9003,0.0075
vgg16,0.0001,128,0.5,True,0.9004,0.0057,0.9042,0.3778,0.0109,0.3609,0.8999,0.0054
vgg16,0.0001,64,0.5,True,0.8999,0.0058,0.9056,0.3959,0.0389,0.3636,0.8998,0.0055
resnet50,0.0001,64,,True,0.8941,0.0238,0.9181,0.3443,0.0611,0.2858,0.8938,0.0238



Model configuration summary exported to: data/08_reporting/model_config_summary.csv


In [48]:
# Find best configuration per model
# Filter out rows with NaN Val_Acc
valid_runs = final_table[final_table['Val_Acc'].notna()].copy()
print(f"Valid runs with Val_Acc: {len(valid_runs)} out of {len(final_table)}")

if 'Model' in valid_runs.columns and len(valid_runs) > 0:
    print(f"\nModels in valid runs: {valid_runs['Model'].value_counts().to_dict()}")

    best_per_model = []

    for model in valid_runs['Model'].dropna().unique():
        model_data = valid_runs[valid_runs['Model'] == model].copy()

        if len(model_data) > 0:
            best_idx = model_data['Val_Acc'].idxmax()
            best_run = model_data.loc[best_idx]
            best_per_model.append(best_run)

    if best_per_model:
        best_models_df = pd.DataFrame(best_per_model)
        best_models_df = best_models_df.sort_values('Val_Acc', ascending=False)

        print(f"\n{'='*80}")
        print(f"BEST CONFIGURATION PER MODEL (Single Best Fold)")
        print(f"{'='*80}")
        display(best_models_df[['Model', 'Fold', 'LR', 'Batch', 'Dropout', 'Val_Acc', 'Val_Loss', 'Val_F1']])

        # Export
        best_models_df.to_csv('/home/rafael/private/ufrj/CPE727-trabalho-final/data/08_reporting/best_models.csv', index=False)
        print(f"\nBest models exported to: data/08_reporting/best_models.csv")
    else:
        print("No models with data found")

    # Also show the top 10 overall best runs
    print(f"\n{'='*80}")
    print(f"TOP 10 BEST RUNS (All Models)")
    print(f"{'='*80}")
    top_10 = valid_runs.nlargest(10, 'Val_Acc')[['Model', 'Fold', 'LR', 'Batch', 'Dropout', 'Val_Acc', 'Val_Loss', 'Val_F1']]
    display(top_10)

Valid runs with Val_Acc: 101 out of 119

Models in valid runs: {'vgg16': 46, 'resnet50': 40, 'mlp': 10, 'vit_b_16': 5}

BEST CONFIGURATION PER MODEL (Single Best Fold)


Unnamed: 0,Model,Fold,LR,Batch,Dropout,Val_Acc,Val_Loss,Val_F1
19,vgg16,1,0.0001,64,0.3,0.9789,0.0621,0.9789
4,vit_b_16,0,1e-05,64,,0.9308,0.2638,0.931
44,resnet50,1,0.0001,64,,0.9181,0.2858,0.9179
131,mlp,2,0.001,64,0.3,0.4669,1.4642,0.4623



Best models exported to: data/08_reporting/best_models.csv

TOP 10 BEST RUNS (All Models)


Unnamed: 0,Model,Fold,LR,Batch,Dropout,Val_Acc,Val_Loss,Val_F1
19,vgg16,1,0.0001,64,0.3,0.9789,0.0621,0.9789
15,vgg16,0,0.0001,64,0.3,0.9782,0.061,0.9783
20,vgg16,0,0.0001,64,0.3,0.9769,0.066,0.9769
14,vgg16,1,0.0001,64,0.3,0.9759,0.0669,0.9759
12,vgg16,3,0.0001,64,0.3,0.9741,0.0672,0.9741
13,vgg16,2,0.0001,64,0.3,0.9708,0.0777,0.9709
4,vit_b_16,0,1e-05,64,,0.9308,0.2638,0.931
8,vit_b_16,0,1e-05,64,,0.9303,0.2634,0.9305
2,vit_b_16,2,1e-05,64,,0.9301,0.2668,0.9303
1,vit_b_16,3,1e-05,64,,0.9292,0.2671,0.9294


In [46]:
# Check final_table in detail
print("Final table info:")
print(f"Shape: {final_table.shape}")
print(f"\nColumns: {final_table.columns.tolist()}")
print(f"\nModel value counts:")
print(final_table['Model'].value_counts(dropna=False))
print(f"\nFirst 10 rows with Model:")
display(final_table[['Model', 'Fold', 'LR', 'Batch', 'Val_Acc']].head(30))

Final table info:
Shape: (119, 13)

Columns: ['Model', 'Fold', 'LR', 'Batch', 'Dropout', 'Freeze', 'Hidden', 'Val_Acc', 'Val_Loss', 'Val_F1', 'Val_Prec', 'Val_Rec', 'Best_Val_Loss']

Model value counts:
Model
vgg16       54
resnet50    48
mlp         12
vit_b_16     5
Name: count, dtype: int64

First 10 rows with Model:


Unnamed: 0,Model,Fold,LR,Batch,Val_Acc
52,mlp,0.0,0.001,64,0.2958
133,mlp,0.0,0.001,64,0.4123
51,mlp,1.0,0.001,64,0.3456
132,mlp,1.0,0.001,64,0.4354
50,mlp,2.0,0.001,64,0.3475
131,mlp,2.0,0.001,64,0.4669
49,mlp,3.0,0.001,64,0.3495
130,mlp,3.0,0.001,64,0.4583
48,mlp,4.0,0.001,64,0.3294
129,mlp,4.0,0.001,64,0.444


In [59]:
final_table

Unnamed: 0,Run_ID,Config_Run_ID,Model,Config,Fold,LR,Batch,Dropout,Freeze,Hidden,Val_Acc,Val_Loss,Val_F1,Val_Prec,Val_Rec,Best_Val_Loss,Start_Time,End_Time,Duration_min
52,d802471bc71646aebff931b28d613a13,60bc05e6a98c4cbd9e8db8dcb189e63c,mlp,0,0,0.001,64,0.3,,"[512, 256]",0.2958,1.8003,0.2312,0.3831,0.2958,1.6781,2025-12-09 10:54:38.168000+00:00,2025-12-09 11:00:22.055000+00:00,5.7314
133,fd4b271020e14860bf5e43f0cb3a9346,b9e6967599c648558771d4441f01b867,mlp,0,0,0.001,64,0.3,,"[512, 256]",0.4123,1.5577,0.3723,0.5021,0.4123,1.5027,2025-12-09 04:23:47.736000+00:00,2025-12-09 04:24:43.682000+00:00,0.9324
51,ff92bdf779a6414b8cbabf59b27d586d,60bc05e6a98c4cbd9e8db8dcb189e63c,mlp,0,1,0.001,64,0.3,,"[512, 256]",0.3456,1.7290,0.2912,0.3160,0.3456,1.7029,2025-12-09 11:00:22.057000+00:00,2025-12-09 11:06:12.253000+00:00,5.8366
132,24bb957ea2864ed7ae37903889f6c812,b9e6967599c648558771d4441f01b867,mlp,0,1,0.001,64,0.3,,"[512, 256]",0.4354,1.4856,0.4282,0.5408,0.4354,1.4418,2025-12-09 04:24:43.684000+00:00,2025-12-09 04:25:49.603000+00:00,1.0986
50,4b033dbb8d5a4f35a0b18087045537fb,60bc05e6a98c4cbd9e8db8dcb189e63c,mlp,0,2,0.001,64,0.3,,"[512, 256]",0.3475,1.7798,0.2746,0.2956,0.3475,1.7163,2025-12-09 11:06:12.255000+00:00,2025-12-09 11:12:17.409000+00:00,6.0859
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,b80536ced7074cdbb8d5dd58aa0645a6,63ef075bc53441918e2b2530bf2468f9,vit_b_16,0,0,1e-05,64,,True,,0.9308,0.2638,0.9310,0.9318,0.9308,0.2638,2025-12-10 14:30:56.396000+00:00,2025-12-10 16:27:52.135000+00:00,116.9290
8,9bf6c39575bf447b931cdf531d612d26,4ebdc4216ec14e408bbf5dfe545e53e3,vit_b_16,0,0,1e-05,64,,True,,0.9303,0.2634,0.9305,0.9314,0.9303,0.2634,2025-12-10 10:19:35.764000+00:00,2025-12-10 12:14:47.578000+00:00,115.1969
3,ab86d7b38fef4aa8beb9e19ea38f83e6,63ef075bc53441918e2b2530bf2468f9,vit_b_16,0,1,1e-05,64,,True,,0.9273,0.2658,0.9275,0.9285,0.9273,0.2658,2025-12-10 16:27:52.144000+00:00,2025-12-10 18:26:54.315000+00:00,119.0362
2,8a3733f9fa7e4fbd835d42813cfcdaf6,63ef075bc53441918e2b2530bf2468f9,vit_b_16,0,2,1e-05,64,,True,,0.9301,0.2668,0.9303,0.9310,0.9301,0.2668,2025-12-10 18:26:54.326000+00:00,2025-12-10 20:25:32.838000+00:00,118.6419


In [56]:
# Check columns in fold_level_runs before filtering
print("Columns in child_runs_df:")
print([col for col in child_runs_df.columns if 'duration' in col.lower() or 'time' in col.lower()])
print(f"\nSample duration value: {child_runs_df['duration_minutes'].head(1).values if 'duration_minutes' in child_runs_df.columns else 'Column not found'}")

Columns in child_runs_df:
['start_time', 'end_time']

Sample duration value: Column not found
