In [1]:
import sys

# Make the modules inside ./PriorFlowCVAE importable (the next cell does
# `from main import main`). The path is relative, so the notebook has to be
# started from the directory that contains PriorFlowCVAE.
# Only append when the entry is missing, so re-running this cell does not pile
# up duplicate entries in sys.path.
if "./PriorFlowCVAE" not in sys.path:
    sys.path.append("./PriorFlowCVAE")

In [2]:
from main import main
import argparse
import itertools
import pandas as pd
import shutil
import os
import glob
import os
import torch

In [3]:
# Report which compute device PyTorch can see before any training is launched.
# The device name is only queried when CUDA is actually available.
print(
    f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}"
    if torch.cuda.is_available()
    else "CUDA is not available. Using CPU."
)

CUDA is available. Using GPU: NVIDIA A40


In [4]:
# Define the hyperparameter search space.
# Every entry is a list of candidate values; the sweep below runs their
# Cartesian product. Trailing comments record values that were tried earlier.
# The learning-rate and latent-size lists deliberately do NOT share a name with
# the loop variables `lr` and `z`: shadowing them would replace the lists with
# scalars after the first run and make this cell fail when it is re-run.
learning_rates = [1e-4]  # previously tried: 1e-3
num_particl = [20]  # previously tried: 10
num_epochs = [200]  # previously tried: 50
z_dims = [500]  # previously tried: 250
hidd1 = [700]  # previously tried values around hidden_1=500 (400, 500)
hidd2 = [600]  # around the earlier hidden_2=500
random_mask = False

# Destination folders for the per-configuration copies of the run artifacts.
csv_dir = "PriorFlowCVAE/tuning_results_fashion/csvs"
image_dir = "PriorFlowCVAE/tuning_results_fashion/images"

# Iterate over all combinations of hyperparameters
for lr, num_ep, num_part, z, hid1, hid2 in itertools.product(
    learning_rates, num_epochs, num_particl, z_dims, hidd1, hidd2
):
    # Human-readable description and file-name tag of this configuration,
    # built once so every message and output path stays consistent.
    config_desc = f"lr={lr}, num_epochs={num_ep}, num_particles={num_part}, z_dim={z}, hidden_1={hid1}, hidden_2={hid2}"
    config_tag = f"lr_{lr}_epochs_{num_ep}_particles_{num_part}_z_{z}_h1_{hid1}_h2_{hid2}"

    # Set up arguments
    args = argparse.Namespace(
        num_quadrant_inputs=[1, 2, 3],
        num_epochs=num_ep,
        early_stop_patience=3,
        learning_rate=lr,
        cuda=True,
        num_images=10,
        num_samples=10,
        num_particles=num_part,
        dataset='fashionmnist',
        z_dim=z,
        random_mask=random_mask,
        hidden_1=hid1,
        hidden_2=hid2,
        allow_baseline_reuse=False,
        use_conv=False
    )

    # Log the current configuration
    print(f"Running with {config_desc}")

    # Run the main function. A failing configuration is reported and skipped
    # so the remaining combinations still get a chance to run.
    # NOTE(review): after a CUDA launch failure the CUDA context may be
    # unusable for later configurations; a kernel restart may be needed.
    try:
        main(args)
    except Exception as e:
        print(f"Error with configuration {config_desc}: {e}")
        continue

    # Make sure the destination folders exist; shutil.copy does not create
    # missing parent directories.
    os.makedirs(csv_dir, exist_ok=True)
    os.makedirs(image_dir, exist_ok=True)

    # Save the results: main() is expected to leave 'results.csv' in the
    # working directory; keep a copy named after this configuration.
    if os.path.exists("results.csv"):
        output_file = f"{csv_dir}/results_{config_tag}.csv"
        shutil.copy("results.csv", output_file)
        print(f"Results saved to {output_file}")
    else:
        print(f"No 'results.csv' file generated for {config_desc}.")

    # Back up the plots produced by this run (one per quadrant setting) so the
    # next configuration does not overwrite them.
    for q in args.num_quadrant_inputs:
        plot_file = f"cvae_plot_q{q}.png"
        if os.path.exists(plot_file):
            output_file = f"{image_dir}/cvae_plot_q{q}_{config_tag}.png"
            shutil.copy(plot_file, output_file)
            print(f"Backup created for '{plot_file}' as '{output_file}'.")

Running with lr=0.0001, num_epochs=200, num_particles=20, z_dim=500, hidden_1=700, hidden_2=600
Device:  cuda:0
Training with 1 quadrant as input...


NN Epoch 0 train    : 100%|██████████| 118/118 [00:10<00:00, 11.20it/s, early_stop_count=0, loss=304.10]
NN Epoch 0 val      : 100%|██████████| 20/20 [00:01<00:00, 14.21it/s, early_stop_count=0, loss=269.50]
NN Epoch 1 train    : 100%|██████████| 118/118 [00:09<00:00, 12.58it/s, early_stop_count=0, loss=247.14]
NN Epoch 1 val      : 100%|██████████| 20/20 [00:01<00:00, 13.88it/s, early_stop_count=0, loss=229.11]
NN Epoch 2 train    : 100%|██████████| 118/118 [00:10<00:00, 10.90it/s, early_stop_count=0, loss=224.80]
NN Epoch 2 val      : 100%|██████████| 20/20 [00:01<00:00, 13.94it/s, early_stop_count=0, loss=220.96]
NN Epoch 3 train    : 100%|██████████| 118/118 [00:09<00:00, 12.19it/s, early_stop_count=0, loss=216.71]
NN Epoch 3 val      : 100%|██████████| 20/20 [00:01<00:00, 12.67it/s, early_stop_count=0, loss=212.05]
NN Epoch 4 train    : 100%|██████████| 118/118 [00:09<00:00, 12.09it/s, early_stop_count=0, loss=208.04]
NN Epoch 4 val      : 100%|██████████| 20/20 [00:01<00:00, 14.1

Df:                      1 quadrant
NN (baseline)       187.206753
CVAE (Monte Carlo)  178.854739
Columns:  ['1 quadrant']
Training with 2 quadrants as input...


NN Epoch 0 train    : 100%|██████████| 118/118 [00:07<00:00, 14.94it/s, early_stop_count=0, loss=208.77]
NN Epoch 0 val      : 100%|██████████| 20/20 [00:01<00:00, 15.81it/s, early_stop_count=0, loss=173.59]
NN Epoch 1 train    : 100%|██████████| 118/118 [00:08<00:00, 14.68it/s, early_stop_count=0, loss=159.06]
NN Epoch 1 val      : 100%|██████████| 20/20 [00:01<00:00, 15.52it/s, early_stop_count=0, loss=152.86]
NN Epoch 2 train    : 100%|██████████| 118/118 [00:07<00:00, 14.94it/s, early_stop_count=0, loss=149.13]
NN Epoch 2 val      : 100%|██████████| 20/20 [00:01<00:00, 15.66it/s, early_stop_count=0, loss=146.57]
NN Epoch 3 train    : 100%|██████████| 118/118 [00:07<00:00, 14.82it/s, early_stop_count=0, loss=144.25]
NN Epoch 3 val      : 100%|██████████| 20/20 [00:01<00:00, 15.75it/s, early_stop_count=0, loss=142.40]
NN Epoch 4 train    : 100%|██████████| 118/118 [00:07<00:00, 15.01it/s, early_stop_count=0, loss=140.66]
NN Epoch 4 val      : 100%|██████████| 20/20 [00:01<00:00, 14.4

Df:                     2 quadrants
NN (baseline)       128.786508
CVAE (Monte Carlo)  132.182842
Columns:  ['1 quadrant', '2 quadrants']
Training with 3 quadrants as input...


NN Epoch 0 train    : 100%|██████████| 118/118 [00:10<00:00, 11.60it/s, early_stop_count=0, loss=101.72]
NN Epoch 0 val      : 100%|██████████| 20/20 [00:01<00:00, 13.23it/s, early_stop_count=0, loss=79.05]
NN Epoch 1 train    : 100%|██████████| 118/118 [00:08<00:00, 13.36it/s, early_stop_count=0, loss=75.45]
NN Epoch 1 val      : 100%|██████████| 20/20 [00:01<00:00, 14.01it/s, early_stop_count=0, loss=73.32]
NN Epoch 2 train    : 100%|██████████| 118/118 [00:07<00:00, 15.06it/s, early_stop_count=0, loss=71.97]
NN Epoch 2 val      : 100%|██████████| 20/20 [00:01<00:00, 15.71it/s, early_stop_count=0, loss=71.56]
NN Epoch 3 train    : 100%|██████████| 118/118 [00:08<00:00, 14.69it/s, early_stop_count=0, loss=70.51]
NN Epoch 3 val      : 100%|██████████| 20/20 [00:01<00:00, 15.42it/s, early_stop_count=0, loss=70.38]
NN Epoch 4 train    : 100%|██████████| 118/118 [00:08<00:00, 14.74it/s, early_stop_count=0, loss=69.58]
NN Epoch 4 val      : 100%|██████████| 20/20 [00:01<00:00, 15.70it/s, e

Error with configuration lr=0.0001, num_epochs=200, num_particles=20, z_dim=500, hidden_1=700, hidden_2=600: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.






<Figure size 640x480 with 0 Axes>

In [5]:
# Combine all results for analysis
RESULTS_GLOB = "PriorFlowCVAE/tuning_results_fashion/csvs/*.csv"
# NOTE(review): the combined file goes under 'CVAE/', while the per-run CSVs
# are read from 'PriorFlowCVAE/' -- confirm this destination is intended.
COMBINED_RESULTS_PATH = "CVAE/tuning_results_fashion/combined_results.csv"
QUADRANT_COLUMNS = ['1 quadrant', '2 quadrants', '3 quadrants']


def extract_gap_record(df, file):
    """Summarise the 'Performance gap' row of one tuning-result table.

    Args:
        df: DataFrame with a 'Method' column and one column per entry of
            QUADRANT_COLUMNS.
        file: Path of the CSV the table was read from; stored in the record.

    Returns:
        A dict with the file name, the sum of the gaps over all quadrant
        columns ("aggregate_gap") and the individual per-quadrant gaps, or
        None when a required column or the 'Performance gap' row is missing
        (for example a table written by an incomplete run).
    """
    required_columns = ['Method'] + QUADRANT_COLUMNS
    if any(column not in df.columns for column in required_columns):
        return None

    performance_gap_row = df.loc[df['Method'] == 'Performance gap']
    if performance_gap_row.empty:
        return None

    aggregate_gap = performance_gap_row[QUADRANT_COLUMNS].sum(axis=1).values[0]
    return {
        "file": file,
        "aggregate_gap": aggregate_gap,
        "1 quadrant gap": performance_gap_row['1 quadrant'].values[0],
        "2 quadrants gap": performance_gap_row['2 quadrants'].values[0],
        "3 quadrants gap": performance_gap_row['3 quadrants'].values[0]
    }


# Sorted so the processing (and printing) order is deterministic.
result_files = sorted(glob.glob(RESULTS_GLOB))
combined_results = []

for file in result_files:
    # Read each result file
    df = pd.read_csv(file)

    # The method labels are read back as 'Unnamed: 0' when the CSV was written
    # with an unnamed index; give that column a proper name.
    if 'Unnamed: 0' in df.columns:
        df = df.rename(columns={'Unnamed: 0': 'Method'})

    print(df)

    # Extract performance gaps and compute the aggregate gap. Unusable files
    # are skipped instead of aborting the whole aggregation.
    record = extract_gap_record(df, file)
    if record is None:
        print(f"Skipping '{file}': missing 'Performance gap' row or quadrant columns.")
        continue
    combined_results.append(record)

if combined_results:
    # Convert to a DataFrame for sorting and analysis
    combined_results_df = pd.DataFrame(combined_results)

    # Sort by aggregate gap in descending order (largest gap first)
    combined_results_df = combined_results_df.sort_values(by="aggregate_gap", ascending=False)

    # Save the combined results to a file (to_csv does not create folders)
    os.makedirs(os.path.dirname(COMBINED_RESULTS_PATH), exist_ok=True)
    combined_results_df.to_csv(COMBINED_RESULTS_PATH, index=False)
    print(f"All results combined and saved to '{COMBINED_RESULTS_PATH}'.")

    # Display the best configuration (first row after the descending sort)
    print("Best configuration:")
    best_file = combined_results_df.iloc[0]['file']
    print(f"File: {best_file}, Aggregate Performance Gap: {combined_results_df.iloc[0]['aggregate_gap']}")

    # Optionally, load and display the details of the best result
    best_result = pd.read_csv(best_file)
    print("\nDetails of the best configuration:")
    print(best_result)
else:
    print("No results to combine.")


No results to combine.
