In [2]:
import sys
sys.path.append("./CVAE")

In [3]:
!pip install pyro-ppl
from main import main
import argparse
import itertools
import pandas as pd
import shutil
import os
import glob

Collecting pyro-ppl
  Using cached pyro_ppl-1.9.1-py3-none-any.whl.metadata (7.8 kB)
Collecting pyro-api>=0.1.1 (from pyro-ppl)
  Using cached pyro_api-0.1.2-py3-none-any.whl.metadata (2.5 kB)
Using cached pyro_ppl-1.9.1-py3-none-any.whl (755 kB)
Using cached pyro_api-0.1.2-py3-none-any.whl (11 kB)
Installing collected packages: pyro-api, pyro-ppl
Successfully installed pyro-api-0.1.2 pyro-ppl-1.9.1


In [4]:
# Define the hyperparameter search space
learning_rates = [1e-4, 1e-3]
num_epochs_list = [50, 101]
num_particles_list = [20, 10]
z_dims = [200, 400]
random_mask = True

# Iterate over all combinations of hyperparameters
for lr, num_epochs, num_particles, z_dim in itertools.product(learning_rates, num_epochs_list, num_particles_list, z_dims):
    # Set up arguments
    args = argparse.Namespace(
        num_quadrant_inputs=[1, 2, 3],
        num_epochs=num_epochs,
        early_stop_patience=3,
        learning_rate=lr,
        cuda=True,
        num_images=10,
        num_samples=10,
        num_particles=num_particles,
        dataset='cifar10',
        z_dim=z_dim,
        random_mask=random_mask,
        hidden_1=500,
        hidden_2=500,
    )

    # Log the current configuration
    print(f"Running with lr={lr}, num_epochs={num_epochs}, num_particles={num_particles}, z-dim={z_dim}")

    # Run the main function
    try:
        main(args)
    except Exception as e:
        print(f"Error with configuration lr={lr}, num_epochs={num_epochs}, num_particles={num_particles}, z-dim={z_dim}: {e}")
        continue

    # Save the results
    if os.path.exists("results.csv"):
        output_file = f"tuning_results/results_lr_{lr}_epochs_{num_epochs}_particles_{num_particles}_z_{z_dim}.csv"
        shutil.copy("results.csv", output_file)
        print(f"Results saved to {output_file}")
    else:
        print(f"No 'results.csv' file generated for lr={lr}, num_epochs={num_epochs}, num_particles={num_particles}_z_{z_dim}.")
    # Backup important files before starting

    for q in range(1, 3):
        file_name = "cvae{}_plot_q{}.png".format("_rand_mask" if random_mask else "", q)
        if os.path.exists(file_name):
            output_file = f"tuning_results/cvae_plot{'_rand_mask' if random_mask else ''}_q{q}_lr_{lr}_epochs_{num_epochs}_particles_{num_particles}_z_{z_dim}.png"
            shutil.copy(file_name, output_file)
            print(f"Backup created for '{file_name}'.")


Running with lr=0.0001, num_epochs=50, num_particles=20, z-dim=200
Device:  cuda:0
Using random mask
Training with 1 quadrant as input...
Files already downloaded and verified
Files already downloaded and verified
Error with configuration lr=0.0001, num_epochs=50, num_particles=20, z-dim=200: 'Namespace' object has no attribute 'allow_baseline_reuse'
Running with lr=0.0001, num_epochs=50, num_particles=20, z-dim=400
Device:  cuda:0
Using random mask
Training with 1 quadrant as input...
Files already downloaded and verified
Files already downloaded and verified
Error with configuration lr=0.0001, num_epochs=50, num_particles=20, z-dim=400: 'Namespace' object has no attribute 'allow_baseline_reuse'
Running with lr=0.0001, num_epochs=50, num_particles=10, z-dim=200
Device:  cuda:0
Using random mask
Training with 1 quadrant as input...
Files already downloaded and verified
Files already downloaded and verified
Error with configuration lr=0.0001, num_epochs=50, num_particles=10, z-dim=200: 

KeyboardInterrupt: 

In [5]:
# Combine all results for analysis
result_files = glob.glob("tuning_results/*.csv")
if result_files:
    combined_results = []

    for file in result_files:
        # Read each result file
        df = pd.read_csv(file)

        # Rename the first column if necessary
        if 'Unnamed: 0' in df.columns:
            df.rename(columns={'Unnamed: 0': 'Method'}, inplace=True)

        print(df)

        # Extract performance gaps and compute the aggregate gap
        performance_gap_row = df.loc[df['Method'] == 'Performance gap']
        aggregate_gap = performance_gap_row[['1 quadrant', '2 quadrants', '3 quadrants']].sum(axis=1).values[0]

        # Store results with the file name and aggregate gap
        combined_results.append({
            "file": file,
            "aggregate_gap": aggregate_gap
        })

    # Convert to a DataFrame for sorting and analysis
    combined_results_df = pd.DataFrame(combined_results)

    # Sort by aggregate gap in descending order (largest gap first)
    combined_results_df = combined_results_df.sort_values(by="aggregate_gap", ascending=False)

    # Save the combined results to a file
    combined_results_df.to_csv("tuning_results/combined_results.csv", index=False)
    print("All results combined and saved to 'tuning_results/combined_results.csv'.")

    # Display the best configuration
    print("Best configuration:")
    best_file = combined_results_df.iloc[0]['file']
    print(f"File: {best_file}, Aggregate Performance Gap: {combined_results_df.iloc[0]['aggregate_gap']}")

    # Optionally, load and display the details of the best result
    best_result = pd.read_csv(best_file)
    print("\nDetails of the best configuration:")
    print(best_result)
else:
    print("No results to combine.")


               Method   1 quadrant  2 quadrants  3 quadrants
0       NN (baseline)  2236.662846  1754.437353  1413.662279
1  CVAE (Monte Carlo)  1423.987901  1035.200535   529.468100
2     Performance gap   812.674944   719.236818   884.194179
               Method   1 quadrant  2 quadrants  3 quadrants
0       NN (baseline)  2545.570408  2551.728035  2139.701337
1  CVAE (Monte Carlo)  1404.176360  1030.918563   530.842307
2     Performance gap  1141.394047  1520.809473  1608.859031
               Method   1 quadrant  2 quadrants  3 quadrants
0       NN (baseline)  3453.096601  2748.454997  1947.861866
1  CVAE (Monte Carlo)  1421.109816  1084.252118   554.312988
2     Performance gap  2031.986784  1664.202879  1393.548878
               Method   1 quadrant  2 quadrants  3 quadrants
0       NN (baseline)  2560.857781  1611.340043  1710.103900
1  CVAE (Monte Carlo)  1412.916168  1040.832582   532.713018
2     Performance gap  1147.941614   570.507461  1177.390882
               Method   

In [8]:
# zip all the files in tuning_results folder
import tarfile
import os
import glob

with tarfile.open("tuning_results.tar.gz", "w:gz") as tar:
    print("num of files:", len(glob.glob("tuning_results/*")))
    for file in glob.glob("./tuning_results/*"):
        tar.add(file, arcname=os.path.basename(file))


num of files: 49
