In [22]:
import numpy as np
import torch
from ADEN import ADEN
from torchinfo import summary
from TestCaseGenerator import data_RLClustering
from ADENTrain import TrainAnneal
import utils
from Env import ClusteringEnvNumpy, ClusteringEnvTorch
from ClusteringGroundTruth import cluster_gt
import pickle
from datetime import datetime
from Plotter import PlotClustering

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
utils.set_seed(0)  # for reproducibility

Using device: cuda
[Seed fixed to 0]


In [23]:
from TestCaseGenerator import data_RLClustering

X, M, T_P, N, d = data_RLClustering(4)
rho = np.ones(N) / N  # Uniform weights

In [25]:
# get a list of all files inside Benchmark Folder
import os
from numpy import pi
import pandas as pd
from ClusteringGroundTruth import distortion

benchmark_folder = "Benchmark"
all_files = os.listdir(benchmark_folder)
all_files = [f for f in all_files if os.path.isfile(os.path.join(benchmark_folder, f))]
results_df = pd.DataFrame(
    columns=["eps", "gamma", "zeta", "T", "error_opt", "error_ig"]
)

# loop over all files
for file_name in all_files:
    with open(os.path.join(benchmark_folder, file_name), "rb") as f:
        data = pickle.load(f)

    scenario_name = data["scenario_name"]
    # from scenario name read eps, gamma, zeta, T. Example: Benchmark_parametrizedTrue_eps0.1_gamma0.0_zeta0.5_T0.001
    eps = float(scenario_name.split("_eps")[1].split("_")[0])
    gamma = float(scenario_name.split("_gamma")[1].split("_")[0])
    zeta = float(scenario_name.split("_zeta")[1].split("_")[0])
    T = float(scenario_name.split("_T")[1].split("_")[0])
    env = ClusteringEnvNumpy(
        n_data=N,
        n_clusters=M,
        n_features=d,
        parametrized=True,
        eps=eps,
        gamma=gamma,
        zeta=zeta,
        T=T,
        T_p=T_P,
    )
    Y_GT = data["Y_GT"]
    pi_GT = data["pi_GT"]
    Y_opt = data["Y_opt"]
    pi_opt = data["pi_opt"]
    Y_ig = data["Y_ig"]
    pi_ig = data["pi_ig"]
    # if any of the above values contain NAN, skip this scenario
    if np.isnan(Y_GT).any() or np.isnan(pi_GT).any():
        continue
    if np.isnan(Y_opt).any() or np.isnan(pi_opt).any():
        continue
    if np.isnan(Y_ig).any() or np.isnan(pi_ig).any():
        continue

    distortion_opt = distortion(X, Y_opt, rho, pi_opt, env)
    distortion_ig = distortion(X, Y_ig, rho, pi_ig, env)
    distortion_gt = distortion(X, Y_GT, rho, pi_GT, env)

    error_opt = (distortion_opt - distortion_gt) / distortion_gt * 100
    error_ig = (distortion_ig - distortion_gt) / distortion_gt * 100

    # Print the results
    # print(
    #     f"Scenario: {scenario_name} error_opt: {error_opt:.2f}%, error_ig: {error_ig:.2f}%"
    # )
    # based on eps, zeta, gamma, T, and the values of error_opt and error_ig, add a row to a pandas dataframe

    results_df = pd.concat(
        [
            results_df,
            pd.DataFrame(
                {
                    # "scenario_name": [scenario_name],
                    "eps": [eps],
                    "gamma": [gamma],
                    "zeta": [zeta],
                    "T": [T],
                    "error_opt": [error_opt],
                    "error_ig": [error_ig],
                }
            ),
        ],
        ignore_index=True,
    )
    # PlotClustering(X, Y_opt, pi_opt, figsize=(6, 4), save_path=None)
# print the dataframe up to 2 digits
pd.set_option("display.precision", 2)
# SAVE results_df to a csv file with current date and time
results_df.to_csv(
    f"benchmark_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False
)

  results_df = pd.concat(
