In [1]:
import numpy as np
import torch
from ADEN import ADEN
from torchinfo import summary
from TestCaseGenerator import data_RLClustering
from ADENTrain import TrainAnneal
import utils
from Env import ClusteringEnvNumpy, ClusteringEnvTorch
from ClusteringGroundTruth import cluster_gt
import pickle
from datetime import datetime
from Plotter import PlotClustering

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Fix the random seed via the project helper so repeated runs are comparable.
utils.set_seed(0)

Using device: cuda
[Seed fixed to 0]


In [None]:
# from TestCaseGenerator import data_RLClustering

# X, M, T_P, N, d = data_RLClustering(4)
# rho = np.ones(N) / N  # Uniform weights

In [2]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.io` is loaded on every SciPy version.
import scipy.io

# Path to the MATLAB file holding the London locations (relative to the notebook).
address = "MATLAB Codes/UTD19_London.mat"
# read as numpy array
data = scipy.io.loadmat(address)
locs = data['Xz']
# normalize locs to be in [0,1]x[0,1] (per-column min-max scaling)
# NOTE(review): a constant column would make the denominator zero — assumed
# not to happen for this dataset; confirm if the input file changes.
X = (locs - np.min(locs, axis=0)) / (np.max(locs, axis=0) - np.min(locs, axis=0))
N, d = X.shape  # N data points, d features per point
M = 25  # number of clusters passed to the environment in later cells
rho = np.ones(N) / N  # Uniform weights

In [3]:
import re

def extract_params(s):
    """
    Extract eps, gamma, zeta, and T from a scenario string.

    Two layouts are accepted:

    * compact:     'UDT_M50eps0.1gam0.0zet1.0T0.01D64_...'
    * underscored: 'Benchmark_parametrizedTrue_eps0.1_gamma0.0_zeta0.5_T0.001'

    Each value may be a plain decimal ('0.1', '5', '.5') or use scientific
    notation ('1e-05'), optionally signed. A character that merely follows
    the number (e.g. the '.' of a file extension) is not consumed.

    Parameters
    ----------
    s : str
        Scenario name containing the four parameters in the order
        eps, gamma, zeta, T.

    Returns
    -------
    dict
        ``{"eps": float, "gamma": float, "zeta": float, "T": float}``.

    Raises
    ------
    ValueError
        If the parameters cannot be found in ``s``.
    """
    # A float literal: optional sign, digits with an optional fractional part
    # (or a bare fraction), and an optional exponent. The previous `[\d.]+`
    # truncated '1e-05' to '1' and swallowed trailing dots such as '0.01.'.
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    pattern = (
        rf"eps(?P<eps>{number})"
        rf"_?gam(?:ma)?(?P<gamma>{number})"
        rf"_?zet(?:a)?(?P<zeta>{number})"
        rf"_?T(?P<T>{number})"
    )
    m = re.search(pattern, s)
    if not m:
        raise ValueError("Could not parse parameters from string.")
    return {k: float(v) for k, v in m.groupdict().items()}

In [None]:
# Evaluate every benchmark pickle in the benchmark folder: compute the
# distortion of the stored solutions and, when a ground truth is available,
# the relative error of each solution with respect to it.
import os
import pandas as pd
from ClusteringGroundTruth import distortion

benchmark_folder = "BenchmarkUDT"
HAS_GT = True  # whether each pickle carries a ground-truth solution (Y_GT, pi_GT)

# Keep regular files only, in sorted order: os.listdir returns entries in
# arbitrary order, and a fixed order makes the printed report reproducible.
all_files = sorted(
    name
    for name in os.listdir(benchmark_folder)
    if os.path.isfile(os.path.join(benchmark_folder, name))
)

result_columns = ["eps", "gamma", "zeta", "T", "error_opt", "error_ig"]
# One dict per evaluated scenario; the DataFrame is built once after the loop
# instead of being grown row by row with pd.concat.
result_rows = []

# loop over all files
for file_name in all_files:
    # SECURITY NOTE: pickle.load can execute arbitrary code. Only point
    # benchmark_folder at files produced by trusted benchmark runs.
    with open(os.path.join(benchmark_folder, file_name), "rb") as f:
        data = pickle.load(f)

    scenario_name = data["scenario_name"]
    # The scenario name encodes eps, gamma, zeta and T.
    params = extract_params(scenario_name)
    eps = params["eps"]
    gamma = params["gamma"]
    zeta = params["zeta"]
    T = params["T"]
    # NOTE(review): scenarios with T == 0.1 are excluded from the report; the
    # reason is not visible in this notebook.
    if T == 0.1:
        continue
    env = ClusteringEnvNumpy(
        n_data=N,
        n_clusters=M,
        n_features=d,
        parametrized=True,
        eps=eps,
        gamma=gamma,
        zeta=zeta,
        T=T,
        T_p=0.0,
    )

    Y_opt = data["Y_opt"]
    pi_opt = data["pi_opt"]
    Y_ig = data["Y_ig"]
    pi_ig = data["pi_ig"]
    arrays_to_check = [Y_opt, pi_opt, Y_ig, pi_ig]
    if HAS_GT:
        Y_GT = data["Y_GT"]
        pi_GT = data["pi_GT"]
        arrays_to_check += [Y_GT, pi_GT]

    # if any of the above values contain NAN, skip this scenario
    if any(np.isnan(arr).any() for arr in arrays_to_check):
        continue

    # The ground-truth distortion is only defined when HAS_GT is set; every
    # use below is guarded so that HAS_GT = False neither raises NameError
    # nor silently reuses a value left over from a previous run.
    if HAS_GT:
        distortion_gt = distortion(X, Y_GT, rho, pi_GT, env)
    distortion_opt = distortion(X, Y_opt, rho, pi_opt, env)
    distortion_ig = distortion(X, Y_ig, rho, pi_ig, env)

    print(scenario_name)
    if HAS_GT:
        # Relative excess distortion over the ground truth, in percent.
        error_opt = (distortion_opt - distortion_gt) / distortion_gt * 100
        error_ig = (distortion_ig - distortion_gt) / distortion_gt * 100
        print("ig:{:.4f}, opt:{:.4f}, gt:{:.4f}".format(distortion_ig, distortion_opt, distortion_gt))
        print("error opt:{:.2f}%, error_ig:{:.2f}%".format(error_opt, error_ig))
    else:
        # Without a reference solution there is no error to report.
        error_opt = error_ig = float("nan")
        print("ig:{:.4f}, opt:{:.4f}".format(distortion_ig, distortion_opt))

    result_rows.append(
        {
            "eps": eps,
            "gamma": gamma,
            "zeta": zeta,
            "T": T,
            "error_opt": error_opt,
            "error_ig": error_ig,
        }
    )

    # Optional: save a figure of the optimized clustering for this scenario.
    # PlotClustering(X, Y_opt, pi_opt, figsize=(6, 4),
    # point_size=10,
    # centroid_size=300,
    # alpha=0.9,
    # data_edge_color='white',
    # cluster_edge_color='black', 
    # save_path=f"Results/{scenario_name}.png"
    # )

# One row per evaluated scenario (empty, with the same columns, if none ran).
results_df = pd.DataFrame(result_rows, columns=result_columns)
# Show floats with two decimals whenever a DataFrame is displayed.
pd.set_option("display.precision", 2)
# SAVE results_df to a csv file with current date and time
# results_df.to_csv(
#     f"benchmark_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False
# )