In [22]:
import json
import os

import pandas as pd

def extract_quality_data(base_path, data_names, methods):
    """
    Collect column-shape error percentages for every dataset/method pair.

    For each pair, reads ``<base_path>/<data_name>/<method>/shape.csv``,
    averages the ``Score`` column separately over the rows whose ``Metric``
    is ``KSComplement`` and ``TVComplement``, and stores
    ``100 * (1 - mean score)``.

    Parameters:
    - base_path (str): Directory that contains one sub-folder per dataset.
    - data_names (list of str): Dataset folder names; they become the columns.
    - methods (list of str): Method folder names; they become the row labels.

    Returns:
    - (ks_df, tv_df): Two DataFrames built from the collected values.
      When a file cannot be processed, a message is printed and None is
      stored for that pair in both tables.
    """
    ks_errors = {name: {} for name in data_names}
    tv_errors = {name: {} for name in data_names}

    def error_percent(scores, metric):
        # Mean score of one metric's rows, flipped into an error percentage.
        metric_rows = scores[scores["Metric"] == metric]
        return 100 * (1 - metric_rows["Score"].mean())

    for name in data_names:
        for method in methods:
            csv_path = os.path.join(base_path, name, method, "shape.csv")
            try:
                scores = pd.read_csv(csv_path)
                ks_pct = error_percent(scores, "KSComplement")
                tv_pct = error_percent(scores, "TVComplement")
            except Exception as e:
                # Best effort: report the problem and leave a gap for this pair.
                print(f"Failed to read {csv_path}: {e}")
                ks_pct = tv_pct = None

            ks_errors[name][method] = ks_pct
            tv_errors[name][method] = tv_pct

    ks_df = pd.DataFrame(ks_errors)
    tv_df = pd.DataFrame(tv_errors)

    # NOTE(review): these index labels look like typos of the metric names;
    # kept unchanged because the labels appear in displayed output.
    ks_df.index.name = "KLTVComplement"
    tv_df.index.name = "TVTVComplement"

    return ks_df, tv_df
def load_json(file_path):
    """
    Load JSON data from a file.

    Parameters:
    - file_path (str): The path to the JSON file.

    Returns:
    - data (dict or list): The data loaded from the JSON file.
    """
    # JSON text is UTF-8 by specification; without an explicit encoding,
    # open() uses the platform locale and can mis-decode non-ASCII content.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [4]:
# Root folder of the density-evaluation results; extract_quality_data reads
# <base_path>/<dataset>/<method>/shape.csv beneath it.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
base_path = '/Users/.../Desktop/tabsyn-main/eval/density'

In [6]:
# Dataset folders to evaluate (columns of the result tables).
data_names = [
    'adult_equal',
    'default_equal',
    'shoppers_equal',
    'magic_equal',
    'beijing_equal',
    'news_equal',
]

# Method folders to evaluate (rows of the result tables).
methods = [
    'diffusion_on_copula',
    'simple_KDE_VAE_encoding',
    'KDE_VAE_encoding',
    'smote',
    'simple_KDE',
    'tabsyn',
    'TabKDE',
]

In [10]:
# Build the two result tables (methods as rows, datasets as columns) from the shape.csv files.
ks_df, tv_df = extract_quality_data(base_path, data_names, methods)

In [12]:
# === Display in Console ===
# Print a header, then leave ks_df as the cell's last expression so the notebook renders it.
print("=== KS DataFrame ===")

ks_df

=== KS DataFrame ===


Unnamed: 0_level_0,adult_equal,default_equal,shoppers_equal,magic_equal,beijing_equal,news_equal
KLTVComplement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
diffusion_on_copula,2.807305,2.3,11.933496,1.220949,17.124656,2.92319
simple_KDE_VAE_encoding,4.66219,6.599048,7.651257,3.983594,4.355234,10.193878
KDE_VAE_encoding,2.789288,3.242381,6.533658,1.579556,1.497817,4.211175
smote,1.782819,2.124286,2.686131,1.512252,2.711672,5.638553
simple_KDE,2.058799,4.27381,13.396594,3.069723,17.316245,7.579523
tabsyn,0.83695,1.967619,2.849959,1.335577,1.051004,2.197492
TabKDE,1.400377,1.172381,12.381184,1.025344,16.839325,2.44206


In [14]:

# NOTE(review): the header says "Trend", but tv_df is built by extract_quality_data
# from the TVComplement rows — confirm the label is the intended one.
print("\n=== Trend DataFrame ===")
tv_df


=== Trend DataFrame ===


Unnamed: 0_level_0,adult_equal,default_equal,shoppers_equal,magic_equal,beijing_equal,news_equal
TVTVComplement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
diffusion_on_copula,5.624846,2.874667,4.610706,0.673047,3.864355,4.194834
simple_KDE_VAE_encoding,7.656621,9.142,9.947283,6.60427,8.085066,15.800626
KDE_VAE_encoding,6.796741,8.134667,8.092052,5.773478,7.974902,14.413278
smote,1.549013,1.094,2.631792,0.0,1.243414,1.51347
simple_KDE,2.728278,2.618667,3.156934,0.914923,2.418814,4.893553
tabsyn,0.726804,1.099333,2.203974,0.904406,1.805729,2.335789
TabKDE,2.789698,2.846667,2.712895,0.767694,2.011687,1.788417


In [26]:
# Display labels and folder names are listed in the same order and have the
# same length; presumably entry i of one corresponds to entry i of the other.
Methods_name = [
    'Real',
    'Test',
    'Tabsyn',
    'Smote',
    'CopulaDiff',
    'VAETabKDE',
    'SimpleKDE',
    'TabKDE',
]
Folders_name = [
    'real',
    'test',
    'tabsyn',
    'smote',
    'diffusion_on_copula',
    'KDE_VAE_encoding',
    'simple_KDE',
    'TabKDE',
]

# Datasets whose metadata is loaded; includes 'ibm_func', which is not in data_names.
Datas_name = [
    'adult_equal',
    'default_equal',
    'shoppers_equal',
    'magic_equal',
    'beijing_equal',
    'news_equal',
    'ibm_func',
]

# Filled by a later cell: dataset name -> list of column names.
features_name = {}

In [28]:
# Read each dataset's column names from the metadata.json stored in its 'real' folder.
for data_name in Datas_name:
    # Derive the folder from base_path instead of repeating the absolute path
    # literal; the trailing '' keeps the final separator that the string
    # concatenation below relies on.
    path_1 = os.path.join(base_path, data_name, 'real', '')
    features_name[data_name] = load_json(path_1 + 'metadata.json')["column_names"]

In [30]:
# Inspect the column names loaded for one dataset.
features_name['beijing_equal']

['DEWP',
 'TEMP',
 'PRES',
 'Iws',
 'Is',
 'Ir',
 'pm2.5',
 'year',
 'month',
 'day',
 'hour',
 'cbwd']