In [None]:
import torch
# from rfm import LaplaceRFM
import argparse
import sys
import os

import joblib

import pandas as pd

from sklearn.preprocessing import StandardScaler
parent_dir = os.path.abspath("..")  
sys.path.append(parent_dir)
from tools_1 import *


import time
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from tools import *

from scipy.spatial import KDTree

from IPython.display import display

import matplotlib.pyplot as plt
%matplotlib inline

import scipy.stats as stats

from scipy.stats import gaussian_kde

import plotly.graph_objects as go

%load_ext autoreload
%autoreload 2


def plot_distributions(data_list, plot_type='kde', title='Distribution Plot', legends=None,
                       shade=False, save_to_path=None, alpha=0.3):
    """
    Plot the distributions of several 1D datasets on a single figure.

    Parameters:
    - data_list: List of 1D arrays/lists of numerical values (each is flattened to float64).
    - plot_type: 'kde' or 'hist' for the type of distribution plot.
    - title: Title of the plot.
    - legends: List of legend labels, one per dataset. Datasets without a matching
      entry are labelled 'Data <n>'.
    - shade: Boolean, if True and plot_type is 'kde', shade the area under the curve.
    - save_to_path: If not None/empty, save the figure to this file path
      (the parent directory is created when missing).
    - alpha: Transparency level of the KDE curves. Histograms are drawn with a
      fixed alpha of 0.6 and do not use this value.

    Raises:
    - ValueError: if plot_type is neither 'kde' nor 'hist'.
    """
    # Validate before creating the figure so a bad argument never leaves an
    # empty figure behind and is reported even when data_list is empty.
    if plot_type not in ('kde', 'hist'):
        raise ValueError("plot_type must be either 'kde' or 'hist'")

    plt.figure(figsize=(10, 6))

    # Distinguishable colours from matplotlib's tab10 palette
    colour_list = plt.colormaps.get_cmap('tab10').colors

    for idx, data in enumerate(data_list):
        data = np.asarray(data, dtype=np.float64).flatten()
        label = legends[idx] if legends and idx < len(legends) else f'Data {idx + 1}'

        # Wrap around the palette when there are more datasets than colours
        colour = colour_list[idx % len(colour_list)]

        if plot_type == 'kde':
            sns.kdeplot(data, label=label, bw_adjust=1, fill=shade, alpha=alpha, color=colour)
        else:
            plt.hist(data, bins=30, alpha=0.6, label=label, density=True, color=colour)

    plt.title(title)
    plt.xlabel('Value')
    plt.ylabel('Density')
    plt.legend()
    plt.tight_layout()

    # Save plot if save_to_path is provided
    if save_to_path:
        save_dir = os.path.dirname(save_to_path)
        if save_dir:
            # exist_ok avoids a race between the existence check and creation
            os.makedirs(save_dir, exist_ok=True)

        plt.savefig(save_to_path)

    # Always show the plot
    plt.show()


def plot_marginals(dataframes, categorical_features, df_names=None, columns_to_plot=None,
                   plot_type='kde', title='Marginal Plots', alpha=0.6, save_path=None, shade = True):
    """
    Plot one figure per feature comparing its marginal distribution across DataFrames.

    Categorical features are drawn as grouped bars of normalised value counts;
    all other features are drawn as a KDE or a density histogram.

    Parameters:
    - dataframes: Non-empty list of pandas DataFrames to plot.
    - categorical_features: List of column names considered as categorical.
    - df_names: List of names corresponding to each DataFrame. Defaults to 'DF <n>'.
    - columns_to_plot: Columns to plot. If None or empty, all columns of the
      first DataFrame are plotted.
    - plot_type: 'kde' or 'hist' for numerical distribution.
    - title: Suffix of each figure title ('<feature> - <title>').
    - alpha: Transparency level for all plots.
    - save_path: Folder in which '<feature>.png' files are saved. Created if missing.
    - shade: If True and plot_type is 'kde', fill the area under the curve.

    Raises:
    - ValueError: if dataframes is not a non-empty list, if df_names does not
      match its length, or if plot_type is neither 'kde' nor 'hist'.
    """
    if not isinstance(dataframes, list):
        raise ValueError("dataframes must be a list of pandas DataFrames.")
    if not dataframes:
        raise ValueError("dataframes must contain at least one DataFrame.")
    if df_names and len(df_names) != len(dataframes):
        raise ValueError("Length of df_names must match the number of dataframes.")
    # Checked up front so the error does not depend on a numerical feature being reached
    if plot_type not in ('kde', 'hist'):
        raise ValueError("plot_type must be either 'kde' or 'hist'")

    # Use default DataFrame names if none (or an empty list) are provided
    if not df_names:
        df_names = [f'DF {idx + 1}' for idx in range(len(dataframes))]

    # Explicit None/len test: a pandas Index or numpy array has no single truth value
    if columns_to_plot is None or len(columns_to_plot) == 0:
        columns_to_plot = list(dataframes[0].columns)
    else:
        columns_to_plot = list(columns_to_plot)

    # Create save folder if save_path is provided
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    categorical = set(categorical_features)
    n_frames = len(dataframes)

    for feature in columns_to_plot:
        is_categorical = feature in categorical
        plt.figure(figsize=(8, 5))

        if is_categorical:
            bar_width = 0.35 / n_frames  # the whole group spans 0.35 axis units
            # Categories (and their order) come from the first DataFrame only
            categories = dataframes[0][feature].dropna().unique()
            positions = np.arange(len(categories))

            for idx, df in enumerate(dataframes):
                value_counts = df[feature].value_counts(normalize=True).reindex(categories, fill_value=0)

                # plt.bar centres each bar on its x value, so offset by (n - 1) / 2
                # bar widths to centre the whole group on the tick
                bar_position = positions + (idx - (n_frames - 1) / 2) * bar_width
                plt.bar(bar_position, value_counts.values, width=bar_width, alpha=alpha,
                        label=f'{df_names[idx]}')

            plt.xticks(positions, categories)
        else:
            # Plot KDE or histogram for numerical features
            for idx, df in enumerate(dataframes):
                if plot_type == 'kde':
                    sns.kdeplot(df[feature].dropna(), label=f'{df_names[idx]}',
                                alpha=alpha, fill=shade)
                else:
                    plt.hist(df[feature].dropna(), bins=30, alpha=alpha,
                             label=f'{df_names[idx]}', density=True)

        plt.title(f'{feature} - {title}')
        plt.xlabel(feature)
        plt.ylabel('Frequency' if is_categorical else 'Density')
        plt.legend()
        plt.tight_layout()

        # Save plot if save_path is provided
        if save_path:
            file_name = os.path.join(save_path, f'{feature}.png')
            plt.savefig(file_name)

        # Always show the plot
        plt.show()

In [None]:
# Make the tabkde checkout importable by adding it to the module search path.
# NOTE(review): this is a hard-coded, machine-specific absolute path; consider
# deriving it from a configurable project root instead.
sys.path.append("/Users/.../Desktop/tabkde-main")


# Now you can import your module
import tabkde

In [None]:
# Resolve the working directory and its two ancestors first, then report all three.
original_dir = os.getcwd()
parent_dir = os.path.abspath(os.pardir)
parent_of_parent_dir = os.path.abspath(os.path.join(os.pardir, os.pardir))

cprint(f'current_dir is {original_dir}', color='green')
cprint(f'parent_dir is {parent_dir}', color='red')
cprint(f'parent_of_parent_dir is {parent_of_parent_dir}', color='blue')

In [None]:
# Save the current working directory
data_name = 'ibm_func'
data_path = parent_of_parent_dir + f'/synthetic/{data_name}/'
cprint(f'data_dir is {data_path}', color = 'cyan')

In [None]:
# Location of the dataset's info.json.
# NOTE(review): the path is rooted at '/' and json_path is not used in the cells
# visible here; confirm it should not be built from the project directory.
json_path = f'/tabkde-main/data/{data_name}/info.json'

In [None]:
# Folder for plots, located under the directory the notebook was started from.
plot_dir = original_dir + '/Plots/'
cprint(f'plot_dir is {plot_dir}', color = 'blue', bold = True)

In [None]:
data_sets = [
    'real.csv', 'test.csv', 'tabsyn.csv', 'diffusion_on_copula.csv',
    'simple_KDE_VAE_encoding.csv', 'KDE_VAE_encoding.csv',
    'smote.csv', 'TabKDE.csv', 'simple_KDE.csv'
]

# Load all specified files
loaded_data = load_files(data_path, *data_sets)

# Check for failure BEFORE touching the result: the .get calls below would
# otherwise raise an unrelated AttributeError on None.
if loaded_data is None:
    raise RuntimeError("❌ Failed to load files.")

# Extract datasets individually (a dataset that was not loaded comes back as None)
real = loaded_data.get("real")
test = loaded_data.get("test")
Tabsyn = loaded_data.get("tabsyn")
diffusion_on_copula = loaded_data.get("diffusion_on_copula")
simple_KDE_VAE_encoding = loaded_data.get("simple_KDE_VAE_encoding")
KDE_VAE_encoding = loaded_data.get("KDE_VAE_encoding")
smote = loaded_data.get("smote")
TabKDE = loaded_data.get("TabKDE")
simple_KDE = loaded_data.get("simple_KDE")

# Report every requested dataset; missing ones are listed instead of crashing on .shape
print("✅ Files loaded successfully.\n")
for _key in ("real", "test", "tabsyn", "diffusion_on_copula", "simple_KDE_VAE_encoding",
             "KDE_VAE_encoding", "smote", "TabKDE", "simple_KDE"):
    _frame = loaded_data.get(_key)
    if _frame is None:
        print(f"{_key}: not loaded")
    else:
        print(f"{_key} shape: {_frame.shape}")

In [None]:
def load_model(name, path):
    """
    Load and return the joblib artifact stored as '<name>.joblib' inside `path`.

    joblib.load unpickles the file, so only point this at trusted checkpoints.
    """
    artifact_file = f"{name}.joblib"
    return joblib.load(os.path.join(path, artifact_file))


def load_all_models(dataname):
    """
    Load the three checkpoints stored for `dataname`.

    Checkpoints are read from
    '<parent_of_parent_dir>/tabsyn/copula_encoding/ckpt/<dataname>/'.

    Returns:
    - (ordinal_encoder_model, copula_model, mix_gaussian_model)

    The mixture model is read from 'mix_gaussian_model.joblib'. Earlier versions
    of this function loaded 'copula_model.joblib' a second time for it; that
    behaviour is kept as a fallback, with a warning, when the dedicated file
    does not exist.
    """
    model_path = f'{parent_of_parent_dir}/tabsyn/copula_encoding/ckpt/{dataname}/'
    cprint(f'Downloading the models from {model_path}', color = 'green')
    copula_model = load_model(name = 'copula_model', path = model_path)
    ordinal_encoder_model = load_model(name = 'ordinal_encoder_model', path = model_path)

    mix_gaussian_name = 'mix_gaussian_model'
    if not os.path.exists(os.path.join(model_path, f'{mix_gaussian_name}.joblib')):
        # Fall back to the previous behaviour instead of failing outright
        cprint(f'{mix_gaussian_name}.joblib not found in {model_path}; '
               f'falling back to copula_model.joblib', color = 'red')
        mix_gaussian_name = 'copula_model'
    mix_gaussian_model = load_model(name = mix_gaussian_name, path = model_path)

    return ordinal_encoder_model, copula_model, mix_gaussian_model

In [None]:
# Load the stored encoder / copula / mixture checkpoints for the current dataset.
ordinal_encoder_model, copula_model, mix_gaussian_model = load_all_models(data_name)

In [None]:
# Column metadata as reported by the encoder: all columns, categorical ones, numerical ones.
columns, cat_columns, num_columns= ordinal_encoder_model.get_columns()

In [None]:
def count_unique_categories(df, categorical_features):
    """Map each listed column of `df` to its number of distinct non-null values."""
    unique_counts = {}
    for column in categorical_features:
        unique_counts[column] = df[column].nunique(dropna=True)
    return unique_counts
# Distinct-value count per categorical column of the real data; their sum is the
# width a one-hot encoding of those columns would have.
freqs = count_unique_categories(real, cat_columns)
print(freqs)
print(f'one-hot encoding dimension is {sum(freqs.values())}')

In [None]:
# Display the full column list returned by the encoder.
columns

In [None]:
# Encode every dataset with the same fitted encoder so distances below are comparable.
# transform returns a pair; only the first element is kept here.
real_encoded, _ = ordinal_encoder_model.transform(real)
test_encoded, _ = ordinal_encoder_model.transform(test)
# tabsyn_encoded, _ = ordinal_encoder_model.transform(Tabsyn)
diffusion_on_copula_encoded, _ = ordinal_encoder_model.transform(diffusion_on_copula)
# simple_KDE_VAE_encoding_encoded, _ = ordinal_encoder_model.transform(simple_KDE_VAE_encoding)
# KDE_VAE_encoding_encoded, _ = ordinal_encoder_model.transform(KDE_VAE_encoding)
smote_encoded, _ = ordinal_encoder_model.transform(smote)
TabKDE_encoded, _ = ordinal_encoder_model.transform(TabKDE)
simple_KDE_encoded, _ = ordinal_encoder_model.transform(simple_KDE)

In [None]:
def save_datasets_with_dummy_label(real, test, save_path):
    """
    Write both datasets to CSV with an extra dummy 'label' column set to 'a'.

    The input DataFrames are left untouched: the label column is added to
    copies, so callers do not need to pass copies themselves. An existing
    'label' column is overwritten in the written files.

    Parameters:
    - real (pd.DataFrame): Real dataset.
    - test (pd.DataFrame): Test dataset.
    - save_path (str): Folder where the CSV files are saved (created if missing).

    Files saved:
    - real.csv
    - test.csv
    """
    # Ensure the save path exists
    os.makedirs(save_path, exist_ok=True)

    # assign() returns a new frame, so the caller's objects are not mutated
    labelled_real = real.assign(label='a')
    labelled_test = test.assign(label='a')

    # Save datasets
    labelled_real.to_csv(os.path.join(save_path, 'real.csv'), index=False)
    labelled_test.to_csv(os.path.join(save_path, 'test.csv'), index=False)

# Export the encoded real/test sets (as copies) with the dummy label column,
# into a per-dataset folder under the notebook's start directory.
save_datasets_with_dummy_label(real_encoded.copy(), test_encoded.copy(), 
                               save_path = original_dir +f'/Data_for_Our_encoding_Tabsyn/{data_name}_copula_encoded')

In [None]:
# CSV of synthetic samples in the encoded space.
# NOTE(review): hard-coded absolute path with the dataset name baked in; it does
# not follow data_name.
OrdinaTabsyn_path = '/tabkde-main/synthetic/ibm_func_copula_encoded/tabsyn.csv' 

In [None]:
# Read the encoded synthetic samples into a DataFrame.
df_OrdinaTabsyn = pd.read_csv(OrdinaTabsyn_path)

In [None]:
# All columns except the last one (presumably the dummy 'label' column added
# before export; verify against the CSV).
# The original referenced the undefined name `df_OrdinaTabsyn_path` (NameError);
# the DataFrame read in the previous cell is `df_OrdinaTabsyn`.
C = list(df_OrdinaTabsyn.columns)[:-1]

In [None]:
# Keep only the columns selected in C (everything but the last column of the CSV).
OrdinaTabsyn_encoded = df_OrdinaTabsyn[C]

In [None]:
# Map the encoded synthetic samples back through the encoder's decode step.
syn_OrdinaTabsyn = ordinal_encoder_model.decode(OrdinaTabsyn_encoded)

In [None]:
# Preview the first decoded rows.
syn_OrdinaTabsyn.head()

In [None]:
# Same distinct-value summary as for the real data, now on the decoded synthetic set.
# Note: this overwrites the `freqs` computed for the real data earlier.
freqs = count_unique_categories(syn_OrdinaTabsyn, cat_columns)
print(freqs)
print(f'one-hot encoding dimension is {sum(freqs.values())}')

In [None]:
# Value counts of the 'Is Fraud?' column in the decoded synthetic data.
counts = syn_OrdinaTabsyn['Is Fraud?'].value_counts()
counts

In [None]:
# Value counts of the 'Is Fraud?' column in the real data (rebinds `counts`).
counts = real['Is Fraud?'].value_counts()
counts

In [None]:
# Value counts of the 'Is Fraud?' column in the test data (rebinds `counts`).
counts = test['Is Fraud?'].value_counts()
counts

In [None]:
# Destination CSV for the decoded synthetic data.
# NOTE(review): hard-coded absolute path; it does not follow data_name / data_path.
path_to_save = '/tabkde-main/synthetic/ibm_func/Ordinal_tabsyn.csv'

In [None]:
# Write the decoded synthetic data without the index column.
syn_OrdinaTabsyn.to_csv(path_to_save, index=False)

In [None]:
# Compute distances to real_encoded
# The first call is timed and its wall-clock duration in seconds is printed.
# compute_min_distances(a, b) is presumably the per-row minimum distance from
# `a` to `b`; confirm against its definition in tools.
t_0 = time.time()
d_test_to_train = compute_min_distances(test_encoded, real_encoded)
print(time.time() - t_0)
# d_tabsyn_to_train = compute_min_distances(tabsyn_encoded, real_encoded)
d_diffusion_copula_to_train = compute_min_distances(diffusion_on_copula_encoded, real_encoded)
# d_simple_kde_vae_to_train = compute_min_distances(simple_KDE_VAE_encoding_encoded, real_encoded)
# d_kde_vae_to_train = compute_min_distances(KDE_VAE_encoding_encoded, real_encoded)
d_smote_to_train = compute_min_distances(smote_encoded, real_encoded)
d_tabkde_to_train = compute_min_distances(TabKDE_encoded, real_encoded)
d_simple_kde_to_train = compute_min_distances(simple_KDE_encoded, real_encoded) 

# Compute distances to test_encoded
# Same synthetic sets, measured against the held-out test data instead.
# d_tabsyn_to_test = compute_min_distances(tabsyn_encoded, test_encoded)
d_diffusion_copula_to_test = compute_min_distances(diffusion_on_copula_encoded, test_encoded)
# d_simple_kde_vae_to_test = compute_min_distances(simple_KDE_VAE_encoding_encoded, test_encoded)
# d_kde_vae_to_test = compute_min_distances(KDE_VAE_encoding_encoded, test_encoded)
d_smote_to_test = compute_min_distances(smote_encoded, test_encoded)
d_tabkde_to_test = compute_min_distances(TabKDE_encoded, test_encoded)
d_simple_kde_to_test = compute_min_distances(simple_KDE_encoded, test_encoded) 

In [None]:
# Each score is the fraction of synthetic rows whose distance to the training
# data is strictly smaller than their distance to the test data.
# score_tabsyn = np.count_nonzero(d_tabsyn_to_train < d_tabsyn_to_test) / d_tabsyn_to_train.shape[0]

score_diffusion_copula = np.count_nonzero(d_diffusion_copula_to_train < d_diffusion_copula_to_test) / d_diffusion_copula_to_train.shape[0]

# score_simple_kde_vae = np.count_nonzero(d_simple_kde_vae_to_train < d_simple_kde_vae_to_test) / d_simple_kde_vae_to_train.shape[0]

# score_kde_vae = np.count_nonzero(d_kde_vae_to_train < d_kde_vae_to_test) / d_kde_vae_to_train.shape[0]

# score_smote = np.count_nonzero(d_smote_to_train < d_smote_to_test) / d_smote_to_train.shape[0]
score_tabkde = np.count_nonzero(d_tabkde_to_train < d_tabkde_to_test) / d_tabkde_to_train.shape[0]
score_simple_kde = np.count_nonzero(d_simple_kde_to_train < d_simple_kde_to_test) / d_simple_kde_to_train.shape[0]

# Displayed order: diffusion-on-copula, simple KDE, TabKDE
score_diffusion_copula, score_simple_kde,  score_tabkde

In [None]:
# Compare the distance distributions of the active methods.
# `names` has one more entry than `list_synt_data`: its first entry labels the
# positional d_test_to_train argument (presumably the reference curve; confirm
# against DCR_dist_comparison in tools). The remaining names must stay in the
# same order as list_synt_data.
DCR_dist_comparison(
    d_test_to_train, 
    list_synt_data=[
        d_simple_kde_to_train,
        d_simple_kde_to_test,
        # d_tabsyn_to_train,
        # d_tabsyn_to_test,
        d_diffusion_copula_to_train,
        d_diffusion_copula_to_test,
        # d_simple_kde_vae_to_train,
        # d_simple_kde_vae_to_test,
        # d_kde_vae_to_train,
        # d_kde_vae_to_test,
        d_smote_to_train,
        d_smote_to_test,
        d_tabkde_to_train,
        d_tabkde_to_test
    ], 
    names=[
        'DCR_test_to_train',
        'DCR_Simple_KDE_synth_to_train',
        'DCR_Simple_KDE_synth_to_hold_out',
        # 'DCR_tabsyn_synth_to_train',
        # 'DCR_tabsyn_synth_to_held_out',
        'DCR_diffusion_copula_synth_to_train',
        'DCR_diffusion_copula_synth_to_hold_out',
        # 'DCR_simple_KDE_VAE_synth_to_train',
        # 'DCR_simple_KDE_VAE_synth_to_hold_out',
        # 'DCR_KDE_VAE_synth_to_train',
        # 'DCR_KDE_VAE_synth_to_hold_out',
        'DCR_SMOTE_synth_to_train',
        'DCR_SMOTE_synth_to_hold_out',
        'DCR_TabKDE_synth_to_train',
        'DCR_TabKDE_synth_to_hold_out'
    ],
    evaluating_metric=None,
    title='DCR Density Comparison'
)

In [None]:
# DCR_dist_comparison(
#     d_test_to_train, 
#     list_synt_data=[
#         d_simple_kde_to_train,
#         d_simple_kde_to_test,
#         d_tabsyn_to_train,
#         d_tabsyn_to_test,
#         d_diffusion_copula_to_train,
#         d_diffusion_copula_to_test,
#         d_simple_kde_vae_to_train,
#         d_simple_kde_vae_to_test,
#         d_kde_vae_to_train,
#         d_kde_vae_to_test,
#         d_smote_to_train,
#         d_smote_to_test,
#         d_tabkde_to_train,
#         d_tabkde_to_test
#     ], 
#     names=[
#         'DCR_test_to_train',
#         'DCR_Simple_KDE_synth_to_train',
#         'DCR_Simple_KDE_synth_to_hold_out',
#         'DCR_tabsyn_synth_to_train',
#         'DCR_tabsyn_synth_to_held_out',
#         'DCR_diffusion_copula_synth_to_train',
#         'DCR_diffusion_copula_synth_to_hold_out',
#         'DCR_simple_KDE_VAE_synth_to_train',
#         'DCR_simple_KDE_VAE_synth_to_hold_out',
#         'DCR_KDE_VAE_synth_to_train',
#         'DCR_KDE_VAE_synth_to_hold_out',
#         'DCR_SMOTE_synth_to_train',
#         'DCR_SMOTE_synth_to_hold_out',
#         'DCR_TabKDE_synth_to_train',
#         'DCR_TabKDE_synth_to_hold_out'
#     ],
#     evaluating_metric=None,
#     title='DCR Density Comparison'
# )

In [None]:
# Categorical columns with more distinct values than this are left out of the
# marginal plots (too many bars to read).
MAX_CATEGORIES_TO_PLOT = 10

# A comprehension keeps the loop variable local. The original loop used `C`,
# which overwrote the global `C` column list that an earlier cell reads.
Cat_columns_to_plot = [
    col for col in columns
    if col in cat_columns and real[col].nunique() <= MAX_CATEGORIES_TO_PLOT
]
Cat_columns_to_plot

In [None]:
# Static marginal plots: real data against three synthetic sets, for all
# numerical columns plus the low-cardinality categorical ones.
# df_names must stay in the same order as `dataframes`.
dataframes = [real,  diffusion_on_copula, TabKDE, simple_KDE]

plot_marginals(dataframes, categorical_features = cat_columns, 
               plot_type='kde', 
               title=f'{data_name}', 
               columns_to_plot= num_columns + Cat_columns_to_plot ,
               df_names= ["real", "diffusion_on_copula",
                         "TabKDE", "simple_KDE"], alpha=0.4, save_path = None, shade = False)

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go

def _kde_curve(values, bw_adjust=1):
    """
    Return the (x, y) arrays of the KDE curve seaborn draws for `values`.

    The curve is drawn on a throwaway matplotlib figure that is always closed,
    so nothing is displayed and no already-open figure is read or modified.
    Returns None when seaborn draws no line for the data.
    """
    scratch_fig, scratch_ax = plt.subplots()
    try:
        sns.kdeplot(values, bw_adjust=bw_adjust, ax=scratch_ax)
        drawn = scratch_ax.get_lines()
        return drawn[0].get_data() if drawn else None
    finally:
        plt.close(scratch_fig)


def plot_marginals_interactive(dataframes, categorical_features, df_names=None, columns_to_plot=None,
                   plot_type='kde', title='Marginal Plots', alpha=0.6, save_path=None,
                   interactive=False, shade=True):
    """
    Plot one figure per feature comparing its marginal distribution across DataFrames,
    either with matplotlib (static) or with Plotly (interactive).

    Categorical features are drawn as grouped bars of normalised value counts;
    all other features are drawn as a KDE or a histogram.

    Parameters:
    - dataframes: Non-empty list of pandas DataFrames to plot.
    - categorical_features: List of column names considered as categorical.
    - df_names: List of names corresponding to each DataFrame. Defaults to 'DF <n>'.
    - columns_to_plot: Columns to plot. If None or empty, all columns of the
      first DataFrame are plotted.
    - plot_type: 'kde' or 'hist' for numerical distribution.
    - title: Suffix of each figure title ('<feature> - <title>').
    - alpha: Transparency level for all plots.
    - save_path: Folder to save the plots in. Created if missing. Static plots are
      written as '<feature>.png', interactive plots as '<feature>.html'.
    - interactive: If True, uses Plotly for interactive plots.
    - shade: If True and plot_type is 'kde', fills the area under the curve.

    Raises:
    - ValueError: if dataframes is not a non-empty list, if df_names does not
      match its length, or if plot_type is neither 'kde' nor 'hist'.
    """
    import warnings

    if not isinstance(dataframes, list):
        raise ValueError("dataframes must be a list of pandas DataFrames.")
    if not dataframes:
        raise ValueError("dataframes must contain at least one DataFrame.")
    if df_names and len(df_names) != len(dataframes):
        raise ValueError("Length of df_names must match the number of dataframes.")
    # Checked up front so both the static and the interactive branch reject it
    if plot_type not in ('kde', 'hist'):
        raise ValueError("plot_type must be either 'kde' or 'hist'")

    if not df_names:
        df_names = [f'DF {idx + 1}' for idx in range(len(dataframes))]

    # Explicit None/len test: a pandas Index or numpy array has no single truth value
    if columns_to_plot is None or len(columns_to_plot) == 0:
        columns_to_plot = list(dataframes[0].columns)
    else:
        columns_to_plot = list(columns_to_plot)

    if save_path:
        os.makedirs(save_path, exist_ok=True)

    categorical = set(categorical_features)
    n_frames = len(dataframes)

    for feature in columns_to_plot:
        is_categorical = feature in categorical

        if interactive:
            fig = go.Figure()

            if is_categorical:
                # Categories (and their order) come from the first DataFrame only
                categories = dataframes[0][feature].dropna().unique()
                for idx, df in enumerate(dataframes):
                    value_counts = df[feature].value_counts(normalize=True).reindex(categories, fill_value=0)
                    fig.add_trace(go.Bar(
                        x=categories,
                        y=value_counts.values,
                        name=df_names[idx],
                        opacity=alpha
                    ))
                fig.update_layout(
                    barmode='group',
                    title=f'{feature} - {title}',
                    xaxis_title=feature,
                    yaxis_title='Frequency'
                )
            else:
                for idx, df in enumerate(dataframes):
                    col_data = df[feature].dropna()
                    if plot_type == 'kde':
                        curve = _kde_curve(col_data, bw_adjust=1)
                        if curve is None:
                            # Nothing to draw for this DataFrame; keep the others
                            warnings.warn(
                                f"No KDE curve could be drawn for '{feature}' in "
                                f"'{df_names[idx]}'; skipping this trace."
                            )
                            continue
                        kde_x, kde_y = curve
                        trace_kwargs = dict(x=kde_x, y=kde_y, mode='lines',
                                            name=df_names[idx], opacity=alpha)
                        if shade:
                            trace_kwargs['fill'] = 'tozeroy'
                        fig.add_trace(go.Scatter(**trace_kwargs))
                    else:
                        fig.add_trace(go.Histogram(
                            x=col_data,
                            name=df_names[idx],
                            opacity=alpha,
                            histnorm='probability'
                        ))

                fig.update_layout(
                    title=f'{feature} - {title}',
                    xaxis_title=feature,
                    yaxis_title='Density' if plot_type == 'kde' else 'Frequency',
                    barmode='overlay' if plot_type == 'hist' else None
                )

            if save_path:
                # Interactive figures are saved as standalone HTML
                fig.write_html(os.path.join(save_path, f'{feature}.html'))

            fig.show()

        else:
            plt.figure(figsize=(8, 5))

            if is_categorical:
                bar_width = 0.35 / n_frames  # the whole group spans 0.35 axis units
                categories = dataframes[0][feature].dropna().unique()
                positions = np.arange(len(categories))

                for idx, df in enumerate(dataframes):
                    value_counts = df[feature].value_counts(normalize=True).reindex(categories, fill_value=0)
                    # plt.bar centres each bar on its x value, so offset by
                    # (n - 1) / 2 bar widths to centre the group on the tick
                    bar_position = positions + (idx - (n_frames - 1) / 2) * bar_width
                    plt.bar(bar_position, value_counts.values, width=bar_width, alpha=alpha,
                            label=f'{df_names[idx]}')

                plt.xticks(positions, categories)
            else:
                for idx, df in enumerate(dataframes):
                    if plot_type == 'kde':
                        sns.kdeplot(df[feature].dropna(), label=f'{df_names[idx]}',
                                    alpha=alpha, fill=shade)
                    else:
                        plt.hist(df[feature].dropna(), bins=30, alpha=alpha,
                                 label=f'{df_names[idx]}', density=True)

            plt.title(f'{feature} - {title}')
            plt.xlabel(feature)
            plt.ylabel('Frequency' if is_categorical else 'Density')
            plt.legend()
            plt.tight_layout()

            if save_path:
                file_name = os.path.join(save_path, f'{feature}.png')
                plt.savefig(file_name)

            plt.show()


In [None]:
# Real data first, then the synthetic sets to compare it with (rebinds `dataframes`).
dataframes = [real,  diffusion_on_copula, TabKDE, simple_KDE]

In [None]:
# Interactive (Plotly) version of the marginal comparison; nothing is saved
# because save_path is None. df_names must stay in the same order as `dataframes`.
plot_marginals_interactive(dataframes, categorical_features = cat_columns, 
               plot_type='kde', 
               title=f'{data_name}', 
               columns_to_plot= num_columns + Cat_columns_to_plot ,
               df_names= ["real",  "CopulaDiff",
                         "TabKDE", "simple_KDE"], alpha=0.4, save_path = None, interactive = True)

In [None]:

# File names matching `dataframes` position by position (index 0 is the real data).
# The two lists are coupled only by order, so keep them in sync.
data_names = [
    'real.csv', 'diffusion_on_copula.csv', 'TabKDE.csv', 'simple_KDE.csv'
]

# Relative to the current working directory
save_path = f'Eval Density/{data_name}/correlation_divergence/'

# Compare the real data (dataframes[0]) with each synthetic set in turn.
# Note: the dictionary keys keep the '.csv' suffix, while the plot title and
# the output path strip it with [:-4].
divergence_matrix = {} 
for i in range(1,len(dataframes)):
    synth_data =  dataframes[i]
    divergence_matrix[f'real_vs_{data_names[i]}'] = compute_correlation_divergence(dataframes[0], 
                                                        synth_data, show_numbers = False, 
                                                        title = f'{data_name}: real vs {data_names[i][:-4]}',
                                                        save_path = save_path+f'real_vs_{data_names[i][:-4]}')

In [None]:
# Final marginal plots (saved to disk): real data against the two main methods.
dataframes = [real, diffusion_on_copula, TabKDE]

# Title uses the dataset name without its trailing '_<suffix>'. The original
# fixed-width slice data_name[:-6] turns 'ibm_func' into 'ib'; splitting on the
# last underscore gives 'ibm' and does not depend on the suffix length.
plot_marginals(dataframes, categorical_features = cat_columns,
               plot_type='kde',
               title=data_name.rsplit('_', 1)[0],
               columns_to_plot= num_columns + Cat_columns_to_plot ,
               df_names= ["real", "CopulaDiff",  "TabKDE", ], alpha=0.8, shade = False,
               save_path =f'Eval Density/{data_name}/marginals')