In [22]:
import pandas as pd
from sdv.single_table import TVAESynthesizer

from domias.evaluator import evaluate_performance
from domias.models.generator import GeneratorInterface
from domias.models.ctgan import CTGAN
from sdv.metadata import SingleTableMetadata


from scipy import stats
from scipy.stats import multivariate_normal


import numpy as np

from itertools import zip_longest

from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_california_housing, fetch_covtype, load_digits

import warnings
warnings.filterwarnings("ignore")

In [23]:
#Import dataset
df_uci = pd.read_csv(r'C:\Users\jordy\OneDrive\MSc_Python\Individual_Project\Data\UCI_Credit_Card.csv')
df_uci.drop(columns=['ID'], inplace=True)
df_uci.head()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
0,20000.0,2,2,1,24,2,2,-1,-1,-2,...,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1
1,120000.0,2,2,2,26,-1,2,0,0,0,...,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1
2,90000.0,2,2,2,34,0,0,0,0,0,...,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0
3,50000.0,2,2,1,37,0,0,0,0,0,...,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0
4,50000.0,1,2,1,57,-1,0,-1,0,0,...,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0


In [28]:
# #convert dataframe to array
# arr_uci = np.array(df_uci.iloc[:, 1:])
# arr_uci.shape

(30000, 24)

In [29]:
# def get_dataset() -> np.ndarray:
#     def data_loader() -> np.ndarray:
#         scaler = StandardScaler()
#         X =arr_uci
#         np.random.shuffle(X)
#         return scaler.fit_transform(X)

#     return data_loader()

In [24]:
def get_generator(
    gan_method: str = "CTGAN",
    epochs: int = 1000,
    seed: int = 0,
) -> GeneratorInterface:
    class LocalGenerator(GeneratorInterface):
        def __init__(self) -> None:
            if gan_method == "TVAE":
                syn_model = TVAESynthesizer(metadata, epochs=epochs)
            elif gan_method == "CTGAN":
                syn_model = CTGAN(epochs=epochs)
            elif gan_method == "KDE":
                syn_model = None
            else:
                raise RuntimeError()
            self.method = gan_method
            self.model = syn_model
                

        def fit(self, data: pd.DataFrame) -> "LocalGenerator":
            if self.method == "KDE":
                self.model = stats.gaussian_kde(np.transpose(data))
            else:
                self.model.fit(data)
            return self

        def generate(self, count: int) -> pd.DataFrame:
            
            if gan_method == "KDE":
                samples = pd.DataFrame(self.model.resample(count).transpose(1, 0))
            elif gan_method == "TVAE":
                samples = self.model.sample(count)
            elif gan_method == "CTGAN":
                samples = self.model.generate(count)
            else:
                raise RuntimeError()

            return samples
            
            #return self.model.sample(count)

    return LocalGenerator()


#Loading metadata from dataset for use in TVAESynthesizer

# from sdv.metadata import SingleTableMetadata

# metadata = SingleTableMetadata()
# metadata.detect_from_dataframe(data=df_dataset)







In [6]:
# def get_generator(
#     gan_method: str = "CTGAN",
#     metadata: SingleTableMetadata = None,
#     epochs: int = 1000,
#     seed: int = 0,
# ) -> GeneratorInterface:
#     class LocalGenerator(GeneratorInterface):
#         def __init__(self, metadata=None) -> None:
#             if gan_method == "TVAE":
#                 self.model = TVAESynthesizer(metadata=metadata, epochs=epochs)
#             elif gan_method == "CTGAN":
#                 self.model = CTGAN(epochs=epochs)
#             elif gan_method == "KDE":
#                 self.model = None
#             else:
#                 raise RuntimeError("Unknown GAN method specified.")
#             self.method = gan_method
#             self.metadata = metadata

#         def fit(self, data: pd.DataFrame) -> "LocalGenerator":
#             if self.method == "KDE":
#                 self.model = stats.gaussian_kde(np.transpose(data))
#             else:
#                 self.model.fit(data)
#             return self

#         def generate(self, count: int) -> pd.DataFrame:
#             if self.method == "KDE":
#                 samples = pd.DataFrame(self.model.resample(count).transpose(1, 0))
#             elif self.method == "TVAE":
#                 samples = self.model.sample(count)
#             elif self.method == "CTGAN":
#                 samples = self.model.generate(count)
#             else:
#                 raise RuntimeError("Unknown GAN method specified.")
#             return samples

#     return LocalGenerator(metadata=metadata)



# #Loading metadata from dataset for use in TVAESynthesizer
# dataset = get_dataset()
# df_dataset = pd.DataFrame(dataset)
# df_dataset.rename(columns={0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8:'8', 9:'9', 10:'10', 11:'11', 12:'12',
#                           13:'13', 14:'14', 15:'15', 16:'16', 17:'17', 18:'18', 19:'19', 20:'20', 21:'21', 22:'22', 23:'23'}, inplace = True)


In [31]:
# dataset = get_dataset()
# df_dataset = pd.DataFrame(dataset)
# df_dataset.rename(columns={0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8:'8', 9:'9', 10:'10', 11:'11', 12:'12',
#                           13:'13', 14:'14', 15:'15', 16:'16', 17:'17', 18:'18', 19:'19', 20:'20', 21:'21', 22:'22', 23:'23', 24:'24'}, inplace = True)


# #Create loop to create a metadta object for each column ablated dataframe
# metadata_objects={}
# for i, col in enumerate(df_dataset.columns):
    
#     df_temp = df_dataset.copy()
#     df_temp.drop(columns=[col], inplace=True)
    
#     metadata_name = f"metadata_{col}"
#     metadata_objects[metadata_name] = SingleTableMetadata()
#     metadata_objects[metadata_name].detect_from_dataframe(data=df_temp)

# #metadata_objects[metadata_1]
# #print(type(metadata_objects['metadata_1']))
# #metadata_objects['metadata_1']

In [33]:

# # #Create loop to create a metadta object for each column ablated dataframe
# # metadata_objects={}
# # for i, col in enumerate(df_dataset.columns):
    
# #     df_temp = df_dataset.copy()
# #     df_temp.drop(columns=[col], inplace=True)
    
# #     metadata_name = f"metadata_{col}"
# #     metadata_objects[metadata_name] = SingleTableMetadata()
# #     metadata_objects[metadata_name].detect_from_dataframe(data=df_temp)

# # mem_set_size = 1000 -> originally what training size was
# reference_set_size = 10000 #held out set
# training_epochs = [2000]
# training_sizes = [10000]
# #synthetic_sizes = [200]
# density_estimator = "kde"  # prior, kde, bnaf
# gen_size = 500 #same as synthetic_sizes

# method = "TVAE"

# leave_one_out_results = {}


# for col, key in zip(df_dataset.columns, metadata_objects.keys()):
#     print(col)
#     print(key)
    
    
#     metadata = metadata_objects[key]
#     #print(type(metadata))
#     print(metadata)
    
#     df_temp = df_dataset.drop(columns=[col])
# #     df_temp.drop(columns=[col], inplace=True)
# #     df_temp.rename(columns={'0': 0}, inplace=True)
#     print(df_temp.columns)
#     arr_temp = np.array(df_temp)
    
#     #print(arr_temp)
    

    
    

#     # Create a dictionary to store the results for the current column
#     column_results = {}

#     # Set the number of iterations
#     num_iterations = 10

#     for iteration in range(1, num_iterations+1):
#         # Initialize the result dictionary for the current iteration
#         iteration_results = {}

#         for training_size in training_sizes:
#             # Initialize the result dictionary for the current training size
#             size_results = {}

#             for training_epoch in training_epochs:
#                 generator = get_generator(
#                     gan_method=method,
#                     epochs=training_epoch,
#                 )

#                 perf = evaluate_performance(
#                     generator,
#                     arr_temp,
#                     training_size,
#                     reference_set_size,
#                     training_epochs=training_epoch,
#                     synthetic_sizes=[gen_size],
#                     density_estimator=density_estimator,
#                 )

#                 # Store the MIA performance for the current training size and epoch
#                 size_results[training_epoch] = perf[gen_size]["MIA_performance"]

#             # Store the results for the current training size
#             iteration_results[training_size] = size_results

#         # Store the results for the current iteration
#         column_results[iteration] = iteration_results
    
#     # Store the results for the current column
#     leave_one_out_results[col] = column_results
    

# # Print the results
# for col, results in leave_one_out_results.items():
#     print(f"Column Omitted {col}:")
#     for iteration, iteration_results in results.items():
#         print(f"Iteration {iteration}:")
#         for training_size, size_results in iteration_results.items():
#             print(f"Training Size {training_size}:")
#             for training_epoch, mia_performance in size_results.items():
#                 print(f"Training Epoch {training_epoch}: MIA Performance = {mia_performance}")
#             print()

0
metadata_0
{
    "columns": {
        "1": {
            "sdtype": "numerical"
        },
        "2": {
            "sdtype": "numerical"
        },
        "3": {
            "sdtype": "numerical"
        },
        "4": {
            "sdtype": "numerical"
        },
        "5": {
            "sdtype": "numerical"
        },
        "6": {
            "sdtype": "numerical"
        },
        "7": {
            "sdtype": "numerical"
        },
        "8": {
            "sdtype": "numerical"
        },
        "9": {
            "sdtype": "numerical"
        },
        "10": {
            "sdtype": "numerical"
        },
        "11": {
            "sdtype": "numerical"
        },
        "12": {
            "sdtype": "numerical"
        },
        "13": {
            "sdtype": "numerical"
        },
        "14": {
            "sdtype": "numerical"
        },
        "15": {
            "sdtype": "numerical"
        },
        "16": {
            "sdtype": "numerical"
        },
 

InvalidDataError: The provided data does not match the metadata:
The columns ['0'] are not present in the metadata.

The metadata columns ['23'] are not present in the data.

In [None]:
# # Create empty lists to store the results
# iterations = []
# epochs = []
# srcs = []
# aucrocs = []
# column_ablated = []

# # Iterate over the results
# for col, results in leave_one_out_results.items():
#     for iteration, iteration_results in results.items():
#         for training_size, size_results in iteration_results.items():
#             for training_epoch, mia_performance in size_results.items():
#                 # Append values to the lists
#                 iterations.append(iteration)
#                 epochs.append(training_epoch)
#                 srcs.append(list(mia_performance.keys())[8])
#                 aucrocs.append(list(mia_performance.values())[8]['aucroc'])
#                 column_ablated.append(col)  # Add the column_ablated value

# # Create a list of dictionaries containing the data
# data_list = [{'column_ablated': column, 'iteration': iteration, 'epoch': epoch, 'src': src, 'aucroc': aucroc}
#              for column, iteration, epoch, src, aucroc in zip(column_ablated, iterations, epochs, srcs, aucrocs)]

# # Create a DataFrame from the list of dictionaries
# data = pd.DataFrame(data_list)

# # Convert data types if needed
# data['iteration'] = data['iteration'].astype(int)
# data['epoch'] = data['epoch'].astype(int)
# data['aucroc'] = data['aucroc'].astype(float)
# data.to_csv('domias_kde_col_abl_UCI_TVAE_fix.csv')
# data

In [None]:
# mean_values = data.groupby('column_ablated')['aucroc'].mean()

# print(mean_values)

In [None]:
# sns.barplot(data=data, x='column_ablated', y='aucroc' )

# # Customize the plot
# plt.xlabel('Column ablated')
# plt.ylabel('AUC-ROC')
# plt.title('MIA Performance for Column Ablation Study')
# plt.ylim(ymin=0.475, ymax=0.525)
# #plt.legend(title='src')
# #plt.grid(True)
# plt.show()

In [None]:
# for col, results in leave_one_out_results.items():
#     print(f"Column Omitted {col}:")
#     for iteration, iteration_results in results.items():
#         print(f"Iteration {iteration}:")
#         for training_size, size_results in iteration_results.items():
#             print(f"Training Size {training_size}:")
#             for training_epoch, mia_performance in size_results.items():
#                 print(f"Training Epoch {training_epoch}: MIA Performance = {mia_performance}")
#             print()


# # Create empty lists to store the results
# epochs = []
# srcs = []
# aucrocs = []


# # Iterate over the results
# for col, results in leave_one_out_results.items():
#     for iteration, iteration_results in results.items():
#         for training_size, size_results in iteration_results.items():
#             for training_epoch, mia_performance in size_results.items():
#                 epochs.append(training_epoch)
#                 srcs.append(list(mia_performance.keys()))
#                 aucrocs.append([value['aucroc'] for value in mia_performance.values()])

# # Create a DataFrame from the lists
# data = pd.DataFrame({'epoch': epochs, 'src': srcs, 'aucroc': aucrocs})

# # Convert lists to individual rows
# data = data.explode('src').explode('aucroc')

# # Convert data types
# data['epoch'] = data['epoch'].astype(int)
# data['aucroc'] = data['aucroc'].astype(float)

# #print(data.head())

# #Filtering for just domias results
# filtered_df = data[data['src'] == 'domias']

# filtered_df.reset_index(inplace=True)


# print(filtered_df.head())
# print(filtered_df.shape)

In [None]:
# filtered_df[filtered_df['index'] == 0].count()

In [None]:
# sns.barplot(data=filtered_df, x='index', y='aucroc')

# # Customize the plot
# plt.xlabel('Column ablated')
# plt.ylabel('AUC-ROC')
# plt.title('MIA Performance for Column Ablation Study')
# plt.ylim(ymin=0.4, ymax=0.6)
# #plt.legend(title='src')
# #plt.grid(True)
# plt.show()

In [11]:

# #         def __init__(self) -> None:
# #             if gan_method == "TVAE":
# #                 syn_model = TVAESynthesizer(metadata, epochs=epochs)
# #             elif gan_method == "CTGAN":
# #                 syn_model = CTGAN(epochs=epochs)
# #             elif gan_method == "KDE":
# #                 syn_model = None
# #             else:
# #                 raise RuntimeError()
# #             self.method = gan_method
# #             self.model = syn_model



# def get_generator(gan_method: str = "TVAE", epochs: int = 1000, metadata = None) -> GeneratorInterface:
#     class LocalGenerator(GeneratorInterface):
#         def __init__(self, metadata=None) -> None:
#             self.metadata = metadata
#             if gan_method == "TVAE":
#                 syn_model = TVAESynthesizer(metadata, epochs=epochs)
#             elif gan_method == "KDE":
#                 syn_model = None
#             else:
#                 raise RuntimeError()
                
#             self.method = gan_method
#             self.model = syn_model

#         def fit(self, data: pd.DataFrame) -> "LocalGenerator":
#             if gan_method == "KDE":
#                 self.model = stats.gaussian_kde(np.transpose(data))
#             else:
#                 self.model.fit(data)
#             return self

#         def generate(self, count: int) -> pd.DataFrame:
#             if gan_method == "KDE":
#                 samples = pd.DataFrame(self.model.resample(count).transpose(1, 0))
#             elif gan_method == "TVAE":
#                 samples = self.model.sample(count)
#             else:
#                 raise RuntimeError()

#             return samples

#     return LocalGenerator()


# leave_one_out_results = {}
# metadata_objects = {}

# for i, col in enumerate(df_dataset.columns):
#     print(col)
    
#     df_temp = df_dataset.copy()
#     df_temp.drop(columns=[col], inplace=True)
#     arr_temp = np.array(df_temp)

#     # Create a new metadata object for each loop iteration with a unique name
#     metadata_name = f"metadata_{col}"
#     metadata_objects[metadata_name] = SingleTableMetadata()
#     metadata_objects[metadata_name].detect_from_dataframe(data=df_temp)
#     print(type(metadata_objects[metadata_name]))

#     # Create the generator using the corresponding metadata
#     generator = get_generator(
#         epochs=training_epochs[0],  # Assuming you only use the first epoch in the list
#         metadata=metadata_objects[metadata_name]
#     )

In [10]:

# # mem_set_size = 1000 -> originally what training size was
# reference_set_size = 10000 #held out set
# training_epochs = [2000]
# training_sizes = [10000]
# #synthetic_sizes = [200]
# density_estimator = "kde"  # prior, kde, bnaf
# gen_size = 500 #same as synthetic_sizes

# method = "TVAE"

# leave_one_out_results = {}
# metadata_objects={}


# leave_one_out_results = {}
# metadata_objects = {}

# for i, col in enumerate(df_dataset.columns):
#     print(col)
    
#     df_temp = df_dataset.copy()
#     df_temp.drop(columns=[col], inplace=True)
#     arr_temp = np.array(df_temp)

#     # Create a new metadata object for each loop iteration with a unique name
#     metadata_name = f"metadata_{col}"
#     metadata_objects[metadata_name] = SingleTableMetadata()
#     metadata = metadata_objects[metadata_name].detect_from_dataframe(data=df_temp)

#     # Create the generator using the corresponding metadata
#     generator = get_generator(
#         gan_method=method,
#         epochs=training_epochs[0],  # Assuming you only use the first epoch in the list
#         metadata=metadata,
#     )

#     # Rest of your code for evaluation and storing results...


In [None]:
arr_uci = np.array(df_uci)
def get_dataset() -> np.ndarray:
    def data_loader() -> np.ndarray:
        scaler = StandardScaler()
        X =arr_uci
        np.random.shuffle(X)
        return scaler.fit_transform(X)

    return data_loader()

dataset = get_dataset()
df_dataset = pd.DataFrame(dataset)

df_dataset.rename(columns={0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8:'8', 9:'9', 10:'10', 11:'11', 12:'12',
                        13:'13', 14:'14', 15:'15', 16:'16', 17:'17', 18:'18', 19:'19', 20:'20', 21:'21', 22:'22', 23:'23'}, inplace = True)

# metadata = SingleTableMetadata()
# metadata.detect_from_dataframe(data=df_dataset)




# #Create loop to create a metadta object for each column ablated dataframe
# metadata_objects={}
# for i, col in enumerate(df_dataset.columns):
    
#     df_temp = df_dataset.copy()
#     df_temp.drop(columns=[col], inplace=True)
    
#     metadata_name = f"metadata_{col}"
#     metadata_objects[metadata_name] = SingleTableMetadata()
#     metadata_objects[metadata_name].detect_from_dataframe(data=df_temp)

# mem_set_size = 1000 -> originally what training size was
reference_set_size = 3000 #held out set
training_epochs = [1000]
training_sizes = [3000]
#synthetic_sizes = [200]
density_estimator = "kde"  # prior, kde, bnaf
gen_size = 500 #same as synthetic_sizes

method = "TVAE"

leave_one_out_results = {}


for i, col in enumerate(df_uci):
    print(i)
    print(col)
    
    
    df_temp = df_uci.drop(df_uci.columns[i], axis=1)
    arr_uci = np.array(df_temp)
    print(arr_uci.shape)
    def get_dataset() -> np.ndarray:
        def data_loader() -> np.ndarray:
            scaler = StandardScaler()
            X =arr_uci
            np.random.shuffle(X)
            return scaler.fit_transform(X)

        return data_loader()

    dataset = get_dataset()
    df_dataset = pd.DataFrame(dataset)
    print(df_dataset)

    df_dataset.rename(columns={0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8:'8', 9:'9', 10:'10', 11:'11', 12:'12',
                            13:'13', 14:'14', 15:'15', 16:'16', 17:'17', 18:'18', 19:'19', 20:'20', 21:'21', 22:'22'}, inplace = True)

    metadata = SingleTableMetadata()
    metadata.detect_from_dataframe(data=df_dataset)

     
    arr_temp = np.array(df_dataset)
    

    
    

    # Create a dictionary to store the results for the current column
    column_results = {}

    # Set the number of iterations
    num_iterations = 5

    for iteration in range(1, num_iterations+1):
        # Initialize the result dictionary for the current iteration
        iteration_results = {}

        for training_size in training_sizes:
            # Initialize the result dictionary for the current training size
            size_results = {}

            for training_epoch in training_epochs:
                generator = get_generator(
                    gan_method=method,
                    epochs=training_epoch,
                )

                perf = evaluate_performance(
                    generator,
                    arr_temp,
                    training_size,
                    reference_set_size,
                    training_epochs=training_epoch,
                    synthetic_sizes=[gen_size],
                    density_estimator=density_estimator,
                )

                # Store the MIA performance for the current training size and epoch
                size_results[training_epoch] = perf[gen_size]["MIA_performance"]

            # Store the results for the current training size
            iteration_results[training_size] = size_results

        # Store the results for the current iteration
        column_results[iteration] = iteration_results
    
    # Store the results for the current column
    leave_one_out_results[col] = column_results
    

# Print the results
for col, results in leave_one_out_results.items():
    print(f"Column Omitted {col}:")
    for iteration, iteration_results in results.items():
        print(f"Iteration {iteration}:")
        for training_size, size_results in iteration_results.items():
            print(f"Training Size {training_size}:")
            for training_epoch, mia_performance in size_results.items():
                print(f"Training Epoch {training_epoch}: MIA Performance = {mia_performance}")
            print()

0
LIMIT_BAL
(30000, 23)
             0         1         2         3         4         5         6   \
0     -1.234323  0.185828 -1.057295 -0.486615  0.904712  1.782348  0.138865   
1      0.810161  0.185828 -1.057295 -0.269643  0.014861  0.111736  0.138865   
2      0.810161 -1.079457  0.858557  1.900084  0.014861  0.111736  0.138865   
3      0.810161  0.185828 -1.057295  0.055816  0.014861  0.111736  0.138865   
4      0.810161  0.185828 -1.057295 -0.052670  0.014861  0.111736  0.138865   
...         ...       ...       ...       ...       ...       ...       ...   
29995  0.810161  0.185828  0.858557 -1.029047  0.014861  0.111736  0.138865   
29996  0.810161 -1.079457 -1.057295  1.683111  0.014861  0.111736  0.138865   
29997  0.810161 -1.079457 -1.057295 -0.269643 -1.764843 -1.558876 -1.532192   
29998  0.810161  0.185828 -1.057295  1.249166 -1.764843 -1.558876 -1.532192   
29999  0.810161  1.451114 -1.057295  0.706734  0.014861  0.111736  0.138865   

             7         8   

In [None]:
# Create empty lists to store the results
iterations = []
epochs = []
srcs = []
aucrocs = []
column_ablated = []

# Iterate over the results
for col, results in leave_one_out_results.items():
    for iteration, iteration_results in results.items():
        for training_size, size_results in iteration_results.items():
            for training_epoch, mia_performance in size_results.items():
                # Append values to the lists
                iterations.append(iteration)
                epochs.append(training_epoch)
                srcs.append(list(mia_performance.keys())[8])
                aucrocs.append(list(mia_performance.values())[8]['aucroc'])
                column_ablated.append(col)  # Add the column_ablated value

# Create a list of dictionaries containing the data
data_list = [{'column_ablated': column, 'iteration': iteration, 'epoch': epoch, 'src': src, 'aucroc': aucroc}
             for column, iteration, epoch, src, aucroc in zip(column_ablated, iterations, epochs, srcs, aucrocs)]

# Create a DataFrame from the list of dictionaries
data = pd.DataFrame(data_list)

# Convert data types if needed
data['iteration'] = data['iteration'].astype(int)
data['epoch'] = data['epoch'].astype(int)
data['aucroc'] = data['aucroc'].astype(float)
data.to_csv('domias_kde_col_abl_UCI_TVAE_fix.csv')
data

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt
sns.barplot(data=data, x='column_ablated', y='aucroc' )

# Customize the plot
plt.xlabel('Column ablated')
plt.ylabel('AUC-ROC')
plt.title('MIA Performance for Column Ablation Study')
plt.ylim(ymin=0.4, ymax=0.65)
#plt.legend(title='src')
#plt.grid(True)
plt.show()

In [None]:
mean_values = data.groupby('column_ablated')['aucroc'].mean()

print(mean_values)