# Attribute Inference Attacks on the data

## Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as spio
from os.path import join as osj
import pandas as pd
import seaborn as sns
import random
import pickle
import os

import logging
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S"
)
logger = logging.getLogger()

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


## Functions

In [20]:
def get_ids():
    valid_patients = pd.read_csv(osj("..", "files", "valid_patients.csv"), header=None).to_numpy().reshape(-1)
    return valid_patients

# Get all beats 
def get_dp_beats(m):
    with open(osj("..", "dp_data_single", "dataset_beats", f"{m}_30min_beats.pkl"), "rb") as f:
        dp_single = pickle.load(f)
    with open(osj("..", "dp_data_trio", "dataset_beats", f"{m}_30min_beats.pkl"), "rb") as f:
        dp_trio = pickle.load(f)
    
    # filter only valid patients
    valid_patients = get_ids() 
    f_dp_single = {
        epsilon: {
            pid: data for pid, data in patients.items() if int(pid) in valid_patients
        }
        for epsilon, patients in dp_single.items()
    }
    f_dp_trio = {
        epsilon: {
            pid: data for pid, data in patients.items() if int(pid) in valid_patients
        }
        for epsilon, patients in dp_trio.items()
    }
    del dp_single
    del dp_trio

    return f_dp_single, f_dp_trio

def get_real_beats():
    with open(osj("..", "data_single", "dataset_beats", "30min_beats.pkl"), "rb") as f:
        real_single = pickle.load(f)
    with open(osj("..", "data_trio", "dataset_beats", "30min_beats.pkl"), "rb") as f:
        real_trio = pickle.load(f)

    # filter only valid patients
    valid_patients = get_ids() 
    f_real_single = {pid: data for pid, data in real_single.items() if int(pid) in valid_patients}
    f_real_trio = {pid: data for pid, data in real_trio.items() if int(pid) in valid_patients}
    del real_single
    del real_trio

    return f_real_single, f_real_trio

# Get patient attributes
def get_patient_infos():
    with open(osj("..", "files", "patient_infos.pkl"), "rb") as f:
        patient_info = pickle.load(f)
    
    valid_patients = get_ids() 
    f_patient_info = {pid: data for pid, data in patient_info.items() if int(pid) in valid_patients}

    return f_patient_info

# extract the required attributes from the patient data
def get_patient_attribute(attribute, data):
    attr_dict = {str(pid): data[attribute] for pid, data in data.items()}
    attr_array = list(attr_dict.values())
    return attr_array

# Prepare train and test data into the required format
def prepare_inference_data(signals, attr_array, beats_per_patient=1000, beats_only=True):
    """
    Prepare the data for attribute inference attack.

    Parameters:
        signals: ndarray with structure [patient][beat/class/label][beat(128,)]
        attr_array: List or ndarray, target attribute
        beats_per_patient: int, Count of beats per patient (default: 1000)
        beats_only: bool, if every beat is separated (default: True)

    Returns:
        X: ndarray in format [n_patients, beats_per_patient, beat_length] (if beats_only=False)
           or [total_beats, beat_length] (if beats_only=True)
        y: ndarray in format [n_patients] (if beats_only=False)
           or [total_beats] (if beats_only=True)
    """
    X = []
    y = []
    i = 0

    for patient_id in signals:
        attr = attr_array[i]
        patient_beats = signals[patient_id]['beats']

        i += 1
            
        # Sampling random beats from the patient
        random.seed(42)
        n_beats = min(beats_per_patient, len(patient_beats))
        sampled_beats = patient_beats[np.random.choice(len(patient_beats), n_beats, replace=False)]

        if beats_only:
            X.append(sampled_beats)
            y.append(np.full(n_beats, attr))
        else:
            if len(sampled_beats) < beats_per_patient:
                # Padding with zeros if less than beats_per_patient
                padding = np.zeros((beats_per_patient - len(sampled_beats), sampled_beats.shape[1]))
                sampled_beats = np.vstack((sampled_beats, padding))
            X.append(sampled_beats)
            y.append(attr)

    if beats_only:
        X = np.concatenate(X, axis=0)  # shape: [n_beats, 280]
        y = np.concatenate(y, axis=0)  # shape: [n_beats]
    else:
        X = np.array(X) # shape: [n_patients, beats_per_patient, 280]
        y = np.array(y) # shape: [n_patients]
    
    return X, y


# train a chosen model for attribute inference
def train_attribute_inference(X, y, attr_array, task="classification"):

    """
    Training of the specified attribute inference model. 
        Classification: Random Forest
        Regression: XGBRegressor, Gradient Boosting, Linear Regression, MLP Regressor

    Parameters:
        signals: ndarray with structure [patient][beat/class/label][beat(128,)]
        attr_array: dict with structure {patient_index: {"gender": 0/1, ...}}
        task: str, type of model to use for inference (default: "randomForest")

    Returns:
        y_test: real labels
        y_pred: predicted labels
        pred_classes: predicted classes (for classification tasks)
    """

    if task == "classification":

        # X, y = prepare_inference_data(signals, attr_array, beats_per_patient=1500, beats_only=False)
        # X_flattened = X.reshape(X.shape[0], X.shape[1] * X.shape[2])  # Shape: (patients, 1000*280)
        # X_train, X_test, y_train, y_test = train_test_split(X_flattened, y, test_size=0.2, random_state=42, stratify=y)
        
        # X, y = prepare_inference_data(signals, attr_array, beats_per_patient=50, beats_only=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) # TODO Train-test split should be on patient level, not on beat level
        
        clf = RandomForestClassifier(
            n_estimators=100,
            max_depth=20,
            n_jobs=-1,
            random_state=42
        )

        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        pred_classes = list(clf.classes_) 

        return y_test, y_pred, pred_classes
    
    elif task == "regression":

        # X, y = prepare_inference_data(signals, attr_array, beats_per_patient=1000, beats_only=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # xgb_model = XGBRegressor(
        #     tree_method="gpu_hist",
        #     n_estimators=100,
        #     max_depth=10,
        #     learning_rate=0.1,
        #     random_state=42,
        #     n_jobs=-1
        # )
        # xgb_model = XGBRegressor(tree_method="gpu_hist")
        # xgb_model.fit(X_train, y_train)
        # y_pred = xgb_model.predict(X_test)

        # model = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1, max_depth=3, random_state=42)
        # model.fit(X_train, y_train)
        # y_pred = model.predict(X_test)

        # model = LinearRegression()
        # model.fit(X_train, y_train)
        # y_pred = model.predict(X_test)

        model = MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        return y_test, y_pred
    
    else:
        raise ValueError(f"Unsupported task type: {task}")



## Attacks on real data

In [70]:
real_single, real_trio = get_real_beats()
patient_infos = get_patient_infos()
patient_ids = get_ids()

In [27]:
min_beats_s = min(len(data['beats']) for data in real_single.values())
min_beats_t = min(len(data['beats']) for data in real_trio.values())
print(f"Minimal value of single beats: {min_beats_s}")
print(f"Minimal value of trio beats: {min_beats_t}")

Minimal value of single beats: 1516
Minimal value of trio beats: 1516


In [5]:
len(real_single[100]['beats'][0])

128

### "gender" attribute inference

In [141]:
# get attribute to be predicted
attr_array = get_patient_attribute("gender", patient_infos)

In [None]:
# train the attack model for single beats
y_test, y_pred, pred_classes = train_attribute_inference(real_single, attr_array, task="classification")

# print performance metrics
print(classification_report(y_test, y_pred, target_names=pred_classes))

# last trained on 50 beats per patient

              precision    recall  f1-score   support

           F       0.88      0.94      0.91       150
           M       0.95      0.89      0.92       190

    accuracy                           0.91       340
   macro avg       0.91      0.92      0.91       340
weighted avg       0.92      0.91      0.91       340



In [112]:
# train the attack model for trio beats
y_test, y_pred, pred_classes = train_attribute_inference(real_trio, attr_array, task="classification")

# print performance metrics
print(classification_report(y_test, y_pred, target_names=pred_classes))

              precision    recall  f1-score   support

           F       0.67      0.67      0.67         3
           M       0.75      0.75      0.75         4

    accuracy                           0.71         7
   macro avg       0.71      0.71      0.71         7
weighted avg       0.71      0.71      0.71         7



### "age" attribute inference

In [None]:
# get attribute to be predicted
attr_array = get_patient_attribute("age", patient_infos)

real_single_age = real_single.copy()
real_trio_age = real_trio.copy()

# patient 3 (idx = 2) and 27 (idx = 26, will be 25) have no age and therefore will be removed
del attr_array[2]

key_to_remove = list(real_single_age.keys())[2]
del real_single_age[key_to_remove]
del real_trio_age[key_to_remove]

del attr_array[25]

key_to_remove = list(real_single_age.keys())[25]
del real_single_age[key_to_remove]
del real_trio_age[key_to_remove]

In [None]:
attr_array_grouped = pd.cut(attr_array, bins=[0, 40, 70, 100], labels=["young", "adult", "senior"])
print(attr_array_grouped.value_counts())

adult      6
older     15
senior    11
dtype: int64


In [114]:
y_test, y_pred, pred_classes = train_attribute_inference(real_single_age, attr_array_grouped, task="classification")

print(classification_report(y_test, y_pred, target_names=pred_classes))

              precision    recall  f1-score   support

       adult       0.00      0.00      0.00         1
       older       0.40      0.67      0.50         3
      senior       0.50      0.33      0.40         3

    accuracy                           0.43         7
   macro avg       0.30      0.33      0.30         7
weighted avg       0.39      0.43      0.39         7



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [115]:
# train the attack model for single beats
y_test, y_pred, pred_classes = train_attribute_inference(real_trio_age, attr_array_grouped, task="classification")

# print performance metrics
print(classification_report(y_test, y_pred, target_names=pred_classes))

              precision    recall  f1-score   support

       adult       0.00      0.00      0.00         1
       older       0.40      0.67      0.50         3
      senior       0.50      0.33      0.40         3

    accuracy                           0.43         7
   macro avg       0.30      0.33      0.30         7
weighted avg       0.39      0.43      0.39         7



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [3]:
def save_attack_performance(attack, metrics):    
    with open(osj("..", "dp_models", "attacks", f"{attack}_performance.pkl"), "wb") as f:
        pickle.dump(metrics, f) 

def load_attack_performance(attack):    
    with open(osj("..", "dp_models", "attacks", f"{attack}_performance.pkl"), "rb") as f:
        metrics = pickle.load(f) 
    return metrics

In [None]:
aia_metrics = {"no_dp": {"gender": 0.1, "age": None}}

In [None]:
# runs ...

# def train_real_aia():

attack = "AIA" 
mechanism = "no_dp"
attributes = ["gender", "age"]
# attributes = ["gender", "age", "medication"]

# load patient data
patient_infos = get_patient_infos()
patient_ids = get_ids()

# aia_metrics = {}
aia_metrics = {"no_dp": {}}
# aia_metrics = load_attack_performance(attack)

n_beats = 20

# load the data
real_single, real_trio = get_real_beats()

########  ATTRIBUTE  ########
for attribute in attributes:

    try: 
        aia_metrics[mechanism][attribute]
        logger.info(f"Skipping existing attribute {attribute} ...")

    except KeyError:
    
        # --------- Attack training ---------
        attr_task = "regression" if attribute == "id" else "classification"
        
        # get the patient attributes
        attr_array = get_patient_attribute(attribute, patient_infos)
        
        # TODO stop leakage, by first splitting train and test sets

        # prepare the data
        X_single, y_single = prepare_inference_data(real_single, attr_array, beats_per_patient=n_beats, beats_only=True)
        X_trio, y_trio = prepare_inference_data(real_trio, attr_array, beats_per_patient=n_beats, beats_only=True)

        # train the attack model
        y_test_single, y_pred_single, pred_classes = train_attribute_inference(X_single, y_single, attr_array, task=attr_task)
        y_test_trio, y_pred_trio, pred_classes = train_attribute_inference(X_trio, y_trio, attr_array, task=attr_task)

        # calculate the metrics
        acc_s = accuracy_score(y_test_single, y_pred_single)
        acc_t = accuracy_score(y_test_trio, y_pred_trio)
        pre_s, rec_s, f1_s, _ = precision_recall_fscore_support(y_test_single, y_pred_single, average="macro")
        pre_t, rec_t, f1_t, _ = precision_recall_fscore_support(y_test_trio, y_pred_trio, average="macro")

        metrics = {}
        # save the metrics
        metrics["single"] = {
            "acc": acc_s,
            "rec": rec_s,
            "pre": pre_s,
            "f1": f1_s
        }
        metrics["trio"] = {
            "acc": acc_t,
            "rec": rec_t,
            "pre": pre_t,
            "f1": f1_t
        }          
    
        aia_metrics[mechanism][attribute] = metrics

# save the metrics
#save_attack_performance(attack, aia_metrics)
#logger.info(f"Saved updated attack performance for {attack}.")


  _warn_prf(average, modifier, msg_start, len(result))


In [32]:
aia_metrics

{'no_dp': {'gender': {'single': {'acc': 0.8382352941176471,
    'rec': 0.8342105263157895,
    'pre': 0.8368700265251989,
    'f1': 0.8353510895883778},
   'trio': {'acc': 0.9558823529411765,
    'rec': 0.9552631578947368,
    'pre': 0.9552631578947368,
    'f1': 0.9552631578947368}},
  'age': {'single': {'acc': 0.8676470588235294,
    'rec': 0.87,
    'pre': 0.8646825396825397,
    'f1': 0.8625729368670545},
   'trio': {'acc': 0.8897058823529411,
    'rec': 0.86,
    'pre': 0.9159230769230768,
    'f1': 0.8624046685340803}}}}

## Attack on DP data

In [None]:
# runs ...

# def train_dp_aia():

attack = "AIA" 
p_method = ["laplace", "bounded_n", "gaussian_a"]
attributes = ["gender", "age"]
# attributes = ["gender", "age", "medication"]

# load patient data
patient_infos = get_patient_infos()
patient_ids = get_ids()

# load epsilon values to be tested (top 10 and low 10)
df_toplow_20 = pd.read_pickle("../files/toplow20.pkl") # dataframe with columns: Model, Method, Epsilon, Metric, Value

aia_metrics = {}
# aia_metrics = load_attack_performance(attack)

n_beats = 100

########  MECHANISM  ########
for mechanism in list(set(df_toplow_20["Method"].tolist())): # requires deduplication

    # load the data
    dp_single, dp_trio = get_dp_beats(epsilon)

    epsilon_metrics = {}
    hp_epsilon_values = df_toplow_20[df_toplow_20["Method"] == mechanism]["Epsilon"].tolist()

    ########  EPSILON  ########
    for epsilon in hp_epsilon_values:

        attribute_metrics = {}

        ########  ATTRIBUTE  ########
        for attribute in attributes:

            if aia_metrics[mechanism][epsilon][attribute]:
                logger.info(f"Skipping existing attribute {attribute} ...")
                skipped = True
                continue
            
            else:
                skipped = False
                metrics = {}
                
                # --------- Attack training ---------
                attr_task = "regression" if attribute == "id" else "classification"
                
                # get the patient attributes
                attr_array = get_patient_attribute(attribute, patient_infos)

                # prepare the data
                X_single, y_single = prepare_inference_data(dp_single, attr_array, beats_per_patient=n_beats, beats_only=True)
                X_trio, y_trio = prepare_inference_data(dp_trio, attr_array, beats_per_patient=n_beats, beats_only=True)

                # train the attack model
                y_test_single, y_pred_single, pred_classes_single = train_attribute_inference(X_single, y_single, task=attr_task)
                y_test_trio, y_pred_trio, pred_classes_trio = train_attribute_inference(X_trio, y_trio, task=attr_task)


                # calculate the metrics
                acc_s = accuracy_score(y_test_single, y_pred_single)
                acc_t = accuracy_score(y_test_trio, y_pred_trio)
                pre_s, rec_s, f1_s, _ = precision_recall_fscore_support(y_test_single, y_pred_single, average="binary")
                pre_t, rec_t, f1_t, _ = precision_recall_fscore_support(y_test_trio, y_pred_trio, average="binary")

                mae_s = mean_absolute_error(y_test_single, y_pred_single)
                mae_t = mean_absolute_error(y_test_trio, y_pred_trio)

                mse_s = mean_squared_error(y_test_single, y_pred_single)
                mse_t = mean_squared_error(y_test_trio, y_pred_trio)

                r2_s = r2_score(y_test_single, y_pred_single)
                r2_t = r2_score(y_test_trio, y_pred_trio)

                # save the metrics
                metrics["single"] = {
                    "acc": acc_s,
                    "rec": rec_s,
                    "pre": pre_s,
                    "f1": f1_s,
                    "mae": mae_s,
                    "mse": mse_s,
                    "r2": r2_s
                }
                metrics["trio"] = {
                    "acc": acc_t,
                    "rec": rec_t,
                    "pre": pre_t,
                    "f1": f1_t,
                    "mae": mae_t,
                    "mse": mse_t,
                    "r2": r2_t
                }          

                attribute_metrics[attribute] = metrics

        epsilon_metrics[epsilon] = attribute_metrics
    
    aia_metrics[mechanism] = epsilon_metrics

# save the metrics
save_attack_performance(attack, aia_metrics)
logger.info(f"Saved updated attack performance for {attack}.")


In [None]:
p_methods = ["laplace", "bounded_n", "gaussian_a"]

In [None]:
# Load the 10 best performing and 10 worst performing DP setups.
# they are sorted by Model, Method and value

df_toplow_20 = pd.read_pickle("../files/toplow20.pkl")
df_toplow_20

Unnamed: 0,Model,Method,Epsilon,Metric,Value
7,Ens_val,laplace,5.91,f1,0.949185
19,Ens_val,laplace,0.041,f1,0.942886
17,Ens_val,laplace,1e-05,f1,0.942544
16,Ens_val,laplace,0.021,f1,0.942253
15,Ens_val,laplace,0.091,f1,0.942009
5,Ens_val,gaussian_a,0.71,f1,0.950218
6,Ens_val,gaussian_a,1e-05,f1,0.949441
9,Ens_val,gaussian_a,9.01,f1,0.949064
18,Ens_val,gaussian_a,9.51,f1,0.94261
8,Ens_val,bounded_n,1.21,f1,0.949135


### Laplace

In [101]:
dp_single, dp_trio = get_dp_beats("laplace")

In [None]:
laplace_epsilon = list(set(df_toplow_20[df_toplow_20["Method"] == "laplace"]["Epsilon"].tolist())) # decuplicating the epsilon values
laplace_epsilon

[0.091, 0.11, 5.91, 1e-05, 0.041, 0.021]

#### "gender" attribute inference

In [133]:
# get attribute to be predicted
attr_array = get_patient_attribute("gender", patient_infos)

In [134]:
for epsilon in laplace_epsilon:
    # train the attack model for single beats
    y_test, y_pred, pred_classes = train_attribute_inference(dp_single[epsilon], attr_array, task="classification")

    # print performance metrics
    print(f"Laplace epsilon: {epsilon}")
    print(classification_report(y_test, y_pred, target_names=pred_classes))

Laplace epsilon: 0.091
              precision    recall  f1-score   support

           F       0.75      1.00      0.86         3
           M       1.00      0.75      0.86         4

    accuracy                           0.86         7
   macro avg       0.88      0.88      0.86         7
weighted avg       0.89      0.86      0.86         7

Laplace epsilon: 0.11
              precision    recall  f1-score   support

           F       0.75      1.00      0.86         3
           M       1.00      0.75      0.86         4

    accuracy                           0.86         7
   macro avg       0.88      0.88      0.86         7
weighted avg       0.89      0.86      0.86         7

Laplace epsilon: 5.91
              precision    recall  f1-score   support

           F       0.75      1.00      0.86         3
           M       1.00      0.75      0.86         4

    accuracy                           0.86         7
   macro avg       0.88      0.88      0.86         7
weighte

In [136]:
for epsilon in laplace_epsilon:
    # train the attack model for single beats
    y_test, y_pred, pred_classes = train_attribute_inference(dp_trio[epsilon], attr_array, task="classification")

    # print performance metrics
    print(f"Laplace epsilon: {epsilon}")
    print(classification_report(y_test, y_pred, target_names=pred_classes))

Laplace epsilon: 0.091
              precision    recall  f1-score   support

           F       0.60      1.00      0.75         3
           M       1.00      0.50      0.67         4

    accuracy                           0.71         7
   macro avg       0.80      0.75      0.71         7
weighted avg       0.83      0.71      0.70         7

Laplace epsilon: 0.11
              precision    recall  f1-score   support

           F       0.75      1.00      0.86         3
           M       1.00      0.75      0.86         4

    accuracy                           0.86         7
   macro avg       0.88      0.88      0.86         7
weighted avg       0.89      0.86      0.86         7

Laplace epsilon: 5.91
              precision    recall  f1-score   support

           F       0.75      1.00      0.86         3
           M       1.00      0.75      0.86         4

    accuracy                           0.86         7
   macro avg       0.88      0.88      0.86         7
weighte

#### "age" attribute inference

In [137]:
# get attribute to be predicted
attr_array = get_patient_attribute("age", patient_infos)

dp_single_age = dp_single.copy()
dp_trio_age = dp_trio.copy()

# patient 3 (idx = 2) and 27 (idx = 26, will be 25) have no age and therefore will be removed
del attr_array[2]

for epsilon in laplace_epsilon:

    key_to_remove = list(dp_single_age[epsilon].keys())[2]
    del dp_single_age[epsilon][key_to_remove]
    del dp_trio_age[epsilon][key_to_remove]

del attr_array[25]

for epsilon in laplace_epsilon:

    key_to_remove = list(dp_single_age[epsilon].keys())[25]
    del dp_single_age[epsilon][key_to_remove]
    del dp_trio_age[epsilon][key_to_remove]

In [138]:
attr_array_grouped = pd.cut(attr_array, bins=[0, 40, 70, 100], labels=["young", "adult", "senior"])
print(attr_array_grouped.value_counts())

for epsilon in laplace_epsilon:

    # train the attack model for single beats
    y_test, y_pred, pred_classes = train_attribute_inference(dp_single_age[epsilon], attr_array_grouped, task="classification")

    # print performance metrics
    print(f"Laplace epsilon: {epsilon}")
    print(classification_report(y_test, y_pred, target_names=pred_classes))

young      6
adult     15
senior    11
dtype: int64
Laplace epsilon: 0.091
              precision    recall  f1-score   support

       adult       0.25      0.33      0.29         3
      senior       0.00      0.00      0.00         2
       young       0.00      0.00      0.00         1

    accuracy                           0.17         6
   macro avg       0.08      0.11      0.10         6
weighted avg       0.12      0.17      0.14         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Laplace epsilon: 0.11
              precision    recall  f1-score   support

       adult       0.40      0.67      0.50         3
      senior       0.00      0.00      0.00         2
       young       0.00      0.00      0.00         1

    accuracy                           0.33         6
   macro avg       0.13      0.22      0.17         6
weighted avg       0.20      0.33      0.25         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Laplace epsilon: 5.91
              precision    recall  f1-score   support

       adult       0.25      0.33      0.29         3
      senior       0.00      0.00      0.00         2
       young       0.00      0.00      0.00         1

    accuracy                           0.17         6
   macro avg       0.08      0.11      0.10         6
weighted avg       0.12      0.17      0.14         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Laplace epsilon: 1e-05
              precision    recall  f1-score   support

       adult       0.50      0.67      0.57         3
      senior       0.50      0.50      0.50         2
       young       0.00      0.00      0.00         1

    accuracy                           0.50         6
   macro avg       0.33      0.39      0.36         6
weighted avg       0.42      0.50      0.45         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Laplace epsilon: 0.041
              precision    recall  f1-score   support

       adult       0.33      0.33      0.33         3
      senior       0.00      0.00      0.00         2
       young       0.00      0.00      0.00         1

    accuracy                           0.17         6
   macro avg       0.11      0.11      0.11         6
weighted avg       0.17      0.17      0.17         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Laplace epsilon: 0.021
              precision    recall  f1-score   support

       adult       0.25      0.33      0.29         3
      senior       0.00      0.00      0.00         2
       young       0.00      0.00      0.00         1

    accuracy                           0.17         6
   macro avg       0.08      0.11      0.10         6
weighted avg       0.12      0.17      0.14         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [139]:
for epsilon in laplace_epsilon:

    # train the attack model for single beats
    y_test, y_pred, pred_classes = train_attribute_inference(dp_trio_age[epsilon], attr_array_grouped, task="classification")

    # print performance metrics
    print(f"Laplace epsilon: {epsilon}")
    print(classification_report(y_test, y_pred, target_names=pred_classes))

Laplace epsilon: 0.091
              precision    recall  f1-score   support

       adult       0.40      0.67      0.50         3
      senior       0.00      0.00      0.00         2
       young       0.00      0.00      0.00         1

    accuracy                           0.33         6
   macro avg       0.13      0.22      0.17         6
weighted avg       0.20      0.33      0.25         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Laplace epsilon: 0.11
              precision    recall  f1-score   support

       adult       0.40      0.67      0.50         3
      senior       0.00      0.00      0.00         2
       young       0.00      0.00      0.00         1

    accuracy                           0.33         6
   macro avg       0.13      0.22      0.17         6
weighted avg       0.20      0.33      0.25         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Laplace epsilon: 5.91
              precision    recall  f1-score   support

       adult       0.50      1.00      0.67         3
      senior       0.00      0.00      0.00         2
       young       0.00      0.00      0.00         1

    accuracy                           0.50         6
   macro avg       0.17      0.33      0.22         6
weighted avg       0.25      0.50      0.33         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Laplace epsilon: 1e-05
              precision    recall  f1-score   support

       adult       0.50      1.00      0.67         3
      senior       0.00      0.00      0.00         2
       young       0.00      0.00      0.00         1

    accuracy                           0.50         6
   macro avg       0.17      0.33      0.22         6
weighted avg       0.25      0.50      0.33         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Laplace epsilon: 0.041
              precision    recall  f1-score   support

       adult       0.25      0.33      0.29         3
      senior       0.00      0.00      0.00         2
       young       0.00      0.00      0.00         1

    accuracy                           0.17         6
   macro avg       0.08      0.11      0.10         6
weighted avg       0.12      0.17      0.14         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Laplace epsilon: 0.021
              precision    recall  f1-score   support

       adult       0.25      0.33      0.29         3
      senior       0.00      0.00      0.00         2
       young       0.00      0.00      0.00         1

    accuracy                           0.17         6
   macro avg       0.08      0.11      0.10         6
weighted avg       0.12      0.17      0.14         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### LaplaceBoundedNoise

In [None]:
dp_single, dp_trio = get_dp_beats("bounded_n")

In [None]:
bounded_epsilon = list(set(df_toplow_20[df_toplow_20["Method"] == "bounded_n"]["Epsilon"].tolist()))
bounded_epsilon

### GaussianAnalytic

In [None]:
dp_single, dp_trio = get_dp_beats("gaussian_a")

In [None]:
gaussian_epsilon = list(set(df_toplow_20[df_toplow_20["Method"] == "gaussian_a"]["Epsilon"].tolist()))
gaussian_epsilon