# Differential Privacy

## Global Imports

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as spio
import tensorflow as tf
import tensorflow_addons as tfa
from os.path import join as osj
import pandas as pd

from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Embedding, Dense, Bidirectional, Input
from tensorflow.keras import Model

import random
import pickle
import time
import os
import argparse
import copy

from datetime import datetime
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

# Differential privacy libraries
from diffprivlib import mechanisms
from diffprivlib import models
from diffprivlib import tools
from diffprivlib.accountant import BudgetAccountant
from diffprivlib.utils import check_random_state
from diffprivlib.mechanisms import Laplace, LaplaceBoundedNoise, GaussianAnalytic
from diffprivlib.mechanisms import DPMechanism

from collections import Counter

import logging
# Configure the root logger: timestamped INFO-level messages for the whole notebook.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S"
)
# getLogger() without a name returns the root logger configured above.
logger = logging.getLogger()

# Seeds Python's built-in `random` module only; NumPy and TensorFlow are not seeded in this cell.
random.seed(654)


2025-03-30 08:27:11.697152: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743323231.713229 2191441 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743323231.718265 2191441 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-30 08:27:11.735958: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and re

## Load data 

Check whether the data matches the data used in the Zero-Shot-ECG paper, so that its pre-processed data can be reused.

In [None]:
# Load the segmented ECG data from the MATLAB file.
# Fixed: `filename = ` had no right-hand side (syntax error); it now holds the
# path that was previously hard-coded in the loadmat call.
beats = []  # kept from the original cell; not used in the cells shown below
filename = '../data/s2s_mitbih_aami.mat'
dict_samples = spio.loadmat(filename)

In [4]:
# `s2s_mitbih` is the record array from the .mat file; the first row carries
# one entry per patient with the fields `seg_values` (ECG values) and
# `seg_labels` (labels).
samples = dict_samples['s2s_mitbih']
values, labels = (samples[0][field] for field in ('seg_values', 'seg_labels'))

In [5]:
# Display the raw loadmat result to inspect its keys and nesting (the output is very large).
dict_samples

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN64, Created on: Mon Oct 15 12:00:19 2018',
 '__version__': '1.0',
 '__globals__': [],
 's2s_mitbih': array([[(array([[array([[ 8.34882169e-01],
                        [ 8.34882169e-01],
                        [ 8.34882169e-01],
                        [ 8.34882169e-01],
                        [ 8.34882169e-01],
                        [ 8.30291005e-01],
                        [ 9.36456142e-01],
                        [ 8.83556506e-01],
                        [ 8.23896168e-01],
                        [ 7.72322824e-01],
                        [ 7.78618972e-01],
                        [ 7.33875757e-01],
                        [ 6.61729669e-01],
                        [ 6.32067800e-01],
                        [ 7.23884667e-01],
                        [ 7.42266351e-01],
                        [ 6.56042541e-01],
                        [ 6.06521417e-01],
                        [ 6.89186565e-01],
                        [ 8.5

In [6]:
# Walk down the nesting of `values` for patient index 1 to document its layout.
print(len(values)) # == 48 patients
print(len(values[1])) # number of segments of this patient (1862 here; differs per patient)
print(len(values[1][1861])) # == 1: an unnecessary singleton array level wraps the segment values
print(len(values[1][1861][0])) # == 280 values in the segment
print(len(values[1][1861][0][279])) # == 1: each of the 280 values is itself a length-1 array
print(values[1][1861][0][279])

# all levels are of type numpy.ndarray

48
1862
1
280
1
[0.7656677]


In [7]:
# Walk down the nesting of `labels` for patient index 1 to document its layout.
print(len(labels)) # == 48 patients (type numpy.ndarray)
print(len(labels[1])) # == 1 since it is a 1D array holding all labels (type numpy.ndarray)
print(len(labels[1][0])) # == 1862 for this patient: one label per segment (type numpy.str_)
print(len(labels[1][0][1861]))# == 1, the label of the segment (type str)
print(labels[1][0][1861])


48
1
1862
1
N


In [10]:
# Load the Laplace-perturbed signals produced in the sibling Zero-Shot-ECG repository.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(osj("..", "..", "Zero-Shot-ECG", "dp_signals", "laplace.pkl"), "rb") as f:
    dp_signals = pickle.load(f)

In [11]:
# Inspect the nesting of the loaded dictionary.
print(len(dp_signals))  # number of top-level keys (presumably one per epsilon value - confirm)
print(len(dp_signals[1e-05]))  # number of entries below one top-level key (48, matching the patient count)
print(dp_signals[1e-05][101][0])  # first stored value for key 101 (presumably the patient number - confirm)

112
48
-0.3449717469301798


In [12]:
# Display the raw values of one segment (patient index 1, segment index 1).
values[1][1][0]

array([[ 6.46768771e-01],
       [ 6.22484148e-01],
       [ 6.25298548e-01],
       [ 6.19981657e-01],
       [ 6.11128754e-01],
       [ 6.31265178e-01],
       [ 6.12957592e-01],
       [ 6.40964458e-01],
       [ 5.93259455e-01],
       [ 6.11733724e-01],
       [ 6.03326059e-01],
       [ 5.70734037e-01],
       [ 4.94973157e-01],
       [ 4.40158779e-01],
       [ 3.61920425e-01],
       [ 3.42848979e-01],
       [ 3.76183424e-01],
       [ 3.83826328e-01],
       [ 2.73815808e-01],
       [ 1.93911977e-01],
       [ 1.70798110e-01],
       [ 1.87278917e-01],
       [ 1.89918178e-01],
       [ 1.38257898e-01],
       [ 4.08544868e-02],
       [ 2.22755533e-02],
       [ 2.83600008e-02],
       [ 3.66612091e-02],
       [ 1.51395866e-02],
       [-6.52903376e-02],
       [-1.01015782e-01],
       [-9.37218286e-02],
       [-5.81162141e-02],
       [-3.60792302e-02],
       [-8.64159834e-02],
       [-1.33609465e-01],
       [-7.92236064e-02],
       [-5.18261371e-02],
       [-6.7

In [13]:
# Count the ECG values per patient in the data set of this paper.
# The patient numbers are taken from the keys of dp_signals[0.01].
# NOTE(review): this assumes the key order matches the patient order in `values` - confirm.
patient_ids = list(dp_signals[0.01].keys())  # hoisted: was rebuilt on every loop iteration
values_counts = {}

for patient in range(len(values)):  # iterate over the patients
    # add up the number of values of all segments of this patient
    total_values = sum(len(segment[0]) for segment in values[patient])
    patient_nr = patient_ids[patient]
    values_counts[str(patient_nr)] = total_values

# Count the values per patient in the signals of the Zero-Shot-ECG paper.
dp_value_counts = {str(patient): len(dp_signals[0.01][patient]) for patient in dp_signals[0.01]}

In [14]:
# One row per patient: the value count of this data set next to the count
# found in the Zero-Shot-ECG signals (0 if the patient is missing there).
df_value_counts = pd.DataFrame(
    [
        (patient_id, n_values, dp_value_counts.get(patient_id, 0))
        for patient_id, n_values in values_counts.items()
    ],
    columns=["Patient", "Values_Count", "DP_Values_Count"],
)

In [15]:
# Compare both counts side by side.
df_value_counts

Unnamed: 0,Patient,Values_Count,DP_Values_Count
0,100,635880,650000
1,101,521360,650000
2,102,612080,650000
3,103,583240,650000
4,104,622440,650000
5,105,717080,650000
6,106,567280,650000
7,107,597520,650000
8,108,492240,650000
9,109,708680,650000


The number of values per patient differs between the two papers, so the values cannot be mapped onto each other and the differentially private values from the other paper cannot be reused.
Hence differential privacy is applied again to the data of this paper, using the same setup.

In [33]:
def save_patient_ids(dict_patients):
    """Pickle the given patient ids to ``../data/all_patients.pkl``.

    Args:
        dict_patients: Object holding the patient ids; it is pickled as-is.
    """
    target_path = osj("..", "data", "all_patients.pkl")
    with open(target_path, "wb") as handle:
        pickle.dump(dict_patients, handle)

# NOTE: despite its name, `dict_patients` is a list of the unique ids in the "Patient" column.
dict_patients = df_value_counts["Patient"].unique().tolist()
save_patient_ids(dict_patients)

## Prepare differential privacy setup

In [None]:
# Mechanism names that set_dp_mechanism dispatches on.
p_method = ["laplace", "bounded_n", "gaussian_a"]
# Epsilon values to sweep over.
hp_epsilon_values = [0.00001, 0.0001, 0.001, 0.01, 0.021, 0.031, 0.041, 0.051, 0.061, 0.071, 0.081, 0.091,
                          0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 
                    1.01, 1.11, 1.21, 1.31, 1.41, 1.51, 1.61, 1.71, 1.81, 1.91, 
                    2.01]
# Delta values to sweep over.
# NOTE(review): the comment in set_dp_mechanism says LaplaceBoundedNoise needs delta in (0, 0.5);
# the values >= 0.5 in this list would then be rejected for "bounded_n" - confirm.
hp_delta_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

In [34]:
def get_patient_ids():
    """Load the pickled patient ids from ``../data/all_patients.pkl``.

    Returns:
        The object stored in the pickle file.

    Raises:
        FileNotFoundError: If the pickle file does not exist.
    """
    with open(osj("..", "data", "all_patients.pkl"), "rb") as f:
        # Fixed: pickle.load expects the file object as its first argument;
        # the original passed `dict_patients` there and raised a TypeError.
        return pickle.load(f)
    
def get_data():
    """Read the ECG values from ``../data/s2s_mitbih_aami_original.mat``.

    Returns:
        The ``seg_values`` field of the first row of the ``s2s_mitbih``
        record array stored in the file.
    """
    mat_contents = spio.loadmat('../data/s2s_mitbih_aami_original.mat')
    first_row = mat_contents['s2s_mitbih'][0]
    return first_row['seg_values']

def read_dp_signals(m):
    """Load the pickled signals of one mechanism from ``../dp_data/<m>.pkl``.

    Args:
        m: Mechanism name; used as the file name without extension.

    Returns:
        The object stored in the pickle file.
    """
    pkl_path = osj("..", "dp_data", m + ".pkl")
    with open(pkl_path, "rb") as handle:
        stored_signals = pickle.load(handle)
    return stored_signals
    
def get_global_sensitivity(dict_patients, values, labels):
    """Return the largest share a single patient holds of any label class.

    The labels F, N, S, V and Q are counted per patient. Leaving one patient
    out lowers a class total by exactly that patient's own count, so the
    largest relative change of a class caused by removing one patient is
    ``max(per-patient count) / total count``. The maximum of these ratios
    over all classes is returned. This equals the original leave-one-out
    computation, without copying the per-patient counts once per patient.

    Reference totals noted in the original implementation for the data of
    this notebook: F=802, N=90502, S=2777, V=7226, Q=8031.

    Args:
        dict_patients: Sequence of patient ids; position ``i`` corresponds to
            ``labels[i]``.
        values: Not used. The original computed min/max/mean of the signal
            values here, but they never entered the returned ratio. The
            parameter is kept so existing calls keep working.
        labels: Indexable container; ``labels[i][0]`` is an iterable with one
            single-character label per segment of patient ``i``.

    Returns:
        float: Maximum over the classes of (largest per-patient count / total
        count). Classes that never occur are ignored.

    Raises:
        ValueError: If ``dict_patients`` is empty or none of the classes
            occurs in ``labels``.
    """
    # NOTE(review): the notebook text says class Q is not considered for model
    # training; Q is still part of the maximum here, as in the original code.
    classes = ("F", "N", "S", "V", "Q")

    patients = list(dict_patients)
    if not patients:
        raise ValueError("dict_patients must contain at least one patient.")

    totals = dict.fromkeys(classes, 0)   # class count over all patients
    largest = dict.fromkeys(classes, 0)  # largest class count of a single patient

    for position in range(len(patients)):
        patient_counts = Counter(labels[position][0])
        for key in classes:
            count = patient_counts.get(key, 0)
            totals[key] += count
            if count > largest[key]:
                largest[key] = count

    # Fixed: a class that never occurs has a total of 0 and made the original
    # divide by zero; such classes cannot change when a patient is removed.
    ratios = [largest[key] / totals[key] for key in classes if totals[key] > 0]
    if not ratios:
        raise ValueError("None of the classes F, N, S, V, Q occurs in labels.")

    return max(ratios)

def set_dp_mechanism(m, e, d, s, seed=42):
    """Create the diffprivlib mechanism selected by name.

    Args:
        m: Mechanism name: ``'laplace'``, ``'bounded_n'`` or ``'gaussian_a'``.
        e: Epsilon passed to the mechanism.
        d: Delta passed to the mechanism.
        s: Sensitivity passed to the mechanism.
        seed: Integer passed as ``random_state`` to the mechanism (default 42,
            the value the original tried to use).

    Returns:
        The configured ``Laplace``, ``LaplaceBoundedNoise`` or
        ``GaussianAnalytic`` instance.

    Raises:
        ValueError: If ``m`` is not one of the supported names. The original
            failed with an UnboundLocalError in that case.
    """
    if m not in ('laplace', 'bounded_n', 'gaussian_a'):
        raise ValueError(
            f"Unknown mechanism '{m}'; expected 'laplace', 'bounded_n' or 'gaussian_a'."
        )

    # Fixed: the original used `seed = random.seed(42)`. random.seed() returns
    # None, so random_state=None was passed and the mechanism was never seeded.
    # The global `random` module is still seeded, as before.
    random.seed(seed)

    if m == 'laplace':
        dp_mechanism = Laplace(epsilon=e, delta=d, sensitivity=s, random_state=seed)
    elif m == 'bounded_n':
        dp_mechanism = LaplaceBoundedNoise(epsilon=e, delta=d, sensitivity=s, random_state=seed) # Delta must be > 0 and in (0, 0.5).
    else:  # 'gaussian_a'
        dp_mechanism = GaussianAnalytic(epsilon=e, delta=d, sensitivity=s, random_state=seed)

    return dp_mechanism


def run_diffpriv(method, epsilon, delta, sensitivity, values):
    """Return a deep copy of ``values`` with every single value randomised.

    Args:
        method: Mechanism name, forwarded to ``set_dp_mechanism``.
        epsilon: Epsilon, forwarded to ``set_dp_mechanism``.
        delta: Delta, forwarded to ``set_dp_mechanism``.
        sensitivity: Sensitivity, forwarded to ``set_dp_mechanism``.
        values: Nested container indexed as ``values[patient][segment][0][i]``,
            where every innermost entry holds exactly one number.

    Returns:
        A deep copy of ``values`` in which each innermost entry was replaced
        by ``mechanism.randomise(entry)``. ``values`` itself is not modified.
    """
    ecgs = copy.deepcopy(values)
    random.seed(42)
    mechanism = set_dp_mechanism(method, epsilon, delta, sensitivity)

    ########  PATIENT  ########
    # Fixed: the original indexed `ecgs` with the patient/segment arrays
    # themselves instead of integer positions, and interpolated the whole
    # patient array into the log message.
    for patient_idx, patient in enumerate(ecgs):
        logger.info(f"Starting with patient {patient_idx} ...")

        ########  SEGMENT  ########
        for segment in patient:
            signal = segment[0]  # unwrap the singleton level around the values

            ########  SIGNAL  ########
            for value_idx in range(len(signal)):
                # Fixed: unwrap the length-1 entry to a scalar before
                # randomising, as the inline experiment cell of this notebook does.
                raw_value = np.asarray(signal[value_idx]).item()
                signal[value_idx] = mechanism.randomise(raw_value)

    return ecgs

def save_dp_signals(dict_signals_dp, m):
    """Pickle the signals of one mechanism to ``../dp_data/<m>.pkl``.

    Args:
        dict_signals_dp: Object to store; it is pickled as-is.
        m: Mechanism name; used as the file name without extension.
    """
    # Fixed: the original wrote to "../dp_signals", while read_dp_signals and
    # the resume check of the driver cell look in "../dp_data", so saved
    # results were never found again.
    target_dir = osj("..", "dp_data")
    os.makedirs(target_dir, exist_ok=True)  # the first save must not fail on a missing folder
    with open(osj(target_dir, m + ".pkl"), "wb") as f:
        pickle.dump(dict_signals_dp, f)



In [None]:
# print(ratio_f)
# print(ratio_n)
# print(ratio_s)
# print(ratio_v)
# print(ratio_q)

# print(diff_min)
# print(diff_max)
# print(diff_mean_max)
# print(diff_mean_min)

# printed:
# 0.46384039900249374
# 0.035269938785883186
# 0.49729924378826074
# 0.13728203708829229
# 0.2593699414767775
# 0.2786123120283415
# -0.5793735458718228
# 1.765213766018011
# 0.0009201804631791544
# 0.0017975683917464888

0.46384039900249374
0.035269938785883186
0.49729924378826074
0.13728203708829229
0.2593699414767775
0.2786123120283415
-0.5793735458718228
1.765213766018011
0.0009201804631791544
0.0017975683917464888


Die Schwankungen in den Werten selbst sind nicht ausschlaggebend für die Anwendung der Klassifizierungen, daher werden die relativen Veränderungen an den Klassen durch die Herausnahme einzelner Patienten betrachtet. Die Sensitivität unterscheidet sich recht stark zwischen den Klassen, da sie unterschiedlich stark vertreten sind und ein einzelner Patient daher einen mehr oder weniger starken Einfluss hat.

Gewählt wird die maximale Sensitivität der Klassen F, N, S und V (da Q im Modelltraining nicht weiter in Betracht gezogen wird).
Somit ist die Sensitivität 0,497.

## Apply Differential Privacy

In [31]:
# A single value is stored as a length-1 array; .item() unwraps it to a plain Python number.
value_test = values[0][0][0][0]
value_test.item()


0.8348821692979582

In [None]:
# Trial run: randomise every value of every patient with one fixed Laplace setup.
random.seed(42)

# Work on a deep copy so that `values` keeps the original signals.
ecgs = copy.deepcopy(values)
# Arguments: mechanism name, epsilon, delta, sensitivity.
# NOTE(review): the notebook text derives a sensitivity of 0.497, while 0.49 is used here - confirm.
mechanism = set_dp_mechanism("laplace", 0.01, 0.1, 0.49)

# The three counters are the integer positions used to index into `ecgs`.
patient_count = 0
########  PATIENT  ########
for patient in values:
    segment_count = 0
    logger.info(f"Starting with patient {patient_count} ...")

    ########  SEGMENT  ########
    for segment in patient:
        signal_count = 0 

        ########  SIGNAL  ########
        for signal in ecgs[patient_count][segment_count][0]:
            # .item() unwraps the length-1 array to a scalar before randomising
            dp_signal = mechanism.randomise(signal.item())
            ecgs[patient_count][segment_count][0][signal_count] = dp_signal
            signal_count += 1

        segment_count += 1
        
    patient_count += 1

2025-03-30 08:35:10 - INFO - Starting with patient 0 ...
2025-03-30 08:35:17 - INFO - Starting with patient 1 ...
2025-03-30 08:35:22 - INFO - Starting with patient 2 ...
2025-03-30 08:35:28 - INFO - Starting with patient 3 ...
2025-03-30 08:35:34 - INFO - Starting with patient 4 ...
2025-03-30 08:35:41 - INFO - Starting with patient 5 ...
2025-03-30 08:35:48 - INFO - Starting with patient 6 ...
2025-03-30 08:35:54 - INFO - Starting with patient 7 ...
2025-03-30 08:36:00 - INFO - Starting with patient 8 ...
2025-03-30 08:36:05 - INFO - Starting with patient 9 ...
2025-03-30 08:36:13 - INFO - Starting with patient 10 ...
2025-03-30 08:36:19 - INFO - Starting with patient 11 ...
2025-03-30 08:36:27 - INFO - Starting with patient 12 ...
2025-03-30 08:36:32 - INFO - Starting with patient 13 ...
2025-03-30 08:36:37 - INFO - Starting with patient 14 ...
2025-03-30 08:36:43 - INFO - Starting with patient 15 ...
2025-03-30 08:36:50 - INFO - Starting with patient 16 ...
2025-03-30 08:36:55 - IN

KeyboardInterrupt: 

In [38]:
# Scratch cell: checks for which epsilon values complete results already exist.
# NOTE: this cell overwrites the notebook-level hp_epsilon_values,
# hp_delta_values, sensitivity and values.
mechanism = "laplace"
hp_epsilon_values = [0.001, 0.01, 0.021, 0.031, 0.041, 0.051, 0.061, 0.071, 0.081, 0.091,
                     0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91,
                     1.01, 1.11, 1.21, 1.31, 1.41, 1.51, 1.61, 1.71, 1.81, 1.91,
                     2.01]
hp_delta_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
sensitivity = 0.49
values = get_data()

logger.info(f"Setup for differential privacy with {mechanism} for all patients per epsilon.")
dict_signals_dp = dict.fromkeys(hp_epsilon_values)

# Fixed: the original checked "../data_dp", but read_dp_signals loads from
# "../dp_data", so the existence check and the load did not refer to the same file.
if os.path.exists(osj("..", "dp_data", mechanism + ".pkl")):
    dict_signals_dp = read_dp_signals(mechanism)

########  EPSILON  ########
for epsilon in hp_epsilon_values:

    # .get(): a loaded dictionary may lack epsilon values that were added later.
    epsilon_results = dict_signals_dp.get(epsilon)
    if (epsilon_results is not None) and (len(epsilon_results) == len(hp_delta_values)):
        print("Reached this")

2025-03-30 09:16:51 - INFO - Setup for differential privacy with laplace for all patients per epsilon.


In [37]:
# Show the epsilon -> results mapping (None means nothing is stored for that epsilon).
dict_signals_dp

{0.001: None,
 0.01: None,
 0.021: None,
 0.031: None,
 0.041: None,
 0.051: None,
 0.061: None,
 0.071: None,
 0.081: None,
 0.091: None,
 0.11: None,
 0.21: None,
 0.31: None,
 0.41: None,
 0.51: None,
 0.61: None,
 0.71: None,
 0.81: None,
 0.91: None,
 1.01: None,
 1.11: None,
 1.21: None,
 1.31: None,
 1.41: None,
 1.51: None,
 1.61: None,
 1.71: None,
 1.81: None,
 1.91: None,
 2.01: None}

In [None]:
# def apply_diffpriv():

# Sweep over all mechanisms, epsilon and delta values, and store the
# randomised signals after every finished combination so that an interrupted
# run can be resumed.
sensitivity = get_global_sensitivity(dict_patients, values, labels)

########  MECHANISM  ########
for mechanism in p_method:
    logger.info(f"Setup for differential privacy with {mechanism} for all patients per epsilon.")
    dict_signals_dp = dict.fromkeys(hp_epsilon_values)

    if os.path.exists(osj("..", "dp_data", mechanism + ".pkl")):
        dict_signals_dp = read_dp_signals(mechanism)

    # LaplaceBoundedNoise only accepts delta in (0, 0.5) (see the comment in
    # set_dp_mechanism); other deltas are left out instead of aborting the sweep.
    if mechanism == "bounded_n":
        deltas = [delta for delta in hp_delta_values if 0 < delta < 0.5]
        logger.info(f"Using only the delta values {deltas} for {mechanism}.")
    else:
        deltas = list(hp_delta_values)

    ########  EPSILON  ########
    for epsilon in hp_epsilon_values:

        # Fixed: dict.fromkeys fills the dictionary with None, so the original
        # `dict_signals_dp[epsilon][delta]` raised a TypeError on a fresh run.
        if dict_signals_dp.get(epsilon) is None:
            dict_signals_dp[epsilon] = {}

        # Fixed: an epsilon is only skipped once all of its deltas are done;
        # the original skipped it as soon as any result existed.
        if all(dict_signals_dp[epsilon].get(delta) is not None for delta in deltas):
            logger.info(f"Skipping epsilon {epsilon} ...")
            continue

        ########  DELTA  ########
        for delta in deltas:

            # .get(): a delta that was not computed yet has no key at all.
            if dict_signals_dp[epsilon].get(delta) is not None:
                logger.info(f"Skipping delta {delta} ...")
                continue

            logger.info(f"Calculating data for epsilon {epsilon} and delta {delta} ...")
            dp_all_patients = run_diffpriv(mechanism, epsilon, delta, sensitivity, values)
            dict_signals_dp[epsilon][delta] = dp_all_patients

            # save dp signals
            save_dp_signals(dict_signals_dp, mechanism)
            logger.info(f"Saved results for epsilon {epsilon} and delta {delta}")
