In [2]:
import os
import sys
import numpy as np

# Resolve the project root, taken to be two levels above the notebook's
# current working directory, and derive the standard sub-directories from it.
project_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
src_dir = os.path.join(project_dir, 'src')
log_dir = os.path.join(project_dir, 'log')
fig_dir = os.path.join(project_dir, 'fig')

# Make the modules under src/ importable. The membership check keeps the cell
# idempotent: re-running it does not stack duplicate entries onto sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)


from Dataset.cancer_dataset_for_LS import main_generate_cancer_matrices_for_LS, load_cancer_dataset_matrices_for_LS
from analysis.commons import data_normalize_by_features, concatenate_B_b

from analysis.RP_privacy_analysis_advanced import rp_find_rho_for_delta, smooth_leverage_upper_bound, f_smooth
from RP_mechanisms.optim_RP_DP import compute_largest_l2


In [12]:
from Dataset.flight_dataset_for_LS import load_flight_dataset_matrices_for_LS, save_flight_dataset_matrices_for_LS

file_X_path = os.path.join(project_dir, "Dataset", "flight-LR-X.txt")
file_y_path = os.path.join(project_dir, "Dataset", "flight-LR-y.txt")

# Reset first so that a failed (re-)run cannot silently leave stale matrices
# from an earlier execution of this cell in the namespace.
X = None
y = None
try:
    # Fast path: read the matrices from the text files if they are usable.
    X, y = load_flight_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
except Exception as load_error:
    # `except Exception` instead of a bare `except:` lets KeyboardInterrupt and
    # SystemExit propagate, and the reason for the rebuild is reported rather
    # than swallowed.
    print(f"Could not load cached flight matrices ({load_error!r}); regenerating.")
    save_flight_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
    # Reload from the very paths that were just written instead of relying on
    # the loader's default file names.
    X, y = load_flight_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)

# file_X_path = os.path.join(project_dir, "Dataset", "cancer-LR-X.txt")
# file_y_path = os.path.join(project_dir, "Dataset", "cancer-LR-y.txt")

# X = None
# y = None
# try:
#     X, y = load_cancer_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
# except:
#     main_generate_cancer_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
#     X, y = load_cancer_dataset_matrices_for_LS()

# Turn the loaded (X, y) pair into the single matrix A that the cells below
# operate on.
# NOTE(review): judging by the helper names, B/b are feature-normalized versions
# of X/y and A stacks them together (presumably [B | b]) — confirm in
# analysis.commons.
B, b = data_normalize_by_features(X, y)
A = concatenate_B_b(B, b)

In [15]:
from RP_mechanisms.optim_RP_DP import OptimalRP_mech, compute_largest_l2
# Notebook-level settings consumed by the mechanism cells below.
epsilon = 1  # passed to find_minimal_sigma; presumably the privacy budget — confirm
delta = 1e-3  # passed to find_minimal_sigma; presumably the DP failure probability — confirm
r = 300  # stored under the 'r' key of the mechanism configuration
largest_row_norm = compute_largest_l2(A)  # stored under the 'l2' key; presumably the largest row l2-norm of A — confirm

def generate_default_configuration():
    """Build the configuration dictionary for a mechanism object.

    The entries ``database``, ``r`` and ``l2`` are read from the notebook
    globals ``A``, ``r`` and ``largest_row_norm`` at call time, so the result
    reflects whatever those names are bound to when the function is invoked.
    ``CI`` and ``bootstrap_samples`` are fixed at 0.95 and 100.

    Returns:
        dict: A freshly created dictionary on every call.
    """
    return dict(
        database=A,
        r=r,
        CI=0.95,
        bootstrap_samples=100,
        l2=largest_row_norm,
    )

# Build the shared configuration and instantiate the mechanism from it.
# `kwargs` is kept as a global because a later cell reuses it.
kwargs = generate_default_configuration()

mech = OptimalRP_mech(kwargs)

# Ask the mechanism for sigma at the chosen (epsilon, delta) and print it.
# NOTE(review): by its name this is the minimal noise level meeting the
# privacy target — confirm in RP_mechanisms.optim_RP_DP.
sigma = mech.find_minimal_sigma(epsilon, delta)
print(sigma)

0.42852637504569646


In [17]:
from RP_mechanisms.optim_RP_DP import one_short_PTR_RP_mech

# Same configuration dictionary, different mechanism class. Note that `mech`
# and `sigma` are rebound here, replacing the values from the OptimalRP_mech run.
mech = one_short_PTR_RP_mech(kwargs)

# `ratio` is forwarded unchanged; its meaning is defined by
# one_short_PTR_RP_mech.find_minimal_sigma — TODO document here once confirmed.
sigma = mech.find_minimal_sigma(epsilon, delta, ratio=0.05)
print(sigma)

sigma: 1.8376897975e+00
lam_min: 7.9253189013e-02
lam_lb: 4.8450623427e-02
lam_tilde: 8.3799738110e-02
eta: 4.5465490965e-03
ratio: 5.0000000000e-02
s_bar: 1.5645437785e-03
alpha: 3.5349114683e-02
upper bound of sigma: 1.8508253335e+00
l: 7.3208126926e-02
lam_min: 7.9253189013e-02
1.837689797465732


In [8]:
# Gram matrix of A. It is symmetric, so the symmetric eigenvalue solver is
# used to obtain its spectrum.
ATA = np.matmul(A.T, A)
eigenvalues = np.linalg.eigvalsh(ATA)
lam_min = eigenvalues.min()

print(f"Smallest eigenvalue: {lam_min :.10e}")

Smallest eigenvalue: 7.9253189013e-02


In [6]:
# Euclidean (l2) norm of every row of A, followed by the largest of them.
row_norms = np.linalg.norm(A, ord=2, axis=1)
l = row_norms.max()

In [18]:

rng = np.random.default_rng()
eps_T = 1
delta_fail = 1e-5

# Laplace scale used both for the noise draw and for the margin below.
laplace_scale = l**2 / eps_T

# Perturb the smallest eigenvalue with a single Laplace draw.
eta = float(rng.laplace(loc=0.0, scale=laplace_scale))
print(f"eta: {eta:.10e}")
lam_tilde = lam_min + eta

# Subtract the margin alpha from the noisy value and clip the result at zero.
alpha = laplace_scale * np.log(1.0 / (2.0 * delta_fail))
lam_shifted = lam_tilde - alpha
lam_lb = 0.0 if lam_shifted < 0.0 else lam_shifted
print(f"lambda lower bound: {lam_lb:.10e}")
# sigma2 = max((l**2) / s_bar - lam_lb, 0.0)

eta: -1.0850914657e-02
lambda lower bound: 1.0414431670e-02


In [5]:
from RP_mechanisms.optim_RP import compute_IS

# NOTE: `epsilon` and `r` are notebook-level globals that an earlier cell also
# sets (r = 300 there). Rebinding them here affects every cell executed
# afterwards, including later calls to generate_default_configuration(),
# which reads the global `r` at call time.
epsilon = 1
r = 30
leverage = 0.05
# Last expression of the cell, so its value is rendered as the cell output.
compute_IS(epsilon, leverage, r)

np.float64(8.85509089962406e-06)

In [6]:
# This import rebinds the name `compute_IS` to the implementation in
# analysis.RP_privacy_analysis_advanced, shadowing any earlier import of the
# same name; from here on `compute_IS` refers to this version.
from analysis.RP_privacy_analysis_advanced import compute_IS

# Evaluated on the current notebook values of epsilon, leverage and r.
compute_IS(epsilon, leverage, r)

8.85509089968015e-06