In [1]:
import numpy as np
from scipy.special import erf
import os
import sys
import time
import copy
import logging
import matplotlib.pyplot as plt

# Resolve the project root (two levels above the current working directory)
# and the sub-directories this notebook reads from / writes to.
# Nothing here changes the working directory; only paths are computed.
project_dir = os.path.abspath(os.path.join(os.getcwd(), '../..'))
src_dir = os.path.join(project_dir, 'src')
log_dir = os.path.join(project_dir, 'log')
fig_dir = os.path.join(project_dir, 'fig')

# Make the project's src directory importable. The membership guard keeps the
# cell idempotent: re-running it no longer appends a duplicate sys.path entry.
if src_dir not in sys.path:
    sys.path.append(src_dir)


from Dataset.cancer_dataset_for_LS import main_generate_cancer_matrices_for_LS, load_cancer_dataset_matrices_for_LS
from analysis.commons import data_normalize_by_features, concatenate_B_b

from RP_mechanisms.optim_RP_DP import compute_largest_l2
from LS_mechanisms.alt19_LS import ALT19LS_mech
from LS_mechanisms.optim_LS_DP import LS_fromoptim_RP_mech, OptimalLS_mech
from LS_mechanisms.LS_accuracy_estimator import LS_accuracy_estimator

In [2]:
# logging.FileHandler opens its file immediately and raises FileNotFoundError
# when the parent directory is missing, so create the log directory up front.
os.makedirs(log_dir, exist_ok=True)
logfile_path = os.path.join(log_dir, 'LS-cancer-default.log')

# Route records at INFO and above to both the log file and a stream handler
# (stderr by default). Note that basicConfig is a no-op when the root logger
# already has handlers, e.g. when this cell is re-run in the same kernel.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(message)s",
    handlers=[
        logging.FileHandler(logfile_path),
        logging.StreamHandler()
    ]
)

def store_array_str(data_array):
    """Return NumPy's string form of ``data_array`` with spaces turned into commas.

    The substitution is purely textual: every single space character in the
    output of ``np.array_str`` is replaced by one comma, so alignment padding
    and the leading space of continuation rows become commas as well.
    """
    return np.array_str(data_array).replace(' ', ',')

# On-disk locations of the cached X and y matrices for the cancer dataset.
file_X_path = os.path.join(project_dir, "Dataset", "cancer-LR-X.txt")
file_y_path = os.path.join(project_dir, "Dataset", "cancer-LR-y.txt")

try:
    X, y = load_cancer_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
except Exception as load_error:
    # `except Exception` (instead of a bare `except:`) lets KeyboardInterrupt
    # and SystemExit propagate, and the failure is logged rather than hidden.
    logging.warning(
        "Could not load cached cancer matrices (%r); regenerating them.", load_error
    )
    main_generate_cancer_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
    # Reload from the SAME paths that were just written; calling the loader
    # without arguments would fall back to its own default file names.
    X, y = load_cancer_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Normalize the loaded X and y, then merge the two results into the single
# matrix A that is handed to the mechanisms as their 'database'.
# NOTE(review): presumably normalization is per feature column and A is B with
# b appended — confirm against analysis.commons.
B, b = data_normalize_by_features(X, y)
A = concatenate_B_b(B, b)

In [4]:
def generate_default_configuration(database=None, r=300, CI=0.95, bootstrap_samples=100):
    """Build the configuration dict consumed by the mechanisms and estimators.

    Called with no arguments it reproduces the original defaults, using the
    notebook-level matrix ``A`` as the database.

    Parameters
    ----------
    database : array-like, optional
        Matrix stored under ``'database'``. Defaults to the module-level ``A``,
        which is looked up at call time.
    r : int, optional
        Value stored under ``'r'`` (default 300).
        NOTE(review): presumably the projection dimension — confirm.
    CI : float, optional
        Value stored under ``'CI'`` (default 0.95).
    bootstrap_samples : int, optional
        Value stored under ``'bootstrap_samples'`` (default 100).

    Returns
    -------
    dict
        Keys ``'database'``, ``'r'``, ``'CI'``, ``'bootstrap_samples'`` and
        ``'l2'``, where ``'l2'`` is ``compute_largest_l2(database)``.
    """
    if database is None:
        # Preserve the original behaviour: fall back to the notebook global.
        database = A

    return {
        'database': database,
        'r': r,
        'CI': CI,
        'bootstrap_samples': bootstrap_samples,
        # Derived from whichever database is actually used, so the two stay in sync.
        'l2': compute_largest_l2(database),
    }

# Shared configuration dict; the cells below pass it to the mechanism
# constructors and to LS_accuracy_estimator.
kwargs = generate_default_configuration()

In [5]:
from LS_mechanisms.optim_LS_DP import ImprovedOptimalLS_mech

In [8]:
# Parameters for the first experiment. `num_samples` is also read by later
# sampling cells. (epsilon, delta) are presumably the differential-privacy
# budget — confirm against OptimalLS_mech.
epsilon, delta = 2, 10**(-6)
num_samples = 100

# Build the mechanism from the shared configuration. The trailing expression
# is left bare so the notebook displays the sigma it returns.
v1_mech = OptimalLS_mech(kwargs)
v1_mech.find_minimal_sigma(epsilon, delta)

7.02201756971702

In [9]:
# Time how long v1_mech takes to draw `num_samples` outputs at the current
# (epsilon, delta); all three values come from the preceding cell.
tic = time.perf_counter()
samples = v1_mech._gen_samples(epsilon, delta, num_samples)
toc = time.perf_counter()
logging.critical(f"cost {toc - tic:0.4f} seconds")

# Time the accuracy estimate computed from those samples and log the result.
# Per the recorded output, the result is a pair of bootstrap_result tuples
# holding one- and two-sided lower/upper bounds.
tic = time.perf_counter()
accuracy_estimator = LS_accuracy_estimator(kwargs)
boot_res = accuracy_estimator.estimate_square_error(samples)
toc = time.perf_counter()
logging.critical(boot_res)
logging.critical(f"cost {toc - tic:0.4f} seconds")

2025-11-12 23:14:20,111 cost 2.4773 seconds
2025-11-12 23:14:20,135 (bootstrap_result(lb_one_sided=np.float64(0.9896768382214716), lb_two_sided=np.float64(0.9874757325758916), ub_one_sided=np.float64(1.0139516566638025), ub_two_sided=np.float64(1.0150853235392021)), bootstrap_result(lb_one_sided=np.float64(4.551321199992122), lb_two_sided=np.float64(4.526084919075353), ub_one_sided=np.float64(4.898555146494531), ub_two_sided=np.float64(4.931253416740696)))
2025-11-12 23:14:20,136 cost 0.0232 seconds


In [17]:
# Parameters for the second experiment; these overwrite the notebook-level
# `epsilon` and `delta` set for the first mechanism and introduce `rho`.
epsilon, delta, rho = 5, 10**(-6), 1

# Build the improved mechanism from the shared configuration. The trailing
# expression is left bare so the notebook displays the sigma it returns.
v2_mech = ImprovedOptimalLS_mech(kwargs)
v2_mech.find_sigma_with_fixed_rho(epsilon, delta, rho=rho)

12.833513275146485

In [16]:
# Time how long v2_mech takes to draw samples at the current (epsilon, delta, rho).
# NOTE: `num_samples` is not set in this cell or in the v2 setup cell; it is
# the value assigned in the earlier OptimalLS_mech cell, so that cell must
# have been run first.
tic = time.perf_counter()
samples = v2_mech._gen_samples(epsilon, delta, num_samples, rho=rho)
toc = time.perf_counter()
logging.critical(f"cost {toc - tic:0.4f} seconds")

# Time the accuracy estimate computed from those samples and log the result.
# This overwrites `samples`, `accuracy_estimator` and `boot_res` from the
# first experiment.
tic = time.perf_counter()
accuracy_estimator = LS_accuracy_estimator(kwargs)
boot_res = accuracy_estimator.estimate_square_error(samples)
toc = time.perf_counter()
logging.critical(boot_res)
logging.critical(f"cost {toc - tic:0.4f} seconds")

2025-11-12 23:18:27,922 cost 17.3924 seconds
2025-11-12 23:18:27,946 (bootstrap_result(lb_one_sided=np.float64(11.084835868095517), lb_two_sided=np.float64(11.02139851300105), ub_one_sided=np.float64(11.910393219392464), ub_two_sided=np.float64(11.923586986776403)), bootstrap_result(lb_one_sided=np.float64(0.08833432773786226), lb_two_sided=np.float64(0.08797459985097004), ub_one_sided=np.float64(0.09474717358766511), ub_two_sided=np.float64(0.0951704173123909)))
2025-11-12 23:18:27,947 cost 0.0232 seconds


In [9]:
# mech = LS_fromoptim_RP_mech(kwargs)

# epsilon = 1
# delta = 1/mech.n

# mech.find_minimal_sigma(epsilon, delta)

In [10]:
# mech = LS_fromoptim_RP_mech(kwargs)

# epsilon = 1
# delta = 1/mech.n
# num_samples = 100

# tic = time.perf_counter()
# samples = mech._gen_samples(epsilon, delta, num_samples)
# toc = time.perf_counter()
# logging.critical(f"cost {toc - tic:0.4f} seconds")

In [11]:
# tic = time.perf_counter()
# accuracy_estimator = LS_accuracy_estimator(kwargs)
# boot_res = accuracy_estimator.estimate_square_error(samples)
# toc = time.perf_counter()
# logging.critical(boot_res)
# logging.critical(f"cost {toc - tic:0.4f} seconds")

In [12]:
# mech = ALT19LS_mech(kwargs)

# epsilon = 1
# delta = 1/mech.n

# mech.compute_constant(epsilon, delta)

In [13]:
# mech = ALT19LS_mech(kwargs)

# epsilon = 1
# delta = 1/mech.n
# num_samples = 100

# tic = time.perf_counter()
# samples = mech._gen_samples(epsilon, delta, num_samples)
# toc = time.perf_counter()
# logging.critical(f"cost {toc - tic:0.4f} seconds")

In [14]:
# tic = time.perf_counter()
# accuracy_estimator = LS_accuracy_estimator(kwargs)
# boot_res = accuracy_estimator.estimate_square_error(samples)
# toc = time.perf_counter()
# logging.critical(boot_res)
# logging.critical(f"cost {toc - tic:0.4f} seconds")