In [1]:
import numpy as np
from scipy.special import erf
import os
import sys
import time
import copy
import logging
import matplotlib.pyplot as plt

# Resolve the project root, taken to be two levels above the current working directory,
# and derive the source, log and figure directories from it.
project_dir = os.path.abspath(os.path.join(os.getcwd(), '../..'))
src_dir, log_dir, fig_dir = (
    os.path.join(project_dir, subdir) for subdir in ('src', 'log', 'fig')
)

# Make the packages under src/ importable from this notebook.
sys.path.append(src_dir)



from Dataset.flight_dataset_for_LS import load_flight_dataset_matrices_for_LS, save_flight_dataset_matrices_for_LS
from analysis.commons import data_normalize_by_features, concatenate_B_b, data_normalize_by_sample


from RP_mechanisms.optim_RP_DP import compute_largest_l2
from LS_mechanisms.alt19_LS import ALT19LS_mech
from LS_mechanisms.optim_LS_DP import LS_fromoptim_RP_mech, OptimalLS_mech
from LS_mechanisms.LS_accuracy_estimator import LS_accuracy_estimator
from LS_mechanisms.optim_LS_DP import variantLS_mech

In [2]:
# logging.FileHandler opens its file as soon as it is constructed and fails when the
# parent directory is missing, so make sure the log directory exists first.
os.makedirs(log_dir, exist_ok=True)
logfile_path = os.path.join(log_dir, 'LS-flight-default.log')

# Send every record at INFO level or above both to the log file and to the
# notebook output (StreamHandler writes to stderr by default).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(message)s",
    handlers=[
        logging.FileHandler(logfile_path),
        logging.StreamHandler()
    ]
)

def store_array_str(data_array):
    """Return NumPy's string form of ``data_array`` with each space turned into a comma.

    Every space character is replaced, including any padding spaces NumPy
    inserts, so consecutive spaces produce consecutive commas.
    """
    return np.array_str(data_array).replace(' ', ',')

# Text files that hold the X and y matrices of the flight dataset.
file_X_path = os.path.join(project_dir, "Dataset", "flight-LR-X.txt")
file_y_path = os.path.join(project_dir, "Dataset", "flight-LR-y.txt")

try:
    X, y = load_flight_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
except Exception as load_error:
    # Best-effort fallback: (re)generate the matrix files and load them again.
    # `Exception` (not a bare `except`) keeps KeyboardInterrupt / SystemExit from
    # silently triggering a regeneration, and the failure is logged instead of hidden.
    logging.warning("Could not load the flight matrices (%r); regenerating them.", load_error)
    save_flight_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
    # Reload from the very files that were just written rather than from the
    # loader's default locations.
    X, y = load_flight_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)

# Normalise the data and join the two normalised pieces into the single matrix `A`
# that the configuration cell uses as the database.
B, b = data_normalize_by_sample(X, y)
A = concatenate_B_b(B, b)

In [3]:
def generate_default_configuration():
    """Build the default keyword configuration passed to the mechanisms and the estimator.

    The database is the notebook-level matrix ``A`` and ``l2`` is
    ``compute_largest_l2(A)``; the remaining entries are fixed defaults.

    Returns:
        dict: a freshly created configuration dictionary.
    """
    return dict(
        database=A,
        r=1200,
        CI=0.95,
        bootstrap_samples=100,
        l2=compute_largest_l2(A),
        clip_norm=1,
        lev_upper_bound=1,
    )


kwargs = generate_default_configuration()

In [4]:
# Build the variant LS mechanism from the shared configuration dictionary.
mech = variantLS_mech(kwargs)

# Privacy parameters: delta is the reciprocal of the mechanism's `n` attribute
# (presumably the number of database rows -- TODO confirm).
epsilon = 1
delta = 1/mech.n
# Last expression of the cell, so its return value is shown as the cell output.
mech.find_minimal_sigma(epsilon, delta)

 leverage upper bound is 1


current sigma is 50.0005, and the corresponding delta is 0.0; leverage is 0.0007993445373993981, augmented leverage is 0.0015986890747987962
ratio is 1.0
current sigma is 25.00075, and the corresponding delta is 1.794488283915562e-21; leverage is 0.003189601894951816, augmented leverage is 0.006379203789903632
ratio is 1.0
current sigma is 12.500875, and the corresponding delta is 0.002981699556364031; leverage is 0.012636483837227507, augmented leverage is 0.025272967674455013
ratio is 1.0
current sigma is 18.750812500000002, and the corresponding delta is 1.1475029840367006e-08; leverage is 0.005656221059305242, augmented leverage is 0.011312442118610483
ratio is 1.0
current sigma is 15.625843750000001, and the corresponding delta is 2.4539339888786756e-05; leverage is 0.008124566077862506, augmented leverage is 0.016249132155725012
ratio is 1.0
current sigma is 17.188328125, and the corresponding delta is 8.202634391363956e-07; leverage is 0.00672407629816622, augmented leverage is 

16.625665904998776

In [6]:
# Display the `l` attribute of the current mechanism (its meaning is not visible in this notebook).
mech.l

np.float64(1.4142135623730951)

In [5]:
# NOTE(review): this mutates the shared `kwargs` dict in place, so every cell run
# afterwards that passes `kwargs` also sees lev_upper_bound = 0.8.
kwargs["lev_upper_bound"] = 0.8
# Rebuild the mechanism with the new bound; `epsilon` and `delta` are whatever an
# earlier cell left in the kernel.
mech = variantLS_mech(kwargs)
mech.find_minimal_sigma(epsilon, delta)

 leverage upper bound is 0.8


current sigma is 50.0005, and the corresponding delta is 0.0; leverage is 0.0007991848313920617, augmented leverage is 0.0015983696627841235
ratio is 0.9998002037921521
current sigma is 25.00075, and the corresponding delta is 1.6773426314915252e-21; leverage is 0.0031870605313742673, augmented leverage is 0.006374121062748535
ratio is 0.9992032348671567
current sigma is 12.500875, and the corresponding delta is 0.0029110452168336757; leverage is 0.012596689371815127, augmented leverage is 0.025193378743630254
ratio is 0.9968508276570461
current sigma is 18.750812500000002, and the corresponding delta is 1.1017876867601741e-08; leverage is 0.005648234144076839, augmented leverage is 0.011296468288153678
ratio is 0.998587941463981
current sigma is 15.625843750000001, and the corresponding delta is 2.3792357504231048e-05; leverage is 0.008108097384620733, augmented leverage is 0.016216194769241465
ratio is 0.9979729756538449
current sigma is 17.188328125, and the corresponding delta is 7

16.6111702003479

In [8]:
# NOTE(review): `mech`, `epsilon` and `delta` are not assigned in this cell; it
# relies on the values an earlier cell left in the kernel. Re-enabling the
# disabled lines below would make the cell self-contained.
# mech = variantLS_mech(kwargs)

# epsilon = 1
# delta = 1/mech.n
num_samples = 100

# Time only the sampling call.
tic = time.perf_counter()
samples = mech._gen_samples(epsilon, delta, num_samples)
toc = time.perf_counter()
# Elapsed seconds between the two perf_counter readings, logged at CRITICAL level.
logging.critical(f"cost {toc - tic:0.4f} seconds")

 leverage upper bound is 0.01
current sigma is 50.0005, and the corresponding delta is 0.0; leverage is 2.143269605455092e-06, augmented leverage is 4.286539210910184e-06
ratio is 0.9997878163090599
current sigma is 25.00075, and the corresponding delta is 0.0; leverage is 8.56722724285241e-06, augmented leverage is 1.713445448570482e-05
ratio is 0.9991518445029578
current sigma is 12.500875, and the corresponding delta is 0.0; leverage is 3.4178333010891334e-05, augmented leverage is 6.835666602178267e-05
ratio is 0.9966163450319219
current sigma is 6.2509375, and the corresponding delta is 0.0; leverage is 0.0001353044141785313, augmented leverage is 0.0002706088283570626
ratio is 0.9866048629963254
current sigma is 3.12596875, and the corresponding delta is 0.0; leverage is 0.0005199481520029545, augmented leverage is 0.001039896304005909
ratio is 0.9485251329517077
current sigma is 1.563484375, and the corresponding delta is 0.0; leverage is 0.0017982091058252427, augmented leverag

2025-11-13 11:48:35,366 cost 22.6664 seconds


current sigma is 0.45799616241455077, and the corresponding delta is 3.0850522354946054e-06; leverage is 0.007187079136110067, augmented leverage is 0.014374158272220133
ratio is 0.28847916552510344


In [9]:
# Estimate the squared error for whatever `samples` currently holds, timing both
# the estimator construction and the estimation call.
tic = time.perf_counter()
accuracy_estimator = LS_accuracy_estimator(kwargs)
boot_res = accuracy_estimator.estimate_square_error(samples)
toc = time.perf_counter()
# Log the raw result object first, then the elapsed seconds.
logging.critical(boot_res)
logging.critical(f"cost {toc - tic:0.4f} seconds")

2025-11-13 11:49:29,330 (bootstrap_result(lb_one_sided=np.float64(0.1950119291967071), lb_two_sided=np.float64(0.1937075441900612), ub_one_sided=np.float64(0.20113193528622172), ub_two_sided=np.float64(0.20139139385951269)), bootstrap_result(lb_one_sided=np.float64(1.2317036504291654), lb_two_sided=np.float64(1.2298849965044754), ub_one_sided=np.float64(1.2422041608212189), ub_two_sided=np.float64(1.243076421641407)))
2025-11-13 11:49:29,332 cost 0.1090 seconds


In [10]:
# NOTE(review): the code of this cell is identical to the preceding accuracy cell;
# its result depends entirely on which `samples` and `kwargs` are in the kernel when it runs.
tic = time.perf_counter()
accuracy_estimator = LS_accuracy_estimator(kwargs)
boot_res = accuracy_estimator.estimate_square_error(samples)
toc = time.perf_counter()
# Log the raw result object first, then the elapsed seconds.
logging.critical(boot_res)
logging.critical(f"cost {toc - tic:0.4f} seconds")

2025-11-13 09:39:43,608 (bootstrap_result(lb_one_sided=np.float64(0.4607513466947175), lb_two_sided=np.float64(0.4595056614648668), ub_one_sided=np.float64(0.4690084136323029), ub_two_sided=np.float64(0.46936730375378743)), bootstrap_result(lb_one_sided=np.float64(1.815252909076769), lb_two_sided=np.float64(1.8141887008258823), ub_one_sided=np.float64(1.8419966154992022), ub_two_sided=np.float64(1.8437951941565414)))
2025-11-13 09:39:43,609 cost 0.0494 seconds


In [17]:
# Switch to the OptimalLS mechanism, built from the same shared `kwargs`.
mech = OptimalLS_mech(kwargs)

# Privacy parameters and number of samples to draw; delta is 1 over the mechanism's `n`.
epsilon = 1
delta = 1/mech.n
num_samples = 100

# Time only the sampling call.
tic = time.perf_counter()
samples = mech._gen_samples(epsilon, delta, num_samples)
toc = time.perf_counter()
# Elapsed seconds between the two perf_counter readings, logged at CRITICAL level.
logging.critical(f"cost {toc - tic:0.4f} seconds")

2025-11-13 09:48:43,768 cost 2.5845 seconds


In [5]:
# Estimate the squared error for whatever `samples` currently holds, timing both
# the estimator construction and the estimation call.
tic = time.perf_counter()
accuracy_estimator = LS_accuracy_estimator(kwargs)
boot_res = accuracy_estimator.estimate_square_error(samples)
toc = time.perf_counter()
# Log the raw result object first, then the elapsed seconds.
logging.critical(boot_res)
logging.critical(f"cost {toc - tic:0.4f} seconds")

2025-11-13 09:38:06,650 (bootstrap_result(lb_one_sided=np.float64(0.4653680139331255), lb_two_sided=np.float64(0.46516645607205787), ub_one_sided=np.float64(0.47349329442199656), ub_two_sided=np.float64(0.4746781902227046)), bootstrap_result(lb_one_sided=np.float64(1.8309780288034758), lb_two_sided=np.float64(1.8290515947371613), ub_one_sided=np.float64(1.8603918666022121), ub_two_sided=np.float64(1.862313017486217)))
2025-11-13 09:38:06,653 cost 0.0409 seconds


In [7]:
# Build the LS mechanism derived from the optimised RP mechanism, using the shared `kwargs`.
mech = LS_fromoptim_RP_mech(kwargs)

# Privacy parameters; delta is 1 over the mechanism's `n` attribute.
epsilon = 1
delta = 1/mech.n

# Last expression of the cell, so its return value is shown as the cell output.
mech.find_minimal_sigma(epsilon, delta)

  exp_term = np.exp(epsilon - r / 2 * np.log(1 - leverage)) * (1 - leverage) ** (r / 2.0)
  exp_term = np.exp(epsilon - r / 2 * np.log(1 - leverage)) * (1 - leverage) ** (r / 2.0)


np.float64(0.7452001524823914)

In [8]:
# Rebuild the LS-from-optimised-RP mechanism from the shared `kwargs`.
mech = LS_fromoptim_RP_mech(kwargs)

# Privacy parameters and number of samples to draw; delta is 1 over the mechanism's `n`.
epsilon = 1
delta = 1/mech.n
num_samples = 100

# Time only the sampling call.
tic = time.perf_counter()
samples = mech._gen_samples(epsilon, delta, num_samples)
toc = time.perf_counter()
# Elapsed seconds between the two perf_counter readings, logged at CRITICAL level.
logging.critical(f"cost {toc - tic:0.4f} seconds")

2025-11-12 12:52:13,953 cost 1465.3263 seconds


In [9]:
# Estimate the squared error for whatever `samples` currently holds, timing both
# the estimator construction and the estimation call.
tic = time.perf_counter()
accuracy_estimator = LS_accuracy_estimator(kwargs)
boot_res = accuracy_estimator.estimate_square_error(samples)
toc = time.perf_counter()
# Log the raw result object first, then the elapsed seconds.
logging.critical(boot_res)
logging.critical(f"cost {toc - tic:0.4f} seconds")

2025-11-12 12:52:14,006 (bootstrap_result(lb_one_sided=np.float64(0.39334163994515675), lb_two_sided=np.float64(0.39314676668066995), ub_one_sided=np.float64(0.4010155931271154), ub_two_sided=np.float64(0.4016755392326422)), bootstrap_result(lb_one_sided=np.float64(1.6213811153652622), lb_two_sided=np.float64(1.620151913268019), ub_one_sided=np.float64(1.6412651347723624), ub_two_sided=np.float64(1.6416273664342966)))
2025-11-12 12:52:14,006 cost 0.0411 seconds


In [10]:
# Build the ALT19 LS mechanism from the shared `kwargs`.
mech = ALT19LS_mech(kwargs)

# Privacy parameters; delta is 1 over the mechanism's `n` attribute.
epsilon = 1
delta = 1/mech.n

# Last expression of the cell, so its return value is shown as the cell output.
mech.compute_constant(epsilon, delta)

np.float64(2.0599687150443087)

In [11]:
# Rebuild the ALT19 LS mechanism from the shared `kwargs`.
mech = ALT19LS_mech(kwargs)

# Privacy parameters and number of samples to draw; delta is 1 over the mechanism's `n`.
epsilon = 1
delta = 1/mech.n
num_samples = 100

# Time only the sampling call.
tic = time.perf_counter()
samples = mech._gen_samples(epsilon, delta, num_samples)
toc = time.perf_counter()
# Elapsed seconds between the two perf_counter readings, logged at CRITICAL level.
logging.critical(f"cost {toc - tic:0.4f} seconds")

2025-11-12 13:32:46,734 cost 2431.8648 seconds


In [12]:
# Estimate the squared error for whatever `samples` currently holds, timing both
# the estimator construction and the estimation call.
tic = time.perf_counter()
accuracy_estimator = LS_accuracy_estimator(kwargs)
boot_res = accuracy_estimator.estimate_square_error(samples)
toc = time.perf_counter()
# Log the raw result object first, then the elapsed seconds.
logging.critical(boot_res)
logging.critical(f"cost {toc - tic:0.4f} seconds")

2025-11-12 13:32:46,806 (bootstrap_result(lb_one_sided=np.float64(0.8246889682525818), lb_two_sided=np.float64(0.824040036222986), ub_one_sided=np.float64(0.8318516497819459), ub_two_sided=np.float64(0.8332473662206419)), bootstrap_result(lb_one_sided=np.float64(5.496763779743661), lb_two_sided=np.float64(5.492214830762541), ub_one_sided=np.float64(5.720464415003367), ub_two_sided=np.float64(5.733842194162772)))
2025-11-12 13:32:46,807 cost 0.0487 seconds
