In [1]:
import numpy as np
from scipy.special import erf
import os
import sys
import time
import copy
import logging
import matplotlib.pyplot as plt

# Resolve the project root (the parent of the current working directory) and
# the sub-directories used for sources, logs and figures. Nothing is chdir'ed;
# these are only absolute paths.
project_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
src_dir = os.path.join(project_dir, 'src')
log_dir = os.path.join(project_dir, 'log')
fig_dir = os.path.join(project_dir, 'fig')

# Make the project sources importable. The membership check keeps sys.path
# from accumulating duplicate entries when this cell is re-run.
if src_dir not in sys.path:
    sys.path.append(src_dir)


from Dataset.flight_dataset_for_LS import load_flight_dataset_matrices_for_LS, save_flight_dataset_matrices_for_LS
from analysis.commons import data_normalize_by_features, concatenate_B_b

from RP_mechanisms.optim_RP import OptimalRP_mech, parallel_gen_samples
from RP_mechanisms.alt19_RP import ALT19RP_mech
from RP_mechanisms.RP_accuracy_estimator import RP_accuracy_estimator
from RP_mechanisms.isit18_RP import ISIT18RP_mech

In [2]:
logfile_path = os.path.join(log_dir, 'RP-flight-default.log')

# FileHandler opens the log file as soon as it is constructed, so the target
# directory has to exist beforehand (e.g. on a fresh checkout).
os.makedirs(log_dir, exist_ok=True)

# Emit INFO-and-above records, prefixed with a timestamp, to both the log file
# and the default stream handler (stderr, shown in the notebook output).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(message)s",
    handlers=[
        logging.FileHandler(logfile_path),
        logging.StreamHandler()
    ]
)

def store_array_str(data_array):
    """Return numpy's string form of ``data_array`` with every space turned into a comma.

    The text comes from ``np.array_str``; each single space character is
    replaced individually, so a run of k spaces becomes k commas.
    """
    pieces = np.array_str(data_array).split(' ')
    return ','.join(pieces)

file_X_path = os.path.join(project_dir, "Dataset", "flight-LR-X.txt")
file_y_path = os.path.join(project_dir, "Dataset", "flight-LR-y.txt")

# Try to load the matrices from the two text files. If that fails, ask the
# dataset module to write them and load again from the SAME paths (the retry
# previously fell back to the loader's default paths).
try:
    X, y = load_flight_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
except Exception as load_error:
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate; the reason for the fallback is recorded.
    logging.warning("Could not load flight matrices (%r); regenerating them.", load_error)
    save_flight_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
    X, y = load_flight_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)

# Normalize the data and combine the results into the single matrix A that is
# used as the 'database' entry of the configuration below.
B, b = data_normalize_by_features(X, y)
A = concatenate_B_b(B, b)

In [3]:
def generate_default_configuration(database=None, r=1200, CI=0.95, bootstrap_samples=100):
    """Build the settings dict handed to the RP mechanisms and the accuracy estimator.

    Called with no arguments it returns exactly the previous defaults.

    Args:
        database: Value stored under 'database'. When None, the notebook-level
            matrix ``A`` is used.
        r: Value stored under 'r' (presumably the projection dimension --
            confirm against the RP mechanism classes).
        CI: Value stored under 'CI' (presumably the confidence level of the
            bootstrap interval -- confirm against RP_accuracy_estimator).
        bootstrap_samples: Value stored under 'bootstrap_samples'.

    Returns:
        dict with the keys 'database', 'r', 'CI' and 'bootstrap_samples'.
    """
    return {
        # Look up the global A only when no database was supplied.
        'database': A if database is None else database,
        'r': r,
        'CI': CI,
        'bootstrap_samples': bootstrap_samples,
    }

# Shared settings dict; the mechanism and estimator constructors in the cells below all receive it.
kwargs = generate_default_configuration()

In [4]:
# Optimal RP mechanism built from the shared configuration.
mech = OptimalRP_mech(kwargs)

# Privacy parameters and sample count; delta is the reciprocal of mech.n.
epsilon, num_samples = 1, 100
delta = 1 / mech.n

# Ask the mechanism for its minimal sigma at this (epsilon, delta) and show it.
sigma = mech.find_minimal_sigma(epsilon, delta)
print(sigma)

0.7452001524823914


  exp_term = np.exp(epsilon - r / 2 * np.log(1 - leverage)) * (1 - leverage) ** (r / 2.0)
  exp_term = np.exp(epsilon - r / 2 * np.log(1 - leverage)) * (1 - leverage) ** (r / 2.0)


In [5]:
# ISIT18 RP mechanism built from the shared configuration.
isit_mech = ISIT18RP_mech(kwargs)

# Privacy parameters and the number of samples to draw.
epsilon = 1
delta = 1/isit_mech.n
num_samples = 100

# Time the parallel sample generation.
tic = time.perf_counter()
isit_samples = parallel_gen_samples(isit_mech, epsilon, delta, num_samples, workers=50)
toc = time.perf_counter()
# Elapsed time is routine progress information, so it is logged at INFO (the
# level enabled by this notebook's logging.basicConfig) rather than CRITICAL.
logging.info("cost %.4f seconds", toc - tic)

2025-10-28 20:15:01,567 cost 33.2889 seconds


In [6]:
# Estimate the pairwise-distance accuracy of the ISIT18 samples and time it
# (estimator construction included).
tic = time.perf_counter()
accuracy_estimator = RP_accuracy_estimator(kwargs)
boot_res = accuracy_estimator.estimate_pairwise_distance_acc(isit_samples)
toc = time.perf_counter()
# Results and timings are ordinary progress output: log them at INFO (enabled
# by this notebook's logging.basicConfig) instead of CRITICAL.
logging.info(boot_res)
logging.info("cost %.4f seconds", toc - tic)

2025-10-28 20:15:04,575 bootstrap_result(lb_one_sided=np.float64(110.93471753947638), lb_two_sided=np.float64(110.8437829972806), ub_one_sided=np.float64(111.45245487554922), ub_two_sided=np.float64(111.4571118897297))
2025-10-28 20:15:04,578 cost 3.0012 seconds


In [7]:
# Optimal RP mechanism built from the shared configuration.
mech = OptimalRP_mech(kwargs)

# Privacy parameters and the number of samples to draw.
epsilon = 1
delta = 1/mech.n
num_samples = 100

# Time the parallel sample generation.
tic = time.perf_counter()
samples = parallel_gen_samples(mech, epsilon, delta, num_samples, workers=50)
toc = time.perf_counter()
# Elapsed time is routine progress information, so it is logged at INFO (the
# level enabled by this notebook's logging.basicConfig) rather than CRITICAL.
logging.info("cost %.4f seconds", toc - tic)

2025-10-28 20:15:41,161 cost 34.1986 seconds


In [8]:
# Estimate the pairwise-distance accuracy of the Optimal-RP samples and time it
# (estimator construction included).
tic = time.perf_counter()
accuracy_estimator = RP_accuracy_estimator(kwargs)
boot_res = accuracy_estimator.estimate_pairwise_distance_acc(samples)
toc = time.perf_counter()
# Results and timings are ordinary progress output: log them at INFO (enabled
# by this notebook's logging.basicConfig) instead of CRITICAL.
logging.info(boot_res)
logging.info("cost %.4f seconds", toc - tic)

2025-10-28 20:15:47,352 bootstrap_result(lb_one_sided=np.float64(0.9965823639461691), lb_two_sided=np.float64(0.9961029872274312), ub_one_sided=np.float64(1.002235654755876), ub_two_sided=np.float64(1.0024070554334523))
2025-10-28 20:15:47,354 cost 6.1832 seconds


In [9]:
# ALT19 RP mechanism built from the shared configuration.
alt_mech = ALT19RP_mech(kwargs)

# NOTE: epsilon, delta and num_samples are not set in this cell; the values
# assigned in the preceding Optimal-RP sampling cell are reused.
tic = time.perf_counter()
alt_samples = parallel_gen_samples(alt_mech, epsilon, delta, num_samples, workers=50)
toc = time.perf_counter()

# Elapsed time is routine progress information, so it is logged at INFO (the
# level enabled by this notebook's logging.basicConfig) rather than CRITICAL.
logging.info("cost %.4f seconds", toc - tic)

2025-10-28 20:16:23,004 cost 35.6322 seconds


In [10]:
# Estimate the pairwise-distance accuracy of the ALT19 samples and time it
# (estimator construction included).
tic = time.perf_counter()
accuracy_estimator = RP_accuracy_estimator(kwargs)
boot_res = accuracy_estimator.estimate_pairwise_distance_acc(alt_samples)
toc = time.perf_counter()
# Results and timings are ordinary progress output: log them at INFO (enabled
# by this notebook's logging.basicConfig) instead of CRITICAL.
logging.info(boot_res)
logging.info("cost %.4f seconds", toc - tic)

2025-10-28 20:16:29,272 bootstrap_result(lb_one_sided=np.float64(3.9759475108624516), lb_two_sided=np.float64(3.9750695748692646), ub_one_sided=np.float64(4.0017063914800435), ub_two_sided=np.float64(4.004510404689144))
2025-10-28 20:16:29,273 cost 6.2607 seconds


In [11]:
# Re-sample from the Optimal RP mechanism with delta fixed to 1 instead of 1/mech.n.
# NOTE(review): presumably a baseline run without a meaningful privacy
# constraint -- confirm. This overwrites the earlier `samples`.
tic = time.perf_counter()
samples = parallel_gen_samples(mech, epsilon, delta=1, num_samples=num_samples, workers=50)
toc = time.perf_counter()
# Elapsed time is routine progress information, so it is logged at INFO (the
# level enabled by this notebook's logging.basicConfig) rather than CRITICAL.
logging.info("cost %.4f seconds", toc - tic)

2025-10-28 20:17:06,005 cost 36.7165 seconds


In [12]:
# Estimate the pairwise-distance accuracy of the current `samples` (drawn with
# delta=1 in the preceding cell) and time it, estimator construction included.
tic = time.perf_counter()
accuracy_estimator = RP_accuracy_estimator(kwargs)
boot_res = accuracy_estimator.estimate_pairwise_distance_acc(samples)
toc = time.perf_counter()
# Results and timings are ordinary progress output: log them at INFO (enabled
# by this notebook's logging.basicConfig) instead of CRITICAL.
logging.info(boot_res)
logging.info("cost %.4f seconds", toc - tic)

2025-10-28 20:17:12,704 bootstrap_result(lb_one_sided=np.float64(0.9995886956455713), lb_two_sided=np.float64(0.9989047671099178), ub_one_sided=np.float64(1.0040206547187411), ub_two_sided=np.float64(1.0042920624226048))
2025-10-28 20:17:12,706 cost 6.6915 seconds
