In [1]:
import os
import sys
import numpy as np

# Resolve the project root (two levels above the notebook's working directory)
# and derive the source, log and figure directories from it.
project_dir = os.path.abspath(os.path.join(os.getcwd(), '../..'))
src_dir = os.path.join(project_dir, 'src')
log_dir = os.path.join(project_dir, 'log')
fig_dir = os.path.join(project_dir, 'fig')

# Make the packages under `src` importable. The membership guard keeps this
# cell idempotent: re-running it does not pile duplicate entries onto sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)


from Dataset.cancer_dataset_for_LS import main_generate_cancer_matrices_for_LS, load_cancer_dataset_matrices_for_LS
from analysis.commons import data_normalize_by_features, concatenate_B_b

from estimator.NDIS import BasicNDISEstimator

In [2]:
# Text files holding the X and y matrices of the cancer dataset.
file_X_path = os.path.join(project_dir, "Dataset", "cancer-LR-X.txt")
file_y_path = os.path.join(project_dir, "Dataset", "cancer-LR-y.txt")

try:
    X, y = load_cancer_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
except Exception:
    # Loading failed — presumably because the files have not been generated
    # yet (the loader's exact exception type is not visible here, hence the
    # broad `Exception`; unlike a bare `except:` it lets KeyboardInterrupt and
    # SystemExit propagate). Generate the files, then reload from the SAME
    # explicit paths rather than from the loader's defaults.
    main_generate_cancer_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
    X, y = load_cancer_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)

# B, b: feature-normalised data; A combines B and b (presumably the augmented
# matrix [B | b] — TODO confirm against analysis.commons.concatenate_B_b).
B, b = data_normalize_by_features(X, y)
A = concatenate_B_b(B, b)

In [3]:
from numpy.linalg import inv as mat_inv
from analysis.commons import twoNorm
from scipy.linalg import sqrtm, eigh

# Gram matrix of B (the output of data_normalize_by_features), its inverse,
# and the matrix square root of that inverse.
M = B.T @ B
inv_M = mat_inv(M)
sqrt_inv_M = sqrtm(inv_M)

In [4]:
from analysis.commons import compute_xopt, get_w, get_neighbor_B, get_neighbor_b

# --- experiment settings -----------------------------------------------------
index = 186     # row used to build the neighbouring dataset
d = B.shape[1]  # number of columns of B
r = 300         # divisor in the covariance scale ||w||^2 / r

# --- full dataset ------------------------------------------------------------
# xopt: output of compute_xopt; w: output of get_w (its entries are used as
# residuals further down); cov: (B^T B)^{-1} scaled by ||w||^2 / r.
xopt = compute_xopt(B, b)
w = get_w(B, b)
cov = np.linalg.inv(B.T @ B) * (np.linalg.norm(w) ** 2 / r)

# --- neighbouring dataset ----------------------------------------------------
# Same three quantities for the neighbour of (B, b) at `index`
# (presumably the data with that row removed — TODO confirm in analysis.commons).
neighbor_B, neighbor_b = get_neighbor_B(index, B), get_neighbor_b(index, b)
neighbor_w = get_w(neighbor_B, neighbor_b)
neighbor_xopt = compute_xopt(neighbor_B, neighbor_b)
neighbor_cov = np.linalg.inv(neighbor_B.T @ neighbor_B) * (np.linalg.norm(neighbor_w) ** 2 / r)


In [5]:
# Epsilon at which every delta estimate in the following cells is evaluated.
eps = 0.1

In [13]:
# Compute the leverage scores and residual of row `index`, then estimate delta
# with the estimator in its initial ("first expression") configuration.
from numpy.linalg import inv as mat_inv
from analysis.commons import twoNorm
from scipy.linalg import sqrtm, eigh

# Gram matrix of B, its inverse, and the square root of the inverse.
M = B.T @ B
inv_M = mat_inv(M)
sqrt_inv_M = sqrtm(inv_M)
# v: row `index` of B as a column vector (copied so B is not aliased).
v = B[index].copy().reshape(-1, 1)
# pi = v^T (B^T B)^{-1} v, the leverage of row `index` in B.
# NOTE: `pi` is this leverage score, not the mathematical constant.
pi = (v.T @ inv_M @ v).item()
# u / qi: the same quadratic form for row `index` of A, using (A^T A)^{-1}.
u = A[index].copy().reshape(-1, 1)
qi = (u.T @ mat_inv(A.T @ A) @ u).item()

# Residual of row `index` and the norm of the whole residual vector w
# (twoNorm is presumably the Euclidean norm — TODO confirm).
ei = w[index]
error = twoNorm(w)

# Estimator constructed from the neighbour (xopt, cov) pair followed by the
# full-data (xopt, cov) pair; `workers` and `num_samples` are forwarded as given.
estimator = BasicNDISEstimator(neighbor_xopt, neighbor_cov, xopt, cov, workers=50, num_samples=2**16)

# estimator._set_Abc(A1, b1, c1)

# Run the estimator's "pqmc" method at `eps` and report the result.
method = "pqmc"
print(f"delta of ALS for index {index}: {estimator.estimate(eps, method=method)} with epsilon = {eps}")




delta of ALS for index 186: {'delta': 0.25827762209949223, 'se': 1.6295910463792636e-08} with epsilon = 0.1


In [7]:
# e1 = np.eye(d)[0].reshape(-1, 1)
# neighbor_M = M - v@v.T
# sqrt_inv_neighbor_M = sqrtm(mat_inv(neighbor_M))

# raw_A2 = np.eye(d) - (1 - ei**2/((1-pi)*error**2)) * sqrt_inv_neighbor_M@M@sqrt_inv_neighbor_M
# A2, U2 = eigh(raw_A2)


# b2 = (np.sqrt(r*pi)*ei*np.sqrt((1-pi)*error**2 - ei**2)/(error**2*((1-pi)**2))*e1).ravel()

# c2 = 0.5*np.log((1- ei**2/((1-pi)*error**2))**d*(1/(1-pi))) - 0.5*(r*ei**2*pi)/(error**2*(1-pi)**2)

# estimator._set_Abc(A2, b2, c2)

# method = "pqmc"
# print(f"delta of ALS for index {index}: {estimator.estimate(eps, method=method)} with epsilon = {eps}")

In [8]:
# Second parameterisation of the (A, b, c) triple handed to the estimator,
# written in terms of the leverage `pi`, the residual `ei` and the residual
# norm `error`.
e1 = np.eye(d)[0].reshape(-1, 1)  # first standard basis vector as a (d, 1) column
c22 = ei**2/((1-pi)*error**2)
c21 = -(1 - c22)*pi/(1-pi)

# np.diag of a 2-D array extracts its diagonal, so A2 holds the diagonal
# entries [c21 + c22, c22, ..., c22].
A2 = np.diag(c21*e1@e1.T + c22*np.eye(d))
# b2 is non-zero only in its first coordinate.
b2 = (np.sqrt(r*pi)*ei*np.sqrt((1-pi)*error**2 - ei**2)/(error**2*((1-pi)**2))*e1).ravel()
# log((1 - c22)**d / (1 - pi)) expanded to d*log(1 - c22) - log(1 - pi):
# raising to the d-th power inside the log can underflow to 0 (-> -inf) for
# large d, and log1p keeps precision when c22 or pi is small.
c2 = 0.5*(d*np.log1p(-c22) - np.log1p(-pi)) - 0.5*(r*ei**2*pi)/(error**2*(1-pi)**2)

# Install the triple on the existing estimator and re-estimate delta; the
# result is expected to agree with the estimate from the initial configuration.
estimator._set_Abc(A2, b2, c2)

method = "pqmc"
print(f"delta of ALS for index {index}: {estimator.estimate(eps, method=method)} with epsilon = {eps}")

delta of ALS for index 186: {'delta': 0.25827763333291665, 'se': 7.2990962747854535e-09} with epsilon = 0.1


In [9]:
# Estimation using NDIS

# estimator = BasicNDISEstimator(xopt, cov, neighbor_xopt, neighbor_cov, workers=50, num_samples=2**16)
# method = "pqmc"

# print(f"delta of ALS for index {index}: {estimator.estimate(eps, method=method)} with epsilon = {eps}")

In [10]:
# Analytical result
# from analysis.ALS_privacy_analysis import compute_analytical_asymptotic_dist_delta

# compute_analytical_asymptotic_dist_delta(index, B, b, eps, r)

In [11]:
from analysis.least_square_numerical import radial_expectation_mc
from analysis.least_square_numerical import radial_expectation_qmc
from analysis.least_square_numerical import radial_expectation_quad_adaptive_mp_left

# Scalar parameters for the radial-expectation routines, taken from the second
# (A, b, c) parameterisation:
#   lambda1 = c21 + c22 -> first diagonal entry of A2
#   lambda2 = c22       -> every other diagonal entry of A2
#   lambda3             -> negative of the single non-zero coefficient of b2
lambda2 = c22
lambda1 = lambda2 + c21
lambda3 = -np.sqrt(r*pi)*ei*np.sqrt((1-pi)*error**2 - ei**2)/(error**2*((1-pi)**2))

# Plain Monte Carlo estimate; 1.96 * se is the half-width of a 95% interval.
# NOTE(review): no seed is passed here, so the printed digits may vary between
# runs — check whether radial_expectation_mc accepts one.
est, se = radial_expectation_mc(lambda1, lambda2, lambda3, c2+eps, d, n_samples=10**7)
print(f"MC estimate: {est:.6g} ± {1.96*se:.3g} (95% CI)")

# Quasi-Monte Carlo estimate with the same parameters.
est, se = radial_expectation_qmc(lambda1, lambda2, lambda3, c2+eps, d, n_points=2**18, antithetic=True, nbatches=16)
print(f"QMC estimate: {est:.6g} ± {1.96*se:.3g} (95% CI)")

# Adaptive quadrature evaluated with dps=100 (presumably decimal digits of
# working precision — TODO confirm); used as the reference value.
val = radial_expectation_quad_adaptive_mp_left(lambda1, lambda2, lambda3, c2+eps, d, dps=100, workers=32)
print("Adaptive piecewise estimate:", val)

MC estimate: 0.258253 ± 0.000217 (95% CI)
QMC estimate: 0.258278 ± 6.07e-07 (95% CI)
Adaptive piecewise estimate: 0.2582776282244506656475214290578653087030623336069636358523915302622448937763321309001738487900341866


In [23]:
# Same parameters as (lambda1, lambda2, lambda3, c2), rewritten purely in terms
# of the two leverage scores pi and qi. The rewrite presumably relies on the
# identity qi - pi = ei**2 / error**2 — TODO confirm; the printed value agrees
# with the residual-based one to about 1e-10.
lam2 = (qi - pi)/(1-pi)
lam1 = (qi - 2*pi + pi**2)/(1-pi)**2
# lam3 is always <= 0 here, whereas lambda3 carries the sign of -ei.
lam3 = - np.sqrt(r*pi*(1-qi)*(qi-pi))/((1-pi)**2)
# NOTE(review): this rebinds the notebook-global `c2`, so re-running an earlier
# cell that reads `c2` afterwards picks up this value instead of the
# residual-based one.
c2 = 0.5*(d*np.log(1-qi) - (d+1)*np.log(1-pi)) - 0.5*r*pi*(qi-pi)/(1-pi)**2
val = radial_expectation_quad_adaptive_mp_left(lam1, lam2, lam3, c2+eps, d, dps=100, workers=32)
print("Adaptive piecewise estimate:", val)

Adaptive piecewise estimate: 0.2582776282075556874511387084694809692033533058273132493782385375401889876150454970725765710494790736
