In [1]:
import numpy as np
from scipy.special import erf
import os
import sys
import time
import copy
import logging
import matplotlib.pyplot as plt

# Resolve the project root as the parent of the current working directory
# (nothing is chdir'ed), then derive the source, log and figure folders from it.
project_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
src_dir = os.path.join(project_dir, 'src')
log_dir = os.path.join(project_dir, 'log')
fig_dir = os.path.join(project_dir, 'fig')

# Make the project packages importable. Guarded so that re-running this cell
# does not keep appending the same entry to sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)


from utils.constants import WORKERS, BATCH_SAMPLES
from utils.commons import convert_bytes_to_mb, convert_bytes_to_gb
from Dataset.flight_dataset_for_LS import load_flight_dataset_matrices_for_LS, save_flight_dataset_matrices_for_LS
from Dataset.cancer_dataset_for_LS import main_generate_cancer_matrices_for_LS, load_cancer_dataset_matrices_for_LS
from Dataset.songs_dataset_for_LS import load_song_dataset_matrices_for_LS
from analysis.commons import data_normalize_by_features, concatenate_B_b, twoNorm, data_normalize_by_sample

from RP_mechanisms.optim_RP_DP import OptimalRP_mech, compute_largest_l2
from RP_mechanisms.alt19_RP_DP import ALT19RP_mech
from RP_mechanisms.RP_accuracy_estimator import RP_accuracy_estimator

from analysis.commons import compute_xopt, split_to_B_b

In [2]:
# logging.FileHandler opens the log file immediately and fails if its parent
# directory is missing, so make sure the log folder exists first.
os.makedirs(log_dir, exist_ok=True)
logfile_path = os.path.join(log_dir, 'RP-cancer-default.log')

# Send INFO-and-above records both to the log file and to the notebook output.
# NOTE(review): basicConfig does nothing when the root logger already has
# handlers (e.g. on a second run of this cell in the same kernel).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(message)s",
    handlers=[
        logging.FileHandler(logfile_path),
        logging.StreamHandler()
    ]
)

def store_array_str(data_array):
    """Return ``np.array_str(data_array)`` with every space replaced by a comma.

    Each individual space character becomes one comma, so a run of several
    padding spaces in numpy's rendering turns into a run of several commas.
    """
    rendered = np.array_str(data_array)
    # Splitting on a single space and re-joining with "," swaps each space
    # for a comma one-for-one.
    return ",".join(rendered.split(" "))

# Locations of the cached cancer matrices.
file_X_path = os.path.join(project_dir, "Dataset", "cancer-LR-X.txt")
file_y_path = os.path.join(project_dir, "Dataset", "cancer-LR-y.txt")

# Reset first so a failed load cannot leave matrices from an earlier run of
# this cell bound to X / y.
X = None
y = None
try:
    X, y = load_cancer_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
except Exception as load_error:
    # Catch Exception rather than everything, so KeyboardInterrupt / SystemExit
    # still stop the cell instead of triggering a regeneration.
    logging.info("Could not load cached cancer matrices (%s); regenerating them.", load_error)
    main_generate_cancer_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
    # Reload from the exact files that were just generated, not from the
    # loader's default locations.
    X, y = load_cancer_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)

In [3]:
# Pass the loaded cancer matrices through data_normalize_by_features
# (presumably a per-feature normalisation — confirm in analysis.commons),
# then combine B and b into the single matrix A that the default
# configuration below uses as its database.
B, b = data_normalize_by_features(X, y)
A = concatenate_B_b(B, b)

def generate_default_configuration():
    """Build the default configuration dictionary for this notebook.

    Reads the module-level matrix ``A`` and returns a new dict with the keys
    ``database``, ``r``, ``CI``, ``bootstrap_samples`` and ``l2``; ``l2`` is
    the value of ``compute_largest_l2(A)``.
    """
    largest_l2 = compute_largest_l2(A)
    return dict(
        database=A,
        r=300,
        CI=0.95,
        bootstrap_samples=100,
        l2=largest_l2,
    )

# Shared configuration; the cells below pass this same dict to both mechanisms
# and to the accuracy estimator.
kwargs = generate_default_configuration()

In [4]:
# Build the OptimalRP mechanism from the shared configuration.
mech = OptimalRP_mech(kwargs)

# Privacy parameters: epsilon is fixed at 1 and delta is the reciprocal of mech.n.
# NOTE(review): mech.n is presumably the number of database rows — confirm.
epsilon = 1
delta = 1/mech.n
num_samples = 100  # not used in this cell; the next cell sets it again before sampling
# Compute and print sigma for (epsilon, delta).
# NOTE(review): the recorded output of this cell also shows two lines quoting an
# `exp_term = np.exp(...)` expression, which look like warning text emitted
# while the cell ran — confirm they are benign.
sigma = mech.find_minimal_sigma(epsilon, delta)
print(sigma)

6.850481817751286


  exp_term = np.exp(epsilon - r / 2 * np.log(1 - leverage)) * (1 - leverage) ** (r / 2.0)
  exp_term = np.exp(epsilon - r / 2 * np.log(1 - leverage)) * (1 - leverage) ** (r / 2.0)


In [5]:
# Same setup as the previous cell, on a freshly constructed mechanism.
mech = OptimalRP_mech(kwargs)

epsilon = 1
delta = 1/mech.n
num_samples = 100
# sigma is not passed to _gen_samples below.
# NOTE(review): presumably find_minimal_sigma is either needed for a side effect
# on `mech` or is redundant here — confirm before removing.
sigma = mech.find_minimal_sigma(epsilon, delta)
# Draw num_samples samples from the mechanism.
# NOTE(review): no random seed is set anywhere in this notebook, so if sampling
# is random the recorded outputs will not reproduce exactly — consider seeding.
samples = mech._gen_samples(epsilon, delta=delta, num_samples=num_samples)

In [6]:
# Build an accuracy estimator from the shared configuration and evaluate the
# pairwise-distance estimate on the current `samples`; the returned value is
# displayed as the cell output.
accuracy_estimator = RP_accuracy_estimator(kwargs)
accuracy_estimator.estimate_pairwise_distance_acc(samples)

bootstrap_result(lb_one_sided=18.94835466586992, lb_two_sided=18.94002408522496, ub_one_sided=19.02415219180172, ub_two_sided=19.03005572862427)

In [7]:
# Dot-product estimate for the same `samples`, reusing the estimator built in
# the previous cell.
accuracy_estimator.estimate_dot_product_acc(samples)

bootstrap_result(lb_one_sided=0.39524544251123056, lb_two_sided=0.3949484416235925, ub_one_sided=0.40104803495098, ub_two_sided=0.40147782441690266)

In [8]:
# Repeat the experiment with the ALT19 mechanism, using the same epsilon,
# sample count and shared configuration as the OptimalRP cells.
epsilon = 1
num_samples = 100
alt_mech = ALT19RP_mech(kwargs)
delta = 1/alt_mech.n
# compute_constant is called before sampling and its return value is discarded.
# NOTE(review): presumably it stores a constant on alt_mech that _gen_samples
# relies on — confirm before reordering or removing.
alt_mech.compute_constant(epsilon, delta)
# Rebinds `samples`, replacing the OptimalRP samples from the earlier cell.
samples = alt_mech._gen_samples(epsilon, delta=delta, num_samples=num_samples)

# Rebinds `accuracy_estimator` as well, then evaluates the pairwise-distance
# estimate on the ALT19 samples.
accuracy_estimator = RP_accuracy_estimator(kwargs)
accuracy_estimator.estimate_pairwise_distance_acc(samples)

bootstrap_result(lb_one_sided=55.84380990629334, lb_two_sided=55.834788505687385, ub_one_sided=56.10716319139692, ub_two_sided=56.12636874540118)

In [9]:
# Dot-product estimate for the ALT19 `samples` generated in the previous cell.
accuracy_estimator.estimate_dot_product_acc(samples)

bootstrap_result(lb_one_sided=0.3819243069492151, lb_two_sided=0.38167802597042066, ub_one_sided=0.3880936483501558, ub_two_sided=0.388620249021518)