In [6]:
import os
import sys
import numpy as np

# Resolve the project root: two directory levels above the current working
# directory (no chdir happens here; only absolute paths are computed).
project_dir = os.path.abspath(os.path.join(os.getcwd(), '../..'))
src_dir = os.path.join(project_dir, 'src')
log_dir = os.path.join(project_dir, 'log')
fig_dir = os.path.join(project_dir, 'fig')

# Add the src directory to sys.path so the project imports resolve.
# Guarded so that re-running this cell does not pile up duplicate entries.
if src_dir not in sys.path:
    sys.path.append(src_dir)


from Dataset.cancer_dataset_for_LS import main_generate_cancer_matrices_for_LS, load_cancer_dataset_matrices_for_LS
from analysis.commons import data_normalize_by_features, concatenate_B_b

from estimator.NDIS import BasicNDISEstimator
from analysis.RP_privacy_analysis_advanced import _compute_gamma_delta, rp_find_rho_for_delta

In [7]:
# Example usage: ask rp_find_rho_for_delta for the rho matching a target delta
r, epsilon = 300, 3
delta_target = 1e-6  # target delta handed to the rho search

rho_found = rp_find_rho_for_delta(delta_target, epsilon, r)
print(
    f"For delta = {delta_target}, epsilon = {epsilon}, r = {r}",
    f"Found rho = {rho_found:.6f}",
    sep="\n",
)

# t is the reciprocal square root of the rho that was found
t = 1 / np.sqrt(rho_found)
print(f"t is {t}")

For delta = 1e-06, epsilon = 3, r = 300
Found rho = 1.047950
t is 0.9768542700518529


In [8]:
file_X_path = os.path.join(project_dir, "Dataset", "cancer-LR-X.txt")
file_y_path = os.path.join(project_dir, "Dataset", "cancer-LR-y.txt")

# Reset first so a failed run cannot leave stale X / y from an earlier execution.
X = None
y = None
try:
    X, y = load_cancer_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
except Exception:
    # `except Exception` instead of a bare `except`, so KeyboardInterrupt and
    # SystemExit still propagate instead of triggering a regeneration.
    main_generate_cancer_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)
    # Reload using the same paths that were passed to the generator, rather
    # than the loader's defaults, so both attempts read the same files.
    X, y = load_cancer_dataset_matrices_for_LS(file_X_name=file_X_path, file_y_name=file_y_path)

# Normalize the loaded data, then combine B and b into A via the project helpers.
B, b = data_normalize_by_features(X, y)
A = concatenate_B_b(B, b)

In [9]:
# B^T B is symmetric, so the symmetric/Hermitian solver eigvalsh is used to
# obtain its full set of eigenvalues.
eigenvalues = np.linalg.eigvalsh(B.T @ B)

# Pick out the minimum of the spectrum
smallest_eigenvalue = eigenvalues.min()

print(f"Smallest eigenvalue of B^T B: {smallest_eigenvalue:.10e}")

Smallest eigenvalue of B^T B: 1.0891043352e-04


In [10]:
# Compute the norm of each row of B (one value per row, taken along axis 1)
row_norms = np.linalg.norm(B, axis=1)

# Get the largest row norm
largest_row_norm = np.max(row_norms)

# Find which row has the largest norm
row_index = np.argmax(row_norms)

# The value reported below is the SQUARE of the largest row norm, so the label
# says "squared" to match what is actually printed.
print(f"Largest squared row norm of B: {largest_row_norm**2:.10e}")
print(f"Row index with largest norm: {row_index}")
print(f"Shape of B: {B.shape}")
print(f"Number of rows: {B.shape[0]}")

Largest row norm of B: 1.2264706363e+00
Row index with largest norm: 186
Shape of B: (2809, 12)
Number of rows: 2809
