In [8]:
from Helpers import helpers

In [9]:
from sklearn.datasets import make_blobs

n_samples = 2000
X, y_true = make_blobs(n_samples=n_samples, centers=4, n_features=3,
                       cluster_std=0.60, random_state=0)
X.shape

(2000, 3)

In [10]:
_, perturbed_df = helpers.load_plain_and_perturbed_dataset(3, import_path='./export/', perturbed_path='./export/truncated/')

In [17]:

import numpy as np
from scipy.stats import gamma

def generate_unit_sphere(): 
    vector = np.random.randn(3)
    vector /= np.linalg.norm(vector)

    #polar_angle = np.arccos(vector[2])
    #azimuth = np.arctan2(vector[1], vector[0])
    theta = 2 * np.random.uniform(0, np.pi)
    psi = np.arccos(2*np.random.uniform() - 1)
    return theta, psi, vector

def generate_3D_noise_for_dataset(X, epsilon):
    Z = []
    for x in X:
        noise = generate_3D_noise(epsilon)
        z = x + noise
        Z.append(z)
    return Z
def generate_3D_noise(epsilon): 
    polar_angle, azimuth, _ = generate_unit_sphere() # theta, psi
    r = gamma.rvs(3, scale=1/epsilon)
    # theta = 2 * np.pi * u[0]
    #theta = np.random.rand() * np.pi
    #phi = np.arccos(2 * u[1] - 1)
    #phi = np.random.rand() * np.pi*2 # 
    # https://mathworld.wolfram.com/SphericalCoordinates.html formula 4/5/6
    x = r * np.sin(polar_angle) * np.sin(azimuth)
    y = r * np.sin(polar_angle) * np.cos(azimuth)
    z = r * np.cos(polar_angle)
    return x, y, z

def remap_to_closted(perturbed_dataset, original_dataset, grid): 
    X, Y, Z = grid

        # Define the domain of the original dataset
    X_min, X_max = original_dataset[:, 0].min(), original_dataset[:, 0].max()
    Y_min, Y_max = original_dataset[:, 1].min(), original_dataset[:, 1].max()
    Z_min, Z_max = original_dataset[:, 2].min(), original_dataset[:, 2].max()
    domain_X = ((X_min, X_max), (Y_min, Y_max), (Z_min, Z_max))

    # Find the indices of the closest points in the original dataset for each point in the perturbed dataset
    indices_X = np.argmin(np.sum((perturbed_dataset[:, np.newaxis] - original_dataset[np.newaxis, :]) ** 2, axis=-1), axis=-1)

    # Check which points in the perturbed dataset are outside the domain of the original dataset
    outside_domain_X = np.logical_or(perturbed_dataset[:, 0] < domain_X[0][0], perturbed_dataset[:, 0] > domain_X[0][1])
    outside_domain_X = np.logical_or(outside_domain_X, perturbed_dataset[:, 1] < domain_X[1][0])
    outside_domain_X = np.logical_or(outside_domain_X, perturbed_dataset[:, 1] > domain_X[1][1])
    outside_domain_X = np.logical_or(outside_domain_X, perturbed_dataset[:, 2] < domain_X[2][0])
    outside_domain_X = np.logical_or(outside_domain_X, perturbed_dataset[:, 2] > domain_X[2][1])

    # Find the indices of the closest points in the meshgrid for each point in the perturbed dataset
    indices_M = np.argmin(np.sum((perturbed_dataset[:, np.newaxis] - np.array([X.ravel(), Y.ravel(), Z.ravel()]).T[np.newaxis, :]) ** 2, axis=-1), axis=-1)
    indices_M = np.unravel_index(indices_M, X.shape)

    # Check which points in the perturbed dataset are outside the domain of the meshgrid
    outside_domain_M = np.logical_or(perturbed_dataset[:, 0] < X_min, perturbed_dataset[:, 0] > X_max)
    outside_domain_M = np.logical_or(outside_domain_M, perturbed_dataset[:, 1] < Y_min)
    outside_domain_M = np.logical_or(outside_domain_M, perturbed_dataset[:, 1] > Y_max)
    outside_domain_M = np.logical_or(outside_domain_M, perturbed_dataset[:, 2] < Z_min)
    outside_domain_M = np.logical_or(outside_domain_M, perturbed_dataset[:, 2] > Z_max)

    # Remap the points outside the domain of the original dataset to the closest points in the original dataset
    remapped_dataset = perturbed_dataset.copy()
    remapped_dataset[outside_domain_X, :] = original_dataset[indices_X[outside_domain_X], :]

    # Remap the points outside the domain of the meshgrid to the closest points in the meshgrid
    remapped_dataset[outside_domain_M, 0] = X[indices_M][outside_domain_M]
    remapped_dataset[outside_domain_M, 1] = Y[indices_M][outside_domain_M]
    remapped_dataset[outside_domain_M, 2] = Z[indices_M][outside_domain_M]
    return remapped_dataset

def generate_truncated_perturbed_dataset(X, epsilon):
    meshgrid = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), num=6), np.linspace(X[:, 1].min(), X[:, 1].max(), num=6), np.linspace(X[:, 2].min(), X[:, 2].max(), num=6), indexing='ij')

    Z = generate_3D_noise_for_dataset(X, epsilon)
    Z = np.array(Z)
    Z = remap_to_closted(Z, X, meshgrid)
    return Z

In [12]:
test_df = helpers.run_mi_experiments(X, y_true, helpers.get_experiment_epsilons(), n_times=20, algorithm=generate_3D_noise_for_dataset)



0.064 0.012




1.0 1.0




0.004 0.0




0.004 0.0




0.008 0.0




1.0 1.0




0.008 0.0




0.012 0.0




0.004 0.0




1.0 1.0




1.0 1.0




0.044 0.0




1.0 1.0




1.0 1.0




0.064 0.228




0.008 0.0




1.0 1.0




0.004 0.0




0.036 0.0




0.028 0.0




0.008 0.0




0.068 0.0




0.012 0.0




0.024 0.0




0.04 0.004




0.056 0.032




0.08 0.008




0.044 0.004




0.052 0.04




0.104 0.052




0.04 0.0




0.016 0.128




0.012 0.0




0.008 0.0




0.024 0.0




0.02 0.0




1.0 1.0




0.056 0.112




1.0 1.0




0.016 0.0




0.148 0.06




0.184 0.068




0.232 0.224




0.136 0.06




0.076 0.028




0.096 0.08




0.144 0.176




0.144 0.112




0.144 0.196




0.172 0.204




0.136 0.096




0.16 0.136




0.156 0.036




0.152 0.148




0.132 0.088




0.112 0.252




0.176 0.132




0.116 0.176




0.164 0.224




0.156 0.08




0.388 0.404




0.28 0.212




0.416 0.388




0.516 0.548




0.292 0.284




0.288 0.196




0.356 0.356




0.376 0.32




0.316 0.396




0.38 0.232




0.244 0.132




0.376 0.444




0.388 0.296




0.348 0.268




0.324 0.184




0.308 0.288




0.38 0.24




0.424 0.428




0.224 0.196




0.304 0.288




0.38 0.292




0.504 0.38




0.464 0.384




0.352 0.252




0.488 0.344




0.54 0.428




0.432 0.304




0.488 0.372




0.5 0.38




0.476 0.456




0.476 0.372




0.508 0.508




0.424 0.4




0.476 0.332




0.408 0.352




0.524 0.372




0.424 0.376




0.492 0.384




0.492 0.468




0.548 0.428




0.488 0.352




0.552 0.412




0.484 0.428




0.608 0.544




0.564 0.404




0.536 0.408




0.696 0.604




0.504 0.476




0.596 0.536




0.448 0.34




0.536 0.436




0.332 0.404




0.568 0.48




0.6 0.464




0.64 0.472




0.564 0.468




0.54 0.464




0.624 0.56




0.628 0.424




0.56 0.52




0.632 0.44




0.712 0.52




0.66 0.568




0.748 0.484




0.596 0.46




0.528 0.424




0.596 0.48




0.572 0.492




0.7 0.532




0.64 0.516




0.648 0.44




0.604 0.46




0.596 0.428




0.532 0.412




0.664 0.496




0.632 0.512




0.568 0.416




0.596 0.532




0.668 0.448




0.66 0.56




0.68 0.428




0.62 0.5




0.572 0.42




0.64 0.528




0.648 0.42




0.664 0.46




0.632 0.448




0.66 0.512




0.564 0.508




0.692 0.58




0.652 0.5




0.572 0.336




0.624 0.524




0.588 0.4




0.612 0.468




0.628 0.484




0.656 0.484




0.696 0.584




0.624 0.468




0.616 0.492




0.604 0.4




0.564 0.356




0.58 0.388




0.568 0.468




0.5 0.328




0.52 0.328




0.644 0.484




0.596 0.436




0.688 0.564




0.676 0.572




0.732 0.552




0.624 0.516




0.708 0.56




0.496 0.368




0.596 0.46




0.58 0.428




0.636 0.476




0.664 0.504




0.592 0.396




0.512 0.356




In [14]:
test_df.to_csv('./export/privacy/report-advantages-' + str(n_samples) + '.csv')

In [19]:
truncated = helpers.run_mi_experiments(X, y_true, helpers.get_experiment_epsilons(), n_times=20, algorithm=generate_truncated_perturbed_dataset)



0.008 0.0




0.016 0.0




0.012 0.0




0.008 0.0




0.008 0.0




0.02 0.0




0.012 0.028




1.0 1.0




0.004 0.0




1.0 1.0




0.02 0.0




0.012 0.008




1.0 1.0




1.0 1.0




0.012 0.0




0.004 0.0




1.0 1.0




0.004 0.008




1.0 1.0




0.012 0.052




0.004 0.0




0.048 0.036




0.0 0.008




0.02 0.004




0.016 0.0




1.0 1.0




0.008 0.0




0.004 0.0




0.004 0.0




1.0 1.0




0.004 0.0




0.008 0.016




0.036 0.0




1.0 1.0




0.032 0.0




0.016 0.004




0.04 0.016




0.036 0.0




0.008 0.072




1.0 1.0




0.288 0.088




0.272 0.224




0.304 0.196




0.136 0.072




0.236 0.06




0.324 0.244




0.172 0.128




0.188 0.144




0.152 0.036




0.2 0.036




0.112 0.064




0.372 0.216




0.232 0.04




0.148 0.12




0.184 0.136




0.272 0.08




0.164 0.048




0.132 0.112




0.192 0.076




0.228 0.148




0.332 0.312




0.436 0.2




0.376 0.272




0.516 0.316




0.408 0.368




0.456 0.32




0.38 0.2




0.384 0.352




0.46 0.316




0.46 0.328




0.38 0.164




0.46 0.22




0.34 0.288




0.424 0.272




0.364 0.216




0.512 0.4




0.472 0.2




0.448 0.272




0.408 0.368




0.492 0.316




0.52 0.472




0.428 0.36




0.284 0.176




0.62 0.432




0.576 0.376




0.52 0.404




0.56 0.412




0.572 0.352




0.528 0.404




0.532 0.368




0.524 0.404




0.48 0.44




0.56 0.528




0.516 0.296




0.484 0.368




0.36 0.28




0.596 0.48




0.536 0.516




0.508 0.388




0.668 0.436




0.504 0.376




0.608 0.484




0.616 0.468




0.524 0.36




0.528 0.408




0.588 0.408




0.644 0.472




0.48 0.44




0.488 0.4




0.688 0.508




0.584 0.468




0.58 0.496




0.536 0.388




0.616 0.476




0.668 0.492




0.62 0.448




0.544 0.36




0.664 0.528




0.532 0.496




0.604 0.548




0.588 0.416




0.652 0.476




0.672 0.472




0.656 0.476




0.604 0.428




0.752 0.6




0.516 0.36




0.652 0.504




0.544 0.34




0.528 0.416




0.712 0.588




0.624 0.544




0.544 0.436




0.576 0.432




0.628 0.508




0.564 0.412




0.644 0.508




0.676 0.588




0.668 0.504




0.56 0.408




0.644 0.52




0.704 0.564




0.508 0.472




0.656 0.492




0.656 0.448




0.56 0.364




0.656 0.48




0.656 0.46




0.58 0.456




0.7 0.524




0.708 0.56




0.648 0.488




0.612 0.516




0.516 0.392




0.66 0.496




0.684 0.608




0.62 0.444




0.676 0.564




0.58 0.464




0.392 0.328




0.652 0.396




0.668 0.548




0.62 0.5




0.644 0.46




0.608 0.436




0.66 0.616




0.648 0.46




0.672 0.492




0.732 0.544




0.556 0.492




0.716 0.612




0.68 0.536




0.476 0.4




0.648 0.48




0.568 0.48




0.62 0.476




0.696 0.536




0.592 0.48




0.632 0.584




0.548 0.496




In [None]:
truncated.to_csv('./export/privacy/truncated-report-advantages-' + str(n_samples) + '.csv')