In [424]:
import sys 
# Add the directory two levels above the working directory to the module
# search path. NOTE(review): presumably this is what makes the `Helpers`
# package imported in the following cells resolvable — confirm the layout.
sys.path.append('../../')

In [425]:
import Helpers.helpers as helpers

# Load the unperturbed dataset; `plain_df` is the input handed to every
# perturbation function further down in this notebook.
plain_df = helpers.load_dataset('../export/plain.csv')
# Preview the first rows.
plain_df.head()

Unnamed: 0,X,Y
0,0.914339,4.550146
1,-1.93038,2.70215
2,1.872718,4.180692
3,-0.555524,4.695958
4,-1.034436,8.259404


In [426]:
from diffprivlib.mechanisms import laplace, gaussian

# Smoke test: build a Laplace mechanism (epsilon=3, sensitivity=1) and
# perturb the scalar 2 once; the result differs on every run.
lp = laplace.Laplace(epsilon=3, sensitivity=1)
lp.randomise(2)

2.250195569313838

In [427]:
# Smoke test: build an analytic Gaussian mechanism (epsilon=3, sensitivity=1,
# delta=0.1) and perturb the scalar 1 once; the result differs on every run.
gs = gaussian.GaussianAnalytic(epsilon=3, sensitivity=1, delta=0.1)
gs.randomise(1)

0.7349670631889975

In [428]:
import numpy as np

# Reference implementation:
# https://github.com/forestneo/sunPytools/blob/master/dplib/ldp_mechanisms/piecewise_mechanism.py
# Paper: "Collecting and Analyzing Multidimensional Data with Local Differential Privacy"
def randomize_piece_wise(epsilon, value, rng=None):
    """Perturb a single value from [-1, 1] with the Piecewise Mechanism.

    With z = e^(epsilon / 2) and C = (z + 1) / (z - 1), the output range
    [-C, C] is split around the input into three pieces:

    * left tail   [-C, g1)   chosen with probability (value + 1) / (2 + 2z)
    * centre band [g1, g2]   chosen with probability z / (z + 1)
    * right tail  (g2, C]    chosen with the remaining probability

    where g1 = (C + 1) * value / 2 - (C - 1) / 2 and g2 = g1 + (C - 1).
    One piece is selected with a first uniform draw and the result is then
    drawn uniformly inside that piece with a second one.

    Args:
        epsilon: Privacy budget. Must be strictly positive; 0 would make C
            infinite and a negative budget makes C negative.
        value: Number to perturb. Must lie in [-1, 1], otherwise the piece
            probabilities stop being valid probabilities.
        rng: Optional random source exposing a ``random()`` method that
            returns a float in [0, 1), e.g. ``np.random.default_rng(seed)``.
            When omitted the global ``np.random`` state is used, exactly as
            before this parameter existed.

    Returns:
        The perturbed value, a float in [-C, C].

    Raises:
        ValueError: If ``epsilon`` is not > 0 or ``value`` is outside [-1, 1]
            (NaN is rejected by both checks).
    """
    if not epsilon > 0:
        raise ValueError(f"epsilon must be > 0, got {epsilon!r}")
    if not -1 <= value <= 1:
        raise ValueError(f"value must lie in [-1, 1], got {value!r}")

    draw = np.random.random if rng is None else rng.random

    z = np.e ** (epsilon / 2)
    C = (z + 1) / (z - 1)

    # Selection probabilities of the left tail and the centre band; the right
    # tail takes whatever is left, so it needs no explicit variable.
    P1 = (value + 1) / (2 + 2 * z)
    P2 = z / (z + 1)

    # Borders of the high-probability centre band around `value`.
    g1 = (C + 1) * value / 2 - (C - 1) / 2
    g2 = (C + 1) * value / 2 + (C - 1) / 2

    # First draw picks the piece, second draw picks the point inside it.
    rnd = draw()
    if rnd < P1:
        result = -C + draw() * (g1 - (-C))
    elif rnd < P1 + P2:
        result = (g2 - g1) * draw() + g1
    else:
        result = (C - g2) * draw() + g2
    return result


In [429]:
from Helpers.pairwise import PMBase, PiecewiseMechanism
# Smoke test of the project's PMBase encoder: perturb the value 1 once with
# epsilon=10.
pm_encoder = PMBase(epsilon=10)
pm_encoder.randomise(1)

1.0064503850002962

## Generate data

In [430]:
def generate_pairwise_perturbation(plain_df, epsilon):
    """Return a copy of ``plain_df`` with every cell passed through a PiecewiseMechanism.

    The mechanism's ``domain`` is the smallest and largest value found
    anywhere in the frame, so all columns share one domain.

    Args:
        plain_df: DataFrame of values to perturb; it is not modified.
        epsilon: Privacy budget forwarded to ``PiecewiseMechanism``.

    Returns:
        A new DataFrame with the same index and columns as ``plain_df``.
    """
    lower_bound = plain_df.min().min()
    upper_bound = plain_df.max().max()
    mechanism = PiecewiseMechanism(epsilon=epsilon, domain=(lower_bound, upper_bound))

    noisy_df = plain_df.copy()
    for column_name in plain_df.columns:
        source_column = plain_df[column_name]
        noisy_df[column_name] = source_column.apply(mechanism.randomize)
    return noisy_df

def generate_laplace_perturbation(plain_df, epsilon, sensitivity=1):
    """Return a copy of ``plain_df`` with every cell perturbed by a Laplace mechanism.

    One diffprivlib ``Laplace`` mechanism is built and its ``randomise`` is
    applied to each value independently, column by column.

    Args:
        plain_df: DataFrame of values to perturb; it is not modified.
        epsilon: Privacy budget forwarded to the mechanism.
        sensitivity: Sensitivity forwarded to the mechanism. Defaults to 1,
            the value that used to be hard-coded.
            NOTE(review): the preview of ``plain_df`` shows values spanning
            more than one unit — confirm whether the data range should be
            passed here instead.

    Returns:
        A new DataFrame with the same index and columns as ``plain_df``.
    """
    mechanism = laplace.Laplace(epsilon=epsilon, sensitivity=sensitivity)
    perturbed_df = plain_df.copy()
    for col in plain_df.columns:
        # The bound method is passed directly; no lambda wrapper is needed.
        perturbed_df[col] = plain_df[col].apply(mechanism.randomise)
    return perturbed_df

def generate_gaussian_perturbation(plain_df, epsilon, sensitivity=1, delta=0.1):
    """Return a copy of ``plain_df`` with every cell perturbed by an analytic Gaussian mechanism.

    One diffprivlib ``GaussianAnalytic`` mechanism is built and its
    ``randomise`` is applied to each value independently, column by column.

    Args:
        plain_df: DataFrame of values to perturb; it is not modified.
        epsilon: Privacy budget forwarded to the mechanism.
        sensitivity: Sensitivity forwarded to the mechanism. Defaults to 1,
            the value that used to be hard-coded.
            NOTE(review): the preview of ``plain_df`` shows values spanning
            more than one unit — confirm whether the data range should be
            passed here instead.
        delta: ``delta`` forwarded to the mechanism. Defaults to 0.1, the
            value that used to be hard-coded.

    Returns:
        A new DataFrame with the same index and columns as ``plain_df``.
    """
    mechanism = gaussian.GaussianAnalytic(
        epsilon=epsilon, sensitivity=sensitivity, delta=delta
    )
    perturbed_df = plain_df.copy()
    for col in plain_df.columns:
        # The bound method is passed directly; no lambda wrapper is needed.
        perturbed_df[col] = plain_df[col].apply(mechanism.randomise)
    return perturbed_df

In [431]:
# Show the unperturbed rows again as a baseline for the perturbed previews
# in the following cells.
plain_df.head()

Unnamed: 0,X,Y
0,0.914339,4.550146
1,-1.93038,2.70215
2,1.872718,4.180692
3,-0.555524,4.695958
4,-1.034436,8.259404


In [550]:
# Preview the piecewise-mechanism perturbation at epsilon=7.
generate_pairwise_perturbation(plain_df, epsilon=7).head()

Unnamed: 0,X,Y
0,0.88751,4.664897
1,-2.190109,2.68818
2,1.782055,4.240117
3,-0.758676,4.788105
4,-1.075623,8.358021


In [544]:
# Preview the Laplace perturbation at epsilon=0.05; in the recorded output the
# noise is far larger than the original values.
generate_laplace_perturbation(plain_df=plain_df, epsilon=0.05).head()

Unnamed: 0,X,Y
0,9.267445,40.220538
1,-52.802611,-10.76366
2,17.748512,23.687368
3,9.341585,-16.025488
4,-14.701803,-32.088059


In [537]:
# Preview the analytic Gaussian perturbation at epsilon=0.05 (delta is fixed
# inside the function).
generate_gaussian_perturbation(plain_df=plain_df, epsilon=0.05).head()

Unnamed: 0,X,Y
0,1.723207,10.302628
1,-5.539759,1.297356
2,0.984854,12.259063
3,0.524355,7.294683
4,-4.777323,11.025839


In [556]:
import os

import pandas as pd

# Privacy budgets to export, from strongest to weakest perturbation.
epsilons = [0.05, 0.1 , 0.5 , 1, 2, 3, 5, 7, 9]

# `to_csv` does not create missing parent folders, so make sure each
# per-mechanism export folder exists before anything is written. This keeps
# the cell runnable on a fresh checkout.
for export_subdir in ('pairwise', 'gaussian', 'laplace'):
    os.makedirs('../export/' + export_subdir, exist_ok=True)

for epsilon in epsilons:
    # Perturb the same plain data once per mechanism for this budget.
    Z = generate_pairwise_perturbation(plain_df, epsilon)
    Z_gaussian = generate_gaussian_perturbation(plain_df, epsilon)
    Z_laplace = generate_laplace_perturbation(plain_df, epsilon)

    # Restrict each result to the X and Y columns before exporting.
    Z_pd = pd.DataFrame(Z, columns=['X', 'Y'])
    Z_gaussian_pd = pd.DataFrame(Z_gaussian, columns=['X', 'Y'])
    Z_laplace_pd = pd.DataFrame(Z_laplace, columns=['X', 'Y'])

    # One file per (mechanism, epsilon); the budget is part of the file name.
    Z_pd.to_csv('../export/pairwise/perturbed_'+str(epsilon)+'.csv')
    Z_gaussian_pd.to_csv('../export/gaussian/perturbed_'+str(epsilon)+'.csv')
    Z_laplace_pd.to_csv('../export/laplace/perturbed_'+str(epsilon)+'.csv')