In [28]:
import sys 
# Add the directory two levels up to the module search path; presumably that is
# where the local `Helpers` package imported by the following cells lives.
sys.path.append('../../')

In [29]:
import Helpers.helpers as helpers

# Load the plain (not yet perturbed) dataset through the project helper and
# preview its first rows.
plain_df = helpers.load_dataset('../export/plain.csv')
plain_df.head()

Unnamed: 0,X,Y
0,0.914339,4.550146
1,-1.93038,2.70215
2,1.872718,4.180692
3,-0.555524,4.695958
4,-1.034436,8.259404


In [30]:
from diffprivlib.mechanisms import laplace, gaussian

# Smoke test: randomise the scalar 2 with a Laplace mechanism
# configured with epsilon=3 and sensitivity=1.
lp = laplace.Laplace(epsilon=3, sensitivity=1)
lp.randomise(2)

2.0770944922123693

In [31]:
# Smoke test: randomise the scalar 1 with the analytic Gaussian mechanism
# configured with epsilon=3, sensitivity=1 and delta=0.1.
gs = gaussian.GaussianAnalytic(epsilon=3, sensitivity=1, delta=0.1)
gs.randomise(1)

0.8504247475497433

In [32]:
import numpy as np

# Source: https://github.com/forestneo/sunPytools/blob/master/dplib/ldp_mechanisms/piecewise_mechanism.py
# Paper: "Collecting and Analyzing Multidimensional Data with Local Differential Privacy"
def randomize_piece_wise(epsilon, value):
    """Perturb a single value from [-1, 1] with the piecewise mechanism.

    With ``z = e**(epsilon / 2)`` and ``C = (z + 1) / (z - 1)`` the output is
    drawn from ``[-C, C]``:

    * with probability ``z / (z + 1)`` uniformly from the centre piece
      ``[g1, g2]`` located around ``value``;
    * otherwise uniformly from the left piece ``[-C, g1]`` or the right piece
      ``[g2, C]``; the left piece is chosen with overall probability
      ``(value + 1) / (2 + 2 * z)`` and the right piece takes the remainder.

    Parameters
    ----------
    epsilon : float
        Privacy parameter; must be strictly positive.
    value : float
        Scalar to perturb; must lie in ``[-1, 1]``.

    Returns
    -------
    float
        The randomised value, inside ``[-C, C]``.

    Raises
    ------
    ValueError
        If ``epsilon`` is not strictly positive or ``value`` is outside
        ``[-1, 1]`` (NaN is rejected by both checks).
    """
    # `not x > 0` (rather than `x <= 0`) also rejects NaN.
    if not epsilon > 0:
        raise ValueError("epsilon must be strictly positive, got %r" % (epsilon,))
    # Outside [-1, 1] the branch probabilities below become negative and the
    # sampled distribution is silently wrong, so fail loudly instead.
    if not -1 <= value <= 1:
        raise ValueError("value must lie in [-1, 1], got %r" % (value,))

    z = np.e ** (epsilon / 2)
    C = (z + 1) / (z - 1)

    # Probability of the left piece and of the centre piece; the right piece
    # is the implicit remainder, so it needs no variable of its own.
    P1 = (value + 1) / (2 + 2 * z)
    P2 = z / (z + 1)

    # Left and right edges of the centre piece.
    g1 = (C + 1) * value / 2 - (C - 1) / 2
    g2 = (C + 1) * value / 2 + (C - 1) / 2

    # First draw selects the piece, second draw the position inside it.
    rnd = np.random.random()
    if rnd < P1:
        result = -C + np.random.random() * (g1 - (-C))
    elif rnd < P1 + P2:
        result = (g2 - g1) * np.random.random() + g1
    else:
        result = (C - g2) * np.random.random() + g2
    return result


In [33]:
from Helpers.pairwise import PMBase, PiecewiseMechanism
# Smoke test: randomise the scalar 1 with the project's PMBase mechanism
# configured with epsilon=10.
pm_encoder = PMBase(epsilon=10)
pm_encoder.randomise(1)

1.0102101097850413

## Generate data

In [34]:
def generate_pairwise_perturbation(plain_df, epsilon):
    """Perturb every value of ``plain_df`` with a ``PiecewiseMechanism``.

    The mechanism's domain is ``(global minimum, global maximum)`` taken over
    all columns of ``plain_df``, and one mechanism instance is shared by every
    column.  The input frame itself is left untouched.

    Returns a copy of ``plain_df`` whose columns hold the randomised values.
    """
    # Named upper/lower rather than max/min so the builtins are not shadowed.
    upper = plain_df.max().max()
    lower = plain_df.min().min()
    mechanism = PiecewiseMechanism(epsilon=epsilon, domain=(lower, upper))

    result = plain_df.copy()
    for column in plain_df.columns:
        noisy_values = plain_df[column].apply(mechanism.randomise)
        result[column] = noisy_values
    return result

def generate_laplace_perturbation(plain_df, epsilon, sensitivity=None):
    """Return a copy of ``plain_df`` with every value perturbed by Laplace noise.

    Parameters
    ----------
    plain_df : pandas.DataFrame
        Frame whose columns are randomised value by value; it is not modified.
    epsilon : float
        Privacy parameter forwarded to ``laplace.Laplace``.
    sensitivity : float, optional
        Sensitivity forwarded to ``laplace.Laplace``.  When omitted it
        defaults to ``1 / plain_df.size``, the value this notebook originally
        hard-coded.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns as ``plain_df``.
    """
    # Nothing to perturb; this also avoids the division by zero in the
    # default sensitivity below.
    if plain_df.size == 0:
        return plain_df.copy()

    if sensitivity is None:
        # NOTE(review): each value is randomised individually, yet the default
        # sensitivity is 1 / (number of cells) rather than something derived
        # from the value range -- confirm this matches the intended privacy
        # model.
        sensitivity = 1 / plain_df.size

    lp = laplace.Laplace(epsilon=epsilon, sensitivity=sensitivity)
    perturbed_df = plain_df.copy()
    for col in plain_df.columns:
        perturbed_df[col] = plain_df[col].apply(lp.randomise)
    return perturbed_df

def generate_gaussian_perturbation(plain_df, epsilon, delta=0.1, sensitivity=None):
    """Return a copy of ``plain_df`` with every value perturbed by Gaussian noise.

    Parameters
    ----------
    plain_df : pandas.DataFrame
        Frame whose columns are randomised value by value; it is not modified.
    epsilon : float
        Privacy parameter forwarded to ``gaussian.GaussianAnalytic``.
    delta : float, optional
        Delta forwarded to ``gaussian.GaussianAnalytic``.  Defaults to ``0.1``,
        the value this notebook originally hard-coded.
    sensitivity : float, optional
        Sensitivity forwarded to ``gaussian.GaussianAnalytic``.  When omitted
        it defaults to ``1 / plain_df.size``, the value this notebook
        originally hard-coded.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns as ``plain_df``.
    """
    # Nothing to perturb; this also avoids the division by zero in the
    # default sensitivity below.
    if plain_df.size == 0:
        return plain_df.copy()

    if sensitivity is None:
        # NOTE(review): each value is randomised individually, yet the default
        # sensitivity is 1 / (number of cells) rather than something derived
        # from the value range -- confirm this matches the intended privacy
        # model.
        sensitivity = 1 / plain_df.size

    gs = gaussian.GaussianAnalytic(epsilon=epsilon, sensitivity=sensitivity, delta=delta)
    perturbed_df = plain_df.copy()
    for col in plain_df.columns:
        perturbed_df[col] = plain_df[col].apply(gs.randomise)
    return perturbed_df

In [35]:
# Show the first rows of the plain data again as a reference for the
# perturbed previews in the following cells.
plain_df.head()

Unnamed: 0,X,Y
0,0.914339,4.550146
1,-1.93038,2.70215
2,1.872718,4.180692
3,-0.555524,4.695958
4,-1.034436,8.259404


In [36]:
# Preview the piecewise-mechanism perturbation at epsilon=7.
generate_pairwise_perturbation(plain_df, epsilon=7).head()

Unnamed: 0,X,Y
0,0.85022,4.50868
1,-2.025055,2.736012
2,2.006749,4.17411
3,-0.692603,4.736974
4,-1.072991,8.426809


In [37]:
# Preview the Laplace perturbation at epsilon=0.5.
generate_laplace_perturbation(plain_df=plain_df, epsilon=0.5).head()

Unnamed: 0,X,Y
0,0.904561,4.550796
1,-1.889332,2.702357
2,1.909471,4.194915
3,-0.539499,4.694549
4,-1.046246,8.219261


In [38]:
# Preview the Gaussian perturbation at epsilon=0.5.
generate_gaussian_perturbation(plain_df=plain_df, epsilon=0.5).head()

Unnamed: 0,X,Y
0,0.92647,4.542446
1,-1.912501,2.699781
2,1.896978,4.188605
3,-0.585508,4.68462
4,-1.017901,8.247705


In [39]:
import os

import pandas as pd

# Epsilon values to sweep; one CSV per mechanism is written for each of them.
epsilons = [0.05, 0.1, 0.5, 1, 2, 3, 5, 7, 9]

# DataFrame.to_csv does not create missing parent directories, so make sure
# every per-mechanism export folder exists before the sweep starts.
for export_dir in ('../export/pairwise', '../export/gaussian', '../export/laplace'):
    os.makedirs(export_dir, exist_ok=True)

for epsilon in epsilons:
    # Perturb the same plain dataset with each mechanism at this epsilon.
    Z = generate_pairwise_perturbation(plain_df, epsilon)
    Z_gaussian = generate_gaussian_perturbation(plain_df, epsilon)
    Z_laplace = generate_laplace_perturbation(plain_df, epsilon)

    # Keep only the X and Y columns for the export.
    Z_pd = pd.DataFrame(Z, columns=['X', 'Y'])
    Z_gaussian_pd = pd.DataFrame(Z_gaussian, columns=['X', 'Y'])
    Z_laplace_pd = pd.DataFrame(Z_laplace, columns=['X', 'Y'])

    # File names encode the epsilon used, e.g. perturbed_0.05.csv.
    Z_pd.to_csv('../export/pairwise/perturbed_'+str(epsilon)+'.csv')
    Z_gaussian_pd.to_csv('../export/gaussian/perturbed_'+str(epsilon)+'.csv')
    Z_laplace_pd.to_csv('../export/laplace/perturbed_'+str(epsilon)+'.csv')