In [1]:
import sys 
sys.path.append('../../')

In [1]:
import Helpers.helpers as helpers

# Load the RQ1 seeds CSV through the project helper and preview the first rows.
plain_df = helpers.load_dataset('../../data/seeds-dataset/rq1.csv')
plain_df.head()



Unnamed: 0,area,perimeter,class
0,15.26,14.84,0
1,14.88,14.57,0
2,14.29,14.09,0
3,13.84,13.94,0
4,16.14,14.99,0


In [2]:
from diffprivlib.mechanisms import laplace, gaussian

#lp = laplace.Laplace(epsilon=3, sensitivity=1)
#lp.randomise(2)

In [3]:
#gs = gaussian.GaussianAnalytic(epsilon=3, sensitivity=1, delta=0.1)
#gs.randomise(1)

In [4]:
from matplotlib import pyplot as plt
import numpy as np


def ls_at_distance(df, u, k):
    """Return the local sensitivity term used at distance ``k``.

    Prints ``k`` and the denominator ``len(df) - k + 1`` as progress output.

    Args:
        df: Any sized container; only ``len(df)`` is used.
        u: Numerator of the sensitivity ratio (the caller passes an upper
            bound on the data values).
        k: Distance (number of records) being considered.

    Returns:
        ``|u / (len(df) - k + 1)|`` when the denominator is positive,
        otherwise ``1``.
    """
    print(f'Computing local sensitivity for k = {k}')
    remaining = len(df) - k + 1
    print(remaining)
    if remaining > 0:
        return np.abs(u / remaining)
    # Non-positive denominator: fall back to a constant sensitivity of 1.
    return np.abs(1)

# Based on "Smooth Sensitivity and Sampling in Private Data Analysis"
# (Nissim, Raskhodnikova, Smith).
def calc_smooth_sensitivity(X, u, epsilon=1, max_k=50):
    """Estimate a smooth-sensitivity bound and plot the per-k candidates.

    For each ``k`` in ``range(0, max_k)`` the local sensitivity at distance
    ``k`` is damped by ``exp(-beta * k)``; the final sensitivity is twice the
    largest damped value. The damped values are plotted on the current
    matplotlib figure and the result is printed.

    Args:
        X: Dataset; only ``len(X)`` is used here (it is also forwarded to
            ``ls_at_distance``).
        u: Upper bound forwarded to ``ls_at_distance``.
        epsilon: Privacy budget used to derive ``beta`` (default 1).
        max_k: Exclusive upper limit of the distances examined (default 50).
            Must be at least 1, otherwise there is nothing to maximise.

    Returns:
        The computed sensitivity (``2 * max_k(exp(-beta*k) * LS_k)``).
        Previously this value was only printed and the function returned
        ``None``, so callers had to copy the number by hand.
    """
    delta = 1 / len(X) ** 2  # delta = 1/n^2

    # Step 1: smoothing parameter beta derived from (epsilon, delta).
    beta = epsilon / (2 * np.log(2 / delta))

    # Step 2: smoothed-out local sensitivity for each candidate distance k.
    r = [np.exp(-beta * k) * ls_at_distance(X, u, k) for k in range(0, max_k)]
    plt.plot(r)
    plt.xlabel('Value of k')
    plt.ylabel('Smoothed-out Local Sensitivity')

    # Step 3: the bound is twice the maximum of the smoothed values.
    S = np.max(r)
    sensitivity = 2 * S
    print(f'Final sensitivity: {sensitivity}')
    return sensitivity

In [6]:
#u = np.max(plain_df).max()
#calc_smooth_sensitivity(plain_df, u)
#sensitivity = 0.507

In [7]:
import numpy as np

# Adapted from
# https://github.com/forestneo/sunPytools/blob/master/dplib/ldp_mechanisms/piecewise_mechanism.py
# Paper: "Collecting and Analyzing Multidimensional Data with Local Differential Privacy"
def randomize_piece_wise(epsilon, value):
    """Perturb ``value`` by sampling uniformly from one of three pieces.

    The output range ``[-bound, bound]`` is split into a left piece, a centre
    piece around ``value`` and a right piece. One piece is selected with
    probabilities derived from ``epsilon`` and ``value``, then a point is
    drawn uniformly inside it. Uses NumPy's global random state (two draws
    per call).

    Args:
        epsilon: Privacy budget. ``epsilon == 0`` makes the bound's
            denominator zero.
        value: Scalar to perturb. NOTE(review): the probabilities are only
            non-negative for ``value`` in [-1, 1]; this is not validated here.

    Returns:
        The perturbed scalar.
    """
    exp_half = np.e ** (epsilon / 2)
    bound = (exp_half + 1) / (exp_half - 1)

    # Selection probabilities for the left and centre pieces; the right piece
    # takes whatever probability mass remains.
    p_left = (value + 1) / (2 + 2 * exp_half)
    p_centre = exp_half / (exp_half + 1)

    # End points of the centre piece.
    centre_lo = (bound + 1) * value / 2 - (bound - 1) / 2
    centre_hi = (bound + 1) * value / 2 + (bound - 1) / 2

    selector = np.random.random()
    if selector < p_left:
        low, high = -bound, centre_lo
    elif selector < p_left + p_centre:
        low, high = centre_lo, centre_hi
    else:
        low, high = centre_hi, bound

    # Uniform draw inside the chosen piece.
    return low + np.random.random() * (high - low)


In [8]:
from Helpers.pairwise import PMBase, PiecewiseMechanism
# Quick check of the project's PMBase encoder: perturb the value 1 with epsilon=10.
pm_encoder = PMBase(epsilon=10)
pm_encoder.randomise(1)

1.0037665280313193

## Generate data

In [9]:
def generate_pairwise_perturbation(plain_df, epsilon):
    """Return a copy of ``plain_df`` with every cell passed through PiecewiseMechanism.

    A single mechanism is built whose domain spans the smallest and largest
    value found anywhere in the frame, and its ``randomise`` method is applied
    to each cell, column by column.

    Args:
        plain_df: Input DataFrame; it is not modified.
        epsilon: Privacy budget handed to ``PiecewiseMechanism``.

    Returns:
        A new DataFrame with the same columns as ``plain_df`` holding the
        perturbed values.
    """
    # Frame-wide bounds (across all columns) define the mechanism's domain.
    upper = plain_df.max().max()
    lower = plain_df.min().min()
    mechanism = PiecewiseMechanism(epsilon=epsilon, domain=(lower, upper))

    perturbed_df = plain_df.copy()
    for column in plain_df.columns:
        perturbed_df[column] = plain_df[column].apply(mechanism.randomise)
    return perturbed_df

def generate_laplace_perturbation(plain_df, epsilon, sensitivity=None):
    """Return a copy of ``plain_df`` with Laplace noise added to every cell.

    Args:
        plain_df: Input DataFrame; it is not modified.
        epsilon: Privacy budget handed to ``laplace.Laplace``.
        sensitivity: Sensitivity handed to ``laplace.Laplace``. When omitted,
            the frame-wide value range (largest minus smallest cell) is used.
            Previously the function read an undefined global ``sensitivity``
            and raised ``NameError`` on a fresh kernel.

    Returns:
        A new DataFrame with the same columns as ``plain_df`` holding the
        perturbed values.
    """
    if sensitivity is None:
        # Width of the observed domain across all columns.
        upper = plain_df.max().max()
        lower = plain_df.min().min()
        sensitivity = float(upper - lower)

    lp = laplace.Laplace(epsilon=epsilon, sensitivity=sensitivity)
    perturbed_df = plain_df.copy()
    for col in plain_df.columns:
        perturbed_df[col] = plain_df[col].apply(lp.randomise)
    return perturbed_df

def generate_gaussian_perturbation(plain_df, epsilon, sensitivity=None, delta=0.1):
    """Return a copy of ``plain_df`` with analytic-Gaussian noise added to every cell.

    Args:
        plain_df: Input DataFrame; it is not modified.
        epsilon: Privacy budget handed to ``gaussian.GaussianAnalytic``.
        sensitivity: Sensitivity handed to ``gaussian.GaussianAnalytic``. When
            omitted, the frame-wide value range (largest minus smallest cell)
            is used. Previously the function read an undefined global
            ``sensitivity`` and raised ``NameError`` on a fresh kernel.
        delta: Delta handed to ``gaussian.GaussianAnalytic`` (default 0.1,
            the value that used to be hard-coded).

    Returns:
        A new DataFrame with the same columns as ``plain_df`` holding the
        perturbed values.
    """
    if sensitivity is None:
        # Width of the observed domain across all columns.
        upper = plain_df.max().max()
        lower = plain_df.min().min()
        sensitivity = float(upper - lower)

    gs = gaussian.GaussianAnalytic(epsilon=epsilon, sensitivity=sensitivity, delta=delta)
    perturbed_df = plain_df.copy()
    for col in plain_df.columns:
        perturbed_df[col] = plain_df[col].apply(gs.randomise)
    return perturbed_df

In [10]:
# Show the unperturbed rows again for comparison with the perturbed preview below.
plain_df.head()

Unnamed: 0,area,perimeter,class
0,15.26,14.84,0
1,14.88,14.57,0
2,14.29,14.09,0
3,13.84,13.94,0
4,16.14,14.99,0


In [11]:
# Preview the first rows of a piecewise-perturbed copy at epsilon=7.
generate_pairwise_perturbation(plain_df, epsilon=7).head()

Unnamed: 0,area,perimeter,class
0,15.557955,14.857397,-0.502997
1,14.94289,14.838237,-0.55495
2,14.273005,14.369473,-0.001828
3,13.869258,14.275747,18.747712
4,16.319046,15.292638,-0.488706


In [12]:
#generate_laplace_perturbation(plain_df=plain_df, epsilon=0.5).head()

In [13]:
#generate_gaussian_perturbation(plain_df=plain_df, epsilon=0.5).head()

In [14]:
import pandas as pd

from pathlib import Path

# Privacy budgets to sweep; one perturbed CSV is written per value.
epsilons = [0.05, 0.1, 0.5, 1, 2, 3, 5, 7, 9]

# DataFrame.to_csv does not create missing directories, so make sure the
# target exists before the loop instead of failing on the first write.
export_dir = Path('../export/seeds_dataset/pairwise')
export_dir.mkdir(parents=True, exist_ok=True)

# NOTE: the Laplace and Gaussian exports are currently disabled; only the
# piecewise-mechanism output is written.
for epsilon in epsilons:
    Z = generate_pairwise_perturbation(plain_df, epsilon)
    # Keep only the two feature columns in the exported file.
    Z_pd = pd.DataFrame(Z, columns=['area', 'perimeter'])
    Z_pd.to_csv(export_dir / f'perturbed_{epsilon}.csv')