# Computing distances

Preprocessing and computing distances for pairwise attacks. Distances are calculated in both pixel space and feature space.

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import umap
import h5py
from glob import glob
from scipy.spatial.distance import cdist
from tqdm.notebook import tqdm
import pickle

from scipy.spatial.distance import cdist
from scipy.stats import gaussian_kde

In [4]:
# Load the candidate, synthetic and test image sets and flatten every sample
# into a single row of 9*64*64 values. The explicit target size also acts as a
# shape check: reshape raises if a stored sample has a different size.
candidate = np.load('./privacy_saves/candidate.npy')
candidate_flat = candidate.reshape([len(candidate), 9*64*64])

# Context managers guarantee the HDF5 handle is released even if reading the
# dataset raises, instead of relying on a manual close() being reached.
with h5py.File('./privacy_saves/synth_set.h5', 'r') as h5f:
    synth_set = h5f['images'][:]
synth_set = np.array(synth_set.reshape([len(synth_set), 9*64*64]))

# Only used in supplemental as a reference for distances
with h5py.File('./privacy_saves/test_set.h5', 'r') as h5f:
    test_set = h5f['images'][:]
test_set = np.array(test_set.reshape([len(test_set), 9*64*64]))

#### Pixel space L2 distance between candidate and synthetic

In [5]:
# Pairwise pixel-space distances: one row per candidate, one column per
# synthetic sample (cdist with its default metric).
L2_dist = np.zeros([len(candidate_flat), len(synth_set)])

# The distances are computed one slice of synthetic samples at a time so tqdm
# can report progress. Slice boundaries are derived from len(synth_set) rather
# than hard-coded, so no column is left at its initial zero if the synthetic
# set is not exactly 10'000 samples long (10'000 samples -> 20 slices of 500).
chunk_size = 500
for start in tqdm(range(0, len(synth_set), chunk_size)):
    stop = start + chunk_size  # slicing clips `stop` at the end of the array
    L2_dist[:, start:stop] = cdist(candidate_flat, synth_set[start:stop])

# Per candidate: distance to the closest synthetic sample, normalised by that
# candidate's median distance to all synthetic samples.
dist_candidate = L2_dist.min(axis=1) / np.median(L2_dist, axis=1)

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




#### Pixel space L2 distance between test (1000 samples) and synthetic, only used in supplemental as reference distance

In [6]:
# Reference distances between the first 1000 test samples and every synthetic
# sample. Sizes are taken from the data instead of being hard-coded, so the
# output matrix always matches the arrays that are actually loaded.
test_ref = test_set[:1000]
test_ref = test_ref.reshape([len(test_ref), -1])  # one flat row per sample
L2_test_dist = np.zeros([len(test_ref), len(synth_set)])

# The synthetic set is processed in slices of 1000 samples so tqdm can report
# progress (10 slices for a synthetic set of 10'000 samples).
chunk_size = 1000
for start in tqdm(range(0, len(synth_set), chunk_size)):
    stop = start + chunk_size  # slicing clips `stop` at the end of the array
    L2_test_dist[:, start:stop] = cdist(test_ref, synth_set[start:stop])

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




In [7]:
# Persist both pixel-space distance matrices.
# NOTE(review): the 'syth' spelling in the first filename is kept unchanged;
# other code may load this exact path — confirm before renaming.
for save_path, dist_matrix in (
    ('./privacy_saves/L2_candidate_syth.npy', L2_dist),
    ('./privacy_saves/L2_test_synth.npy', L2_test_dist),
):
    np.save(save_path, dist_matrix)

#### Feature distance

In [12]:
# Load the feature-space representations of the candidate, synthetic and test
# sets, in that order.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# only use it on files produced by this project.
feature_paths = (
    './privacy_saves/u_candidate.npy',
    './privacy_saves/u_synth.npy',
    './privacy_saves/u_test.npy',
)
u_candidate, u_synth, u_test = [
    np.load(path, allow_pickle=True) for path in feature_paths
]

In [13]:
# Pairwise feature-space distances against the full synthetic set
# (cdist with its default metric).
u_test_ref = u_test[:1000]  # only the first 1000 test samples are compared

# Rows: candidates, columns: synthetic samples
umap_dist_candidate_synth = cdist(u_candidate, u_synth)
# Rows: test reference samples, columns: synthetic samples
umap_dist_test_synth = cdist(u_test_ref, u_synth)

In [14]:
np.save('./privacy_saves/umap_dist_candidate_synth.npy', umap_dist_candidate_synth)
np.save('./privacy_saves/umap_dist_test_synth.npy', umap_dist_test_synth)