In [1]:
import math
import torch
import seaborn as sns
from time import time
from data import Dataset
from privacy import LaplaceMechanism
from rich.progress import track
from torch_sparse import SparseTensor

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-lwvmg3c4 because the default path (/idiap/home/sajadmanesh/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [11]:
import numpy as np
class AsymmetricRandResponse:
    """Asymmetric randomized response over a sparse matrix treated as a binary mask.

    The budget ``eps`` is split 90% / 10%:

    * ``eps_link`` sets ``p = 1 / (1 + exp(-eps_link))``, the base probability used
      by the per-entry randomized response.
    * ``eps_density`` scales the Laplace noise added to the matrix density.

    From the noisy density ``d`` a sampling rate ``q = d / (2pd - p - d + 1)`` is
    derived; with it, an existing entry is kept with probability ``p * q`` and a
    missing entry is created with probability ``(1 - p) * q``, so the expected
    density of the output equals ``d``.

    Calling the instance perturbs the matrix chunk by chunk (along dim 0) so that
    only ``chunk_size`` rows are converted to dense form at a time.
    """

    def __init__(self, eps):
        """Split the total budget ``eps`` between link and density perturbation.

        Args:
            eps: total budget; must be strictly positive.

        Raises:
            ValueError: if ``eps`` is not strictly positive (a zero or negative
                budget would otherwise surface later as a division by zero or an
                invalid Laplace scale).
        """
        if not eps > 0:
            raise ValueError(f'eps must be strictly positive, got {eps!r}')
        self.eps_link = eps * 0.9
        self.eps_density = eps * 0.1

    @staticmethod
    def _flip_probabilities(p, density):
        """Return ``(Pr[1 -> 1], Pr[0 -> 1])`` for base probability ``p``.

        ``density`` is a (possibly noisy) density estimate. Laplace noise can push
        it outside ``[0, 1]``, so it is clamped first; otherwise the resulting
        probabilities could be negative and ``torch.bernoulli`` would reject them.
        """
        density = min(max(float(density), 0.0), 1.0)
        if density == 0.0:
            # Nothing should be sampled; also avoids 0 / 0 when p == 1.
            return 0.0, 0.0
        q = min(1.0, density / (2 * p * density - p - density + 1))
        return p * q, (1 - p) * q

    def arr(self, data: "SparseTensor"):
        """Perturb one sparse matrix (typically a row chunk) and return the result.

        Args:
            data: sparse matrix; its stored entries are interpreted as ones.

        Returns:
            A value-less ``SparseTensor`` of the same shape holding the sampled entries.
        """
        rows, cols = data.size(0), data.size(1)
        # density() is assumed to be nnz / (rows * cols), so adding or removing
        # one entry moves it by exactly 1 / (rows * cols). Using cols * cols here
        # would under-scale the noise whenever ``data`` is a chunk with fewer rows
        # than columns.
        sensitivity = 1 / (rows * cols)
        p = 1 / (1 + math.exp(-self.eps_link))
        noisy_density = np.random.laplace(
            loc=data.density(), scale=sensitivity / self.eps_density
        )
        pr_1to1, pr_0to1 = self._flip_probabilities(p, noisy_density)

        mask = data.to_dense(dtype=bool)
        # Per-entry Bernoulli parameter: pr_1to1 where an entry exists, pr_0to1 elsewhere.
        out = mask * pr_1to1 + (~mask) * pr_0to1
        torch.bernoulli(out, out=out)  # sample in place
        return SparseTensor.from_dense(out, has_value=False)

    def __call__(self, data, chunk_size=1000):
        """Perturb ``data`` in chunks of ``chunk_size`` rows and merge the results."""
        pert_chunks = [
            self.arr(chunk)
            for chunk in self.split_sparse(data, chunk_size=chunk_size)
        ]
        return self.merge_sparse(pert_chunks, chunk_size=chunk_size)

    @staticmethod
    def split_sparse(mat, chunk_size):
        """Split ``mat`` along dim 0 into consecutive chunks of ``chunk_size`` rows.

        The final chunk holds the remaining rows and may be shorter.

        Raises:
            ValueError: if ``chunk_size`` is not positive.
        """
        if chunk_size <= 0:
            raise ValueError(f'chunk_size must be positive, got {chunk_size!r}')
        n_rows = mat.size(0)
        # The stop index is clamped explicitly; the slice implementation is not
        # assumed to clamp an out-of-range stop on its own.
        return [
            mat[start:min(start + chunk_size, n_rows)]
            for start in range(0, n_rows, chunk_size)
        ]

    @staticmethod
    def merge_sparse(chunks, chunk_size):
        """Stack row chunks produced by ``split_sparse`` back into one matrix.

        Every chunk except the last is assumed to have exactly ``chunk_size`` rows;
        chunk ``i`` has its row indices shifted by ``i * chunk_size``. The result is
        built without values and is not coalesced.
        """
        n = (len(chunks) - 1) * chunk_size + chunks[-1].size(0)
        m = chunks[0].size(1)
        rows, cols = [], []
        for i, chunk in enumerate(chunks):
            coords = chunk.coo()  # fetched once per chunk; [0] = row, [1] = col
            rows.append(coords[0] + i * chunk_size)
            cols.append(coords[1])
        return SparseTensor(row=torch.cat(rows), col=torch.cat(cols), sparse_sizes=(n, m))
        

In [3]:
# Experiment configuration.
dataset = 'facebook'
# Prefer the GPU, but fall back to the CPU so the notebook still runs without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Number of adjacency rows handed to the perturbation mechanism per chunk.
chunk_size = 1000

In [4]:
# Read the dataset from disk and report how long the load took.
print('loading dataset...', end='')
time_start = time()
loader = Dataset(dataset, data_dir='../datasets')
data = loader.load()
adj_t = data.adj_t  # adjacency taken from the loaded data object
time_end = time()
elapsed = time_end - time_start
print(f'done in {elapsed:.2f}s')

loading dataset...done in 0.43s


In [5]:
# Move the dataset to the target device and report transfer time and peak GPU memory.
print(f'moving data to {device}...', end='')
time_start = time()
data = data.to(device)
adj_t = data.adj_t
if torch.cuda.is_available():
    # CUDA work is queued asynchronously; wait for it so the timer covers all of it.
    torch.cuda.synchronize()
time_end = time()
print(f'done in {time_end - time_start:.2f}s')
# Peak allocation since the start of the process, in GiB (0 when CUDA is unavailable).
gpu_mem = torch.cuda.max_memory_allocated() / 1024 ** 3 if torch.cuda.is_available() else 0.0
print(f'max GPU memory used = {gpu_mem:.2f} GB\n')

moving data to cuda...done in 2.07s
max GPU memory used = 0.10 GB



In [12]:
# Perturb the adjacency matrix with the mechanism and report runtime and peak GPU memory.
print('perturbing data...', end='')
time_start = time()

mech = AsymmetricRandResponse(eps=1)
adj_t_pert = mech(adj_t, chunk_size=chunk_size)

if torch.cuda.is_available():
    # Kernels launch asynchronously; synchronize so the elapsed time is not under-reported.
    torch.cuda.synchronize()
time_end = time()
print(f'done in {time_end - time_start:.2f}s')
# Peak allocation since the start of the process, in GiB (0 when CUDA is unavailable).
gpu_mem = torch.cuda.max_memory_allocated() / 1024 ** 3 if torch.cuda.is_available() else 0.0
print(f'max GPU memory used = {gpu_mem:.2f} GB\n')

perturbing data...done in 0.06s
max GPU memory used = 0.50 GB

