# Effect of $\epsilon$ on Average Treatment Effect on IHDP

In [1]:
import copy

import numpy as np
import matplotlib.pyplot as plt
import torch

from misc.agm import calibrateAnalyticGaussianMechanism

%matplotlib inline

# set random seed
np.random.seed(1)
torch.manual_seed(1)

<torch._C.Generator at 0x10f7e0c30>

In [2]:
# no. samples for fitting, no. samples for estimating, no. of draws of z
nf = 500
nt = 500
nd = 1

# privacy parameters
epses = [0.2, 0.4, 0.6, 0.8, 0.99]
delta = 1e-6

# regularisation coefficient
reg_co = 0.1

## Preprocess IHDP data

In [3]:
# load training data
train = {}

with np.load('data/IHDP-1000/ihdp_npci_1-1000.train.npz') as data:
    for i in data.files:
        # convert all arrays to torch tensors
        train[i] = torch.tensor(data[i], dtype=torch.float64)
        
# ate is the true average treatment effect
# yf is the noisy factual outcome
# need only, ate, yf, t and x

# swap axis and preprocess x
for i in  ['yf', 't', 'x']:
    if i != 'x':
        train[i] = train[i].transpose(0, 1)
    else:
        train[i] = train[i].permute(2, 0, 1)
        
# get no. experiments and dim
ne, _, d = train['x'].shape

# change ne if required
# ne = 250

# generate X, T, Y through subsampling
Y_train, X_train = [], []
T_train = torch.stack(
    [torch.cat([torch.ones(int(nf/2), dtype=torch.float64), torch.zeros(int(nf/2), dtype=torch.float64)])] * ne
)

for i in range(ne):
    # get indices for t=1 and t=0
    t1_idx = train['t'][i, :].nonzero().squeeze()
    t0_idx = (1 - train['t'][i, :]).nonzero().squeeze()
                 
    # subsample n indices, n/2 for t=1 and n/2 for t=0
    sam_idx = np.hstack([np.random.choice(t1_idx, int(nf/2)), np.random.choice(t0_idx, int(nf/2))])
    
    Y_train.append(train['yf'][i, sam_idx])
    X_train.append(train['x'][i, sam_idx, :])
    
# convert to torch tensors 
Y_train, X_train = torch.stack(Y_train), torch.stack(X_train)

# permute data
# permute indices
perm = torch.stack([torch.randperm(nf) for i in range(ne)])

# create auxiliary indices
idx = torch.arange(ne)[:, None]

# permute X_train, T_train, Y_train
X_train = X_train[idx, perm]
T_train = T_train[idx, perm]
Y_train = Y_train[idx, perm]

In [4]:
# load test data
test = {}

with np.load('data/IHDP-1000/ihdp_npci_1-1000.test.npz') as data:
    for i in data.files:
        # convert all arrays to torch tensors
        test[i] = torch.tensor(data[i], dtype=torch.float64)
        
# ate is the true average treatment effect
# yf is the noisy factual outcome
# need only, ate, yf, t and x

# swap axis and preprocess x
for i in  ['yf', 't', 'x']:
    if i != 'x':
        test[i] = test[i].transpose(0, 1)
    else:
        test[i] = test[i].permute(2, 0, 1)
        
# get no. experiments and dim
ne, _, d = test['x'].shape

# change ne if required
# ne = 250

# generate X_test, T_test, Y_test through subsampling
Y_test, X_test = [], []
T_test = torch.stack([torch.cat(
    [torch.ones(int(nt/2), dtype=torch.float64), torch.zeros(int(nt/2), dtype=torch.float64)])] * ne
)

for i in range(ne):
    # get indices for t=1 and t=0
    t1_idx = test['t'][i, :].nonzero().squeeze()
    t0_idx = (1 - test['t'][i, :]).nonzero().squeeze()
                 
    # subsample n indices, n/2 for t=1 and n/2 for t=0
    sam_idx = np.hstack([np.random.choice(t1_idx, int(nt/2)), np.random.choice(t0_idx, int(nt/2))])
    
    Y_test.append(test['yf'][i, sam_idx])
    X_test.append(test['x'][i, sam_idx, :])
    
# convert to torch tensors 
Y_test, X_test = torch.stack(Y_test), torch.stack(X_test)

# permute data
# permute indices
perm = torch.stack([torch.randperm(nt) for i in range(ne)])

# create auxiliary indices
idx = torch.arange(ne)[:, None]

# permute X_train, T_train, Y_train
X_test = X_test[idx, perm]
T_test = T_test[idx, perm]
Y_test = Y_test[idx, perm]

In [5]:
# concatenate train and test
X_all = torch.cat([X_train, X_test], 1)
T_all = torch.cat([T_train, T_test], 1)
Y_all = torch.cat([Y_train, Y_test], 1)

## Define model and method

In [6]:
class Log_Reg(torch.nn.Module):
    '''
    Logistic Regression
    '''
    def __init__(self, D_in, D_out):
        super(Log_Reg, self).__init__()
        self.linear = torch.nn.Linear(D_in, D_out, bias=False)
        
    def forward(self, x):
        y_pred = torch.sigmoid(self.linear(x))
        return y_pred

In [7]:
def IPW_PPS(X, T, Y, epses, delta, reg_co, nd, nf):
    '''
    privatised average treatment effect with inverse propensity weighting using private propensity scores
    '''
    # get # experiments, # samples, # dimensions
    ne, ns, dim = X.shape

    # sgd step size
    step_size = 0.01

    ################
    # process data #
    ################

    # get Y0 and Y1
    Y0 = Y * (1 - T)
    Y1 = Y * T
    
    # split data
    # get splits
    fit_split = nf
    est_split = nt

    # permute indices
    perm = torch.stack(
        [torch.randperm(ns) for i in range(ne)]
    )

    # create auxiliary indices
    idx = torch.arange(ne)[:, None]

    # split X into fit, estimate splits
    X_s0 = X[:, :fit_split]
    X_s1 = X[:, fit_split:]

    # expand dim of T to allow multiplication with X
    T_ex_dim = T.reshape(ne, ns, 1)

    # split X0 and X1 into fit, estimate splits
    X0_s1 = (X * (1 - T_ex_dim))[:, fit_split:]
    X1_s1 = (X * T_ex_dim)[:, fit_split:]

    # split T into fit, estimate splits
    T_s0 = T[:, :fit_split]
    T_s1 = T[:, fit_split:]
        
    # split Y0 and Y1 into fit, estimate splits
    Y0_s0 = Y0[:, :fit_split]
    Y1_s0 = Y1[:, :fit_split]

    Y0_s1 = Y0[:, fit_split:]
    Y1_s1 = Y1[:, fit_split:]

    # reshape estimate splits for later
    Y0_s1 = Y0_s1.reshape(ne, 1, est_split)
    Y1_s1 = Y1_s1.reshape(ne, 1, est_split)
    
    ##############
    # fit models #
    ##############

    # instantiate ne different models
    models = [Log_Reg(dim, 1) for i in range(ne)]
    # set model parameters to float64
    [model.double() for model in models]

    # define loss (binary cross entropy)
    loss = torch.nn.BCELoss()

    # define optimisers
    optimisers = [
        torch.optim.SGD(
            models[i].parameters(),
            lr=step_size,
            weight_decay=reg_co,
        )
        for i in range(ne)
    ]

    # train models
    for t in range(1000):
        preds = [
            models[i](X_s0[i]).squeeze() for i in range(ne)
        ]
        losses = [
            loss(preds[i], T_s0[i]) for i in range(ne)
        ]
        [opt.zero_grad for opt in optimisers]
        [loss.backward() for loss in losses]
        [opt.step() for opt in optimisers]

    #############################
    # estimate treatment effect #
    #############################

    # initialise pi_hat dictionaries
    pi_hats = {}
    
    # initialise e dictionary
    e = {}
    
    # intialise sigma dictionary
    sig_d = {}

    # get estimated propensity scores
    pi_hats[0] = torch.stack(
        [models[i](X_s1[i]).squeeze() for i in range(ne)]
    )

    # perturb model and get relevant quantities
    for eps in epses:
        # define sensitivity for log reg
        s_w = 2.0 / (fit_split * reg_co)

        # define sigma for log reg
        sigma = np.sqrt(
            2 * np.log(1.25 / delta) + 1e-10
        ) * (s_w / eps)
        sigma_2 = sigma ** 2

#         # analytic gaussian mechanism
#         sigma = calibrateAnalyticGaussianMechanism(eps, delta, s_w)
#         sigma_2 = sigma ** 2

        # define z distribution for log reg
        z_dist = torch.distributions.normal.Normal(
            torch.tensor([0.0], dtype=torch.float64),
            torch.tensor([sigma], dtype=torch.float64),
        )

        # draw z for log reg
        z_vecs = z_dist.sample(
            (ne, nd, dim)
        ).reshape(ne, nd, dim)

        # create temp models
        models_ = [copy.deepcopy(models) for i in range(nd)]

        # initialise list for privatised estimated propensity scores
        pi_hats[eps] = []

        # perturb weights with z_vecs
        for i in range(ne):
            for j in range(nd):
                model_temp = models_[j][i]
                model_temp.linear.weight.data.add_(
                    z_vecs[i, j, :]
                )
                pi_hats[eps].append(
                    model_temp(X_s1[i]).squeeze()
                )

        # reshape stacked privatised estimated propensity scores as ne * nd
        pi_hats[eps] = torch.stack(pi_hats[eps]).reshape(ne, nd, est_split)
                        
        # max of abs of Y1_s1 / propensity score for each experiment
        max_abs_Y1_s1_div_ps = torch.max(
            torch.abs(Y1_s1) / ((ns - nf) * pi_hats[eps]), 2
        )[0]
        
        # max of abs of Y0_s1 / (1 - propensity score) for each experiment
        max_abs_Y0_s1_div_1_m_ps = torch.max(
            torch.abs(Y1_s1) / ((ns - nf) * (1 - pi_hats[eps])), 2
        )[0]
        # hstack max_abs_Y_s1_div_ps and max_abs_Y_s1_div_1_m_ps
        max_abs_all = torch.cat(
            (max_abs_Y1_s1_div_ps, max_abs_Y0_s1_div_1_m_ps), 
            1,
        )
        
        # replace inf/nan with 1e20 for stability
        max_abs_all[torch.isfinite(max_abs_all) == 0] = 1e20
            
        # define sensitivity for estimation
        s_e = 2 * torch.max(max_abs_all, 1)[0]
        
        # define sigma for estimation
        sigma_e = np.sqrt(
            2 * np.log(1.25 / delta) + 1e-10
        ) * (s_e / eps)
        sig_d[eps] = sigma_e.detach().numpy()
        sigma_e_2 = sigma_e ** 2
        
#         # analytic gaussian mechanism
#         sigma_e = calibrateAnalyticGaussianMechanism(eps, delta, s_e)
#         sigma_e_2 = sigma_e ** 2

        # define e distribution for estimation
        e_dist = torch.distributions.multivariate_normal.MultivariateNormal(
            torch.tensor([0.0], dtype=torch.float64),
            torch.diag(sigma_e)
        )

        # draw e for estimation
        e[eps] = e_dist.sample().reshape(ne)
    
    # get treatment effects
    # true
    te = {}
    # empirical means and std of means of ERM + private ERM
    te_hats = {'means': [], 'stds': []}
    # means and std of means of privatised te_hats
    te_hats_p = {'means': [], 'stds': []}
                
    for key in pi_hats.keys():
        if key != 0:
            # empirical estimates
            # reduce_mean from (ne, nd, est_split) tensor to (ne * nd, 1) matrix
            te_hats_ = torch.mean(
                Y1_s1 / pi_hats[key] - Y0_s1 / (1 - pi_hats[key]), 
                [1, 2],
            )
        else:
            # empirical estimate for noiseless case
            # reduce_mean from (ne, est_split) tensor to (ne , 1) matrix
            te_hats_ = torch.mean(
                Y1_s1.squeeze() / pi_hats[key] - Y0_s1.squeeze() / (1 - pi_hats[key]),
                1,
            )
        te_hats['means'].append(
            te_hats_.detach().numpy()
        )
        te_hats['stds'].append(
            te_hats_.std().detach().numpy()
        )
        try:
            te_hats_p_ = te_hats_ + e[key]
            te_hats_p['means'].append(
                te_hats_p_.detach().numpy()
            )
            te_hats_p['stds'].append(
                te_hats_p_.std().detach().numpy()
            )
        except KeyError:
            # fill first row for later
            te_hats_p['means'].append(
                te_hats_.detach().numpy()
            )
            te_hats_p['stds'].append(
                te_hats_.std().detach().numpy()
            )
        
    te_hats['means'] = np.array(te_hats['means'])
    te_hats['stds'] = np.array(te_hats['stds'])
    te_hats_p['means'] = np.array(te_hats_p['means'])
    te_hats_p['stds'] = np.array(te_hats_p['stds'])

    return te, te_hats, te_hats_p, sig_d

## Run method and print results

In [8]:
te, te_hats, te_hats_p, sig_d = IPW_PPS(X_all, T_all, Y_all, epses, delta, reg_co, nd, nf)

In [9]:
means = [np.mean(i) for i in te_hats['means']]
for i in range(len(means)):
    if i == 0:
        print('The mean ATE for no epsilon is {}'.format(means[i]))
    else:
        print('The mean ATE for epsilon = {} is {}'.format(epses[i-1], means[i]))                        

The mean ATE for no epsilon is 4.795313038886711
The mean ATE for epsilon = 0.2 is -237487.30458324638
The mean ATE for epsilon = 0.4 is 32.33690793468985
The mean ATE for epsilon = 0.6 is 8.365970893360508
The mean ATE for epsilon = 0.8 is 5.222382916462129
The mean ATE for epsilon = 0.99 is 5.3131871962655985


In [10]:
means = [np.mean(i) for i in te_hats_p['means']]
for i in range(len(means)):
    if i == 0:
        print('The mean ATE for no epsilon is {}'.format(means[i]))
    else:
        print('The privatised mean ATE for epsilon = {} is {}'.format(epses[i-1], means[i]))                        

The mean ATE for no epsilon is 4.795313038886711
The privatised mean ATE for epsilon = 0.2 is -237477.12717456
The privatised mean ATE for epsilon = 0.4 is 32.1964618835124
The privatised mean ATE for epsilon = 0.6 is 8.49751873741456
The privatised mean ATE for epsilon = 0.8 is 5.087463207202751
The privatised mean ATE for epsilon = 0.99 is 5.40215889638596


In [11]:
sgn_tau_hat = np.sign(te_hats['means'][0])
    
# compute probabilities
probs = [sum(np.sign(i) != sgn_tau_hat) / ne for i in te_hats['means'][1:]]

for i in range(len(epses)):
    print('The probability of signs being flipped for non-privatised ATE for epsilon = {} is {}'.format(epses[i], probs[i]))     

The probability of signs being flipped for non-privatised ATE for epsilon = 0.2 is 0.515
The probability of signs being flipped for non-privatised ATE for epsilon = 0.4 is 0.425
The probability of signs being flipped for non-privatised ATE for epsilon = 0.6 is 0.344
The probability of signs being flipped for non-privatised ATE for epsilon = 0.8 is 0.287
The probability of signs being flipped for non-privatised ATE for epsilon = 0.99 is 0.229


In [12]:
sgn_tau_hat = np.sign(te_hats_p['means'][0])
    
# compute probabilities
probs = [sum(np.sign(i) != sgn_tau_hat) / ne for i in te_hats_p['means'][1:]]

for i in range(len(epses)):
    print('The probability of signs being flipped for privatised ATE for epsilon = {} is {}'.format(epses[i], probs[i]))     

The probability of signs being flipped for privatised ATE for epsilon = 0.2 is 0.517
The probability of signs being flipped for privatised ATE for epsilon = 0.4 is 0.425
The probability of signs being flipped for privatised ATE for epsilon = 0.6 is 0.347
The probability of signs being flipped for privatised ATE for epsilon = 0.8 is 0.301
The probability of signs being flipped for privatised ATE for epsilon = 0.99 is 0.228
