In [1]:
import os
os.chdir('C:/Users/liuch/Dropbox (Personal)/MIT Pre-Doc Personal/Rahul Singh/Adversarial Riesz/Empirical Application/adversarial_riesz')
# os.chdir('C:/Users/liuw/Dropbox (Personal)/MIT Pre-Doc Personal/Rahul Singh/Adversarial Riesz/Empirical Application/adversarial_riesz')

In [25]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Definition of Diff-in-Diff Moment

In [26]:
import os
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import scipy
import scipy.special
from sklearn.linear_model import LassoCV, LogisticRegressionCV, LinearRegression, Lasso, LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.base import clone
import torch
import torch.nn as nn



# E[E[Y|D=1, A=1, X] – E[Y|D=0, A=1, X] – (E[Y|D=1, A=0, X] – E[Y|D=0, A=0, X])]
# D is the first column, A is the second, and X is the remaining columns.
def moment_fn(x, test_fn):
    n_obs = x.shape[0]
    if torch.is_tensor(x):
        with torch.no_grad():
            t11 = torch.cat([torch.ones((n_obs, 2)).to(device), x[:, 2:]], dim=1)
            t01 = torch.cat([torch.zeros((n_obs, 1)).to(device), torch.ones((n_obs, 1)).to(device), x[:, 2:]], dim=1)
            t10 = torch.cat([torch.ones((n_obs, 1)).to(device), torch.zeros((n_obs, 1)).to(device), x[:, 2:]], dim=1)
            t00 = torch.cat([torch.zeros((n_obs, 2)).to(device), x[:, 2:]], dim=1)
    else:
        t11 = np.hstack([np.ones((n_obs, 2)), x[:, 2:]])
        t01 = np.hstack([np.zeros((n_obs, 1)), np.ones((n_obs, 1)), x[:, 2:]])
        t10 = np.hstack([np.ones((n_obs, 1)), np.zeros((n_obs, 1)), x[:, 2:]])
        t00 = np.hstack([np.zeros((n_obs, 2)), x[:, 2:]])
    return test_fn(t11) - test_fn(t01) - test_fn(t10) + test_fn(t00)

# Functions for Different Adversarial Riesz Estimators

In [27]:
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import Pipeline



from sklearn.linear_model import LassoCV

def get_reg_fn(X, y):
    est = LassoCV(max_iter=10000, random_state=123).fit(X, y)
    return lambda: Lasso(alpha=est.alpha_, max_iter=10000, random_state=123)



from debiased import DebiasedMoment
from advreisz.linear import SparseLinearAdvRiesz

def get_splin_fn(X):
    return lambda: SparseLinearAdvRiesz(moment_fn,
                                        featurizer=Pipeline([('p', PolynomialFeatures(degree=2, include_bias=False)),
                                                             ('s', StandardScaler()),
                                                             ('cnt', PolynomialFeatures(degree=1, include_bias=True))]),
                                        n_iter=50000, lambda_theta=0.01, B=10,
                                        tol=0.00001)



from advreisz.kernel import AdvNystromKernelReisz
from utilities import AutoKernel, prod_kernel

# advkernel_fn = lambda: AdvKernelReisz(kernel=AutoKernel(type='var'), regm=6e-4, regl=1e-4)

def get_advnyskernel_fn(X):
    est = AdvNystromKernelReisz(kernel=lambda X, Y=None: prod_kernel(X, Y=Y, gamma=1.0/X.shape[1]),
                                regm='auto', regl='auto', n_components=100, random_state=123)
    reg = est.opt_reg(X)
    print(est.scores_)
    print(reg)
    return lambda: AdvNystromKernelReisz(kernel=lambda X, Y=None: prod_kernel(X, Y=Y, gamma=1.0/X.shape[1]),
                                         regm=6*reg, regl=reg, n_components=100, random_state=123)



from advreisz.kernel import AdvKernelReisz
from utilities import AutoKernel, prod_kernel

# advkernel_fn = lambda: AdvKernelReisz(kernel=AutoKernel(type='var'), regm=6e-4, regl=1e-4)

def get_advkernel_fn(X):
    est = AdvKernelReisz(kernel=lambda X, Y=None: prod_kernel(X, Y=Y, gamma=1.0/X.shape[1]), regm='auto', regl='auto')
    reg = est.opt_reg(X)
    print(est.scores_)
    print(reg)
    return lambda: AdvKernelReisz(kernel=lambda X, Y=None: prod_kernel(X, Y=Y, gamma=1.0/X.shape[1]), regm=6*reg, regl=reg)



from advreisz.kernel import NystromKernelReisz
from utilities import AutoKernel, prod_kernel

# kernel_fn = lambda: KernelReisz(kernel=AutoKernel(type='var'), regl=6e-2)

def get_nyskernel_fn(X):
    est = NystromKernelReisz(kernel=lambda X, Y=None: prod_kernel(X, Y=Y, gamma=1.0/X.shape[1]),
                             regl='auto', n_components=100, random_state=123)
    reg = est.opt_reg(X)
    print(est.scores_)
    print(reg)
    return lambda: NystromKernelReisz(kernel=lambda X, Y=None: prod_kernel(X, Y=Y, gamma=1.0/X.shape[1]),
                                      regl=reg, n_components=100, random_state=123)



from advreisz.kernel import KernelReisz
from utilities import AutoKernel, prod_kernel

# kernel_fn = lambda: KernelReisz(kernel=AutoKernel(type='var'), regl=6e-2)

def get_kernel_fn(X):
    est = KernelReisz(kernel=lambda X, Y=None: prod_kernel(X, Y=Y, gamma=1.0/X.shape[1]), regl='auto')
    reg = est.opt_reg(X)
    print(est.scores_)
    print(reg)
    return lambda: KernelReisz(kernel=lambda X, Y=None: prod_kernel(X, Y=Y, gamma=1.0/X.shape[1]), regl=reg)



from utilities import PluginRR2

def get_lg_plugin_fn(X):
    clf = LogisticRegressionCV(cv=3, max_iter=10000, random_state=123)
    C_ = clf.fit(X[:, 1:], X[:, 0]).C_[0]
    return lambda: PluginRR2(model_t=LogisticRegression(C=C_, max_iter=10000, random_state=123),
                            min_propensity=0)


def get_rf_plugin_fn(X):
    gcv = GridSearchCV(RandomForestClassifier(bootstrap=True, random_state=123),
                       param_grid={'max_depth': [3, None],
                                   'min_samples_leaf': [10, 50]},
                       scoring='r2',
                       cv=5)
    best_model = clone(gcv.fit(X[:, 1:], X[:, 0]).best_estimator_)
    return lambda: PluginRR2(model_t=best_model, min_propensity=0)



from utilities import FitParamsWrapper
from advreisz.deepreisz import AdvReisz


device = None #torch.cuda.current_device() if torch.cuda.is_available() else None

# Returns a deep model for the reisz representer
def get_learner(n_t, n_hidden, p):
    return nn.Sequential(nn.Dropout(p=p), nn.Linear(n_t, n_hidden), nn.LeakyReLU(),
                         nn.Dropout(p=p), nn.Linear(n_hidden, n_hidden), nn.LeakyReLU(),
                         nn.Dropout(p=p), nn.Linear(n_hidden, 1))

# Returns a deep model for the test functions
def get_adversary(n_z, n_hidden, p):
    return nn.Sequential(nn.Dropout(p=p), nn.Linear(n_z, n_hidden), nn.ReLU(),
                         nn.Dropout(p=p), nn.Linear(n_hidden, n_hidden), nn.ReLU(),
                         nn.Dropout(p=p), nn.Linear(n_hidden, 1))

print("GPU:", torch.cuda.is_available())

def get_agmm_fn(X):
    n_hidden = 150
    dropout = 0.5
    return lambda: FitParamsWrapper(AdvReisz(get_learner(X.shape[1], n_hidden, dropout),  # Edited so that #features in is #cols of X
                                             get_adversary(X.shape[1], n_hidden, dropout),
                                             moment_fn),
                                   val_fr=.2,
                                   preprocess_epochs=200,
                                   earlystop_rounds=100,
                                   store_test_every=20,
                                   learner_lr=1e-4, adversary_lr=1e-4,
                                   learner_l2=6e-4, adversary_l2=1e-4,
                                   n_epochs=1000, bs=100,
                                   logger=None, model_dir=str(Path.home()), device=device, verbose=1)

GPU: False


# Hyperparameters

In [28]:
import pandas as pd
from sklearn.utils.multiclass import type_of_target

n_splits = 1
res = {}
q=0  # Data not split up into quantiles
res[f'q={q}'] = {}
print(f'Quintile={q}')

Quintile=0


# Load Data

In [29]:
# get data
# df = pd.read_stata('../charitable_giving/Replication/AER merged.dta', convert_categoricals=False)
df = pd.read_stata('https://github.com/gsbDBI/ExperimentData/raw/master/Charitable/RawData/AER%20merged.dta',
                   convert_categoricals=False)
df = df.loc[(df['ratio'] == 0) | (df['ratio'] == 1)]
df = df.drop(['control', 'ratio', 'ratio2', 'ratio3',
            'size', 'size25', 'size50', 'size100', 'sizeno',
            'ask', 'askd1', 'askd2', 'askd3', 'ask1', 'ask2', 'ask3',
            'gave', 'amountchange', 'state50one', 'blue0'], axis=1)
# state50one just tags one (arbitrary?) observation for each state
# blue0 and red0 and perfectly collinear (when all variables are nonmissing); bluecty and redcty are not
df = df.dropna()
y = df['out_amountgive'].values
X = df[['treatment', 'red0',
        'hpa', 'year5', 'dormant', 'nonlit', 'cases', 'perbush', 'redcty', 'bluecty',
        'pwhite', 'pblack', 'page18_39', 'ave_hh_sz', 'median_hhincome', 'powner', 'psch_atlstba', 'pop_propurban']].values

# df = pd.read_stata('../charitable_giving/Replication/AER merged.dta', convert_categoricals=False)
# df = df.loc[(df['ratio'] == 0) | (df['ratio'] == 1)]
# df = df.drop(['control', 'ratio', 'ratio2', 'ratio3',
#             'size', 'size25', 'size50', 'size100', 'sizeno',
#             'ask', 'askd1', 'askd2', 'askd3', 'ask1', 'ask2', 'ask3',
#             'gave', 'amountchange', 'state50one', 'blue0'], axis=1)
# # state50one just tags one (arbitrary?) observation for each state
# # blue0 and red0 and perfectly collinear (when all variables are nonmissing); bluecty and redcty are not
# df = df.dropna()
# y = df['amount'].values
# X = df[['treatment', 'red0',
#         'hpa', 'year5', 'dormant', 'nonlit', 'cases', 'perbush', 'redcty', 'bluecty',
#         'pwhite', 'pblack', 'page18_39', 'ave_hh_sz', 'median_hhincome', 'powner', 'psch_atlstba', 'pop_propurban']].values

KeyError: 'out_amountgive'

In [None]:
# scale non-binary variables
y = y.astype(np.double)
X = X.astype(np.double)
idx_nonbi = [i for i in range(2, X.shape[1]) if type_of_target(X[:, i]) != 'binary']  # indices of non-binary variables (first and second columns should be binary)
X[:, idx_nonbi] = StandardScaler().fit_transform(X[:, idx_nonbi])
y_scale = np.std(y)
y = y / y_scale

# shuffle data
inds = np.arange(X.shape[0])
np.random.seed(123)
np.random.shuffle(inds)
X, y = X[inds].copy(), y[inds].copy()

# # filter extreme party and treatment propensities
# clf_party = LogisticRegressionCV(cv=5, max_iter=10000, random_state=123).fit(X[:, 2:], X[:, 1])
# clf_treat = LogisticRegressionCV(cv=5, max_iter=10000, random_state=123).fit(X[:, 1:], X[:, 0])
# prop_party = clf_party.predict_proba(X[:, 2:])
# prop_treat = clf_treat.predict_proba(X[:, 1:])
# filt = (prop_party[:, 1] <= .9) & (prop_party[:, 1] >= .1) & (prop_treat[:, 1] <= .9) & (prop_treat[:, 1] >= .1)
# print(X.shape[0], np.sum(filt))
# X, y = X[filt], y[filt]

# Adversarial RF

In [None]:
from advreisz.ensemble import AdvEnsembleReisz, RFrr, interactive_poly_feature_fns
def get_rf_fn(X):
    return lambda: AdvEnsembleReisz(moment_fn=moment_fn, 
                                    n_treatments=2,
                                    max_abs_value=15,
                                    n_iter=100, degree=1)

est = DebiasedMoment(moment_fn=moment_fn,
                     get_reisz_fn=get_rf_fn,
                     get_reg_fn=get_reg_fn, n_splits=1)
est.fit(X, y)
p, s, l, u = est.avg_moment()
{'point': p * y_scale, 'stderr': s * y_scale, 'lower': l * y_scale, 'upper': u * y_scale}

# Test Adversarial RF on 401k Data

In [30]:
import pandas as pd
def moment_fn(x, test_fn):
    n_obs = x.shape[0]
    t1 = np.hstack([np.ones((n_obs, 1)), x[:, 1:]])
    t0 = np.hstack([np.zeros((n_obs, 1)), x[:, 1:]])
    return test_fn(t1) - test_fn(t0)

def get_rf_fn(X):
    return lambda: AdvEnsembleReisz(moment_fn=moment_fn,
                                    n_treatments=1,
                                    max_abs_value=15,
                                    degree=1)

# from advreisz.ensemble import AdvEnsembleReisz

# def get_rf_fn(X):
#     return lambda: AdvEnsembleReisz(moment_fn=moment_fn, max_abs_value=15, degree=1)

df = pd.read_csv(f'401k/quintile0_trimmed.csv', index_col=0)
y = df['Y'].values
X = df[['D'] + [f'X{i}' for i in np.arange(1, 10)]].values

# scale data
y = y.astype(np.double)
X = X.astype(np.double)
X[:, 1:5] = StandardScaler().fit_transform(X[:, 1:5])
y_scale = np.std(y)
y = y / y_scale

# shuffle data
inds = np.arange(X.shape[0])
np.random.seed(123)
np.random.shuffle(inds)
X, y = X[inds].copy(), y[inds].copy()

# filter extrement propensities
clf = LogisticRegressionCV(cv=5, max_iter=10000, random_state=123).fit(X[:, 1:], X[:, 0])
prop = clf.predict_proba(X[:, 1:])
filt = (prop[:, 1] <= .9) & (prop[:, 1] >= .1)
print(X.shape[0], np.sum(filt))
X, y = X[filt], y[filt]

est = DebiasedMoment(moment_fn=moment_fn,
                     get_reisz_fn=get_rf_fn,
                     get_reg_fn=get_reg_fn, n_splits=1)
est.fit(X, y)
p, s, l, u = est.avg_moment()
{'point': p * y_scale, 'stderr': s * y_scale, 'lower': l * y_scale, 'upper': u * y_scale}

9848 9804


{'point': 7489.894500144199,
 'stderr': 1116.3064149611787,
 'lower': 5301.703958403848,
 'upper': 9678.085041884551}

END

In [101]:
n_splits = 1
res = {}
q=0  # Data not split up into quantiles
res[f'q={q}'] = {}
print(f'Quintile={q}')

for name, get_reisz_fn in [
                            # ('splin', get_splin_fn),
                            # ('advrkhs', get_advkernel_fn),
                            # # ('rkhs', get_kernel_fn),
                            # ('nys_advrkhs', get_advnyskernel_fn),
                            # # ('nys_rkhs', get_nyskernel_fn),
                            # ('plugin_lg', get_lg_plugin_fn),
                            # ('plugin_rf', get_rf_plugin_fn),
                            # ('advnnet', get_agmm_fn),
                            ('advrf', get_rf_fn)
                            ]:
    est = DebiasedMoment(moment_fn=moment_fn,
                            get_reisz_fn=get_reisz_fn,
                            get_reg_fn=get_reg_fn, n_splits=n_splits)
    est.fit(X, y)
    p, s, l, u = est.avg_moment()
    res[f'q={q}'][name] = {'point': p * y_scale, 'stderr': s * y_scale,
                            'lower': l * y_scale, 'upper': u * y_scale}
    p, s, l, u = est.avg_moment(tmle=True)
    res[f'q={q}'][f'{name}_tmle'] = {'point': p * y_scale, 'stderr': s * y_scale,
                                        'lower': l * y_scale, 'upper': u * y_scale}

print(res[f'q={q}'])
res[f'q={q}'] = pd.DataFrame(res[f'q={q}']).transpose()
res = pd.concat(res)

Quintile=0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
{'advrf': {'point': 3.3248490443262977e-11, 'stderr': 7959.031235195162, 'lower': -15601.340847688041, 'upper': 15601.340847688109}, 'advrf_tmle': {'point': 9.23569178979527e-12, 'stderr': 7959.031235195162, 'lower': -15601.340847688067, 'upper': 15601.340847688083}}
