In [1]:
### imports
import warnings
warnings.simplefilter('ignore')
import itertools
import numpy as np
import matplotlib.pyplot as plt 
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from mliv.dgps import get_data, get_tau_fn, fn_dict
from mliv.neuralnet.utilities import mean_ci
from mliv.neuralnet import AGMMEarlyStop as AGMM
from mliv.neuralnet.moments import avg_small_diff
from sklearn.ensemble import RandomForestRegressor
import joblib
import pandas as pd
from collections import OrderedDict
import scipy


def plot_results(fname, n, iv_strength, dr, tmle, ipw, direct, true):
    plt.title(f'fname={fname}, n={n}, strength={iv_strength}, true={true:.3f}\n'
              f'dr: Cov={np.mean((dr[:, 1] <= true) & (true <= dr[:, 2])):.3f}, '
              f'rmse={np.sqrt(np.mean((dr[:, 0]-true)**2)):.3f}, '
              f'bias={np.mean((dr[:, 0]-true)):.3f}\n'
              f'tmle: Cov={np.mean((tmle[:, 1] <= true) & (true <= tmle[:, 2])):.3f}, '
              f'rmse={np.sqrt(np.mean((tmle[:, 0]-true)**2)):.3f}, '
              f'bias={np.mean((tmle[:, 0]-true)):.3f}\n'
              f'ipw: Cov={np.mean((ipw[:, 1] <= true) & (true <= ipw[:, 2])):.3f}, '
              f'rmse={np.sqrt(np.mean((ipw[:, 0]-true)**2)):.3f}, '
              f'bias={np.mean((ipw[:, 0]-true)):.3f}\n'
              f'direct: Cov={np.mean((direct[:, 1] <= true) & (true <= direct[:, 2])):.3f}, '
              f'rmse={np.sqrt(np.mean((direct[:, 0]-true)**2)):.3f}, '
              f'bias={np.mean((direct[:, 0]-true)):.3f}\n')
    plt.hist(dr[:, 0], label='dr')
    plt.hist(tmle[:, 0], label='tmle', alpha=.4)
    plt.hist(ipw[:, 0], label='ipw', alpha=.4)
    plt.hist(direct[:, 0], label='direct', alpha=.4)
    plt.legend()

n_z = 1
n_t = 1
dgp_num = 5
epsilon = 0.1 # average finite difference epsilon
moment_fn = lambda x, fn, device: avg_small_diff(x, fn, device, epsilon)

In [28]:
for clever in [False, True]:
    resd = {}
    for n_t in [1]:
        resd[n_t] = {}
        for fname in ['abs', '2dpoly', 'sigmoid', 'sin']:
            resd[n_t][fname] = OrderedDict()
            for n in [500, 1000, 2000]:
                lambda_l2_h = .1/n**(.9)
                print(lambda_l2_h)
                nkey = f'$n={n}$'
                resd[n_t][fname][nkey] = {}
                for iv_strength in [0.2, 0.5]:
                    ivkey = f'$\\pi={iv_strength}$'
                    resd[n_t][fname][nkey][ivkey] = {}
                    for endogeneity_strength in [0.05, 0.1]:
                        true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_{endogeneity_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')
                        ekey = f'$\\rho={endogeneity_strength}$'
                        resd[n_t][fname][nkey][ivkey][ekey] = {}
                        for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
                            data = np.array([r[it] for r in results])
                            if method in ['dr', 'tmle']:
                                cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
                            else:
                                cov = 'NA'
                            resd[n_t][fname][nkey][ivkey][ekey][method] = {
                                            'cov': cov,
                                            'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                            'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                            'std': f'{np.std(data[:, 0]):.3f}'}
                        resd[n_t][fname][nkey][ivkey][ekey] = pd.DataFrame(resd[n_t][fname][nkey][ivkey][ekey])
                    resd[n_t][fname][nkey][ivkey] = pd.concat(resd[n_t][fname][nkey][ivkey], sort=False)
                resd[n_t][fname][nkey] = pd.concat(resd[n_t][fname][nkey], sort=False)
            resd[n_t][fname] = pd.concat(resd[n_t][fname], sort=False)
        resd[n_t] = pd.concat(resd[n_t], sort=False)
    display(pd.concat(resd).droplevel(0).unstack(level=4))
    print(pd.concat(resd).droplevel(0).unstack(level=4).to_latex(bold_rows=True, multirow=True,
                                                    multicolumn=True, escape=False,
                                                    column_format='lll||lll|lll|lll|lll|',
                                                    multicolumn_format='c|'))

0.00037232911332721384
0.00019952623149688796
0.00010692345999911879


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,dr,dr,dr,dr,tmle,tmle,tmle,tmle,ipw,ipw,ipw,ipw,direct,direct,direct,direct
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,cov,rmse,bias,std,cov,rmse,bias,std,cov,rmse,bias,std,cov,rmse,bias,std
abs,$n=500$,$\pi=0.2$,$\rho=0.05$,95,0.151,0.006,0.151,94,0.16,0.005,0.16,,0.147,0.006,0.146,,0.132,0.004,0.132
abs,$n=500$,$\pi=0.2$,$\rho=0.1$,96,0.151,0.01,0.15,93,0.16,0.009,0.16,,0.147,0.009,0.147,,0.129,0.002,0.129
abs,$n=500$,$\pi=0.5$,$\rho=0.05$,95,0.064,0.001,0.064,94,0.067,0.001,0.067,,0.059,0.002,0.059,,0.074,0.003,0.074
abs,$n=500$,$\pi=0.5$,$\rho=0.1$,96,0.063,0.002,0.062,94,0.066,0.002,0.066,,0.06,0.002,0.06,,0.08,0.001,0.08
abs,$n=1000$,$\pi=0.2$,$\rho=0.05$,94,0.117,0.004,0.116,94,0.119,0.003,0.119,,0.116,0.002,0.116,,0.104,0.003,0.104
abs,$n=1000$,$\pi=0.2$,$\rho=0.1$,94,0.117,0.008,0.117,94,0.12,0.006,0.12,,0.117,0.005,0.116,,0.102,0.005,0.102
abs,$n=1000$,$\pi=0.5$,$\rho=0.05$,95,0.049,0.001,0.049,95,0.05,0.001,0.05,,0.049,0.0,0.049,,0.047,0.002,0.047
abs,$n=1000$,$\pi=0.5$,$\rho=0.1$,94,0.049,0.002,0.048,94,0.049,0.002,0.049,,0.049,0.001,0.049,,0.044,0.002,0.044
abs,$n=2000$,$\pi=0.2$,$\rho=0.05$,95,0.08,0.008,0.079,95,0.08,0.007,0.08,,0.08,0.006,0.08,,0.071,0.003,0.071
abs,$n=2000$,$\pi=0.2$,$\rho=0.1$,95,0.08,0.01,0.08,95,0.08,0.009,0.08,,0.081,0.008,0.081,,0.069,0.002,0.069


\begin{tabular}{lll||lll|lll|lll|lll|}
\toprule
 &  &  &  & \multicolumn{4}{c|}{dr} & \multicolumn{4}{c|}{tmle} & \multicolumn{4}{c|}{ipw} & \multicolumn{4}{c|}{direct} \\
 &  &  &  & cov & rmse & bias & std & cov & rmse & bias & std & cov & rmse & bias & std & cov & rmse & bias & std \\
\midrule
\multirow[t]{12}{*}{\textbf{abs}} & \multirow[t]{4}{*}{\textbf{$n=500$}} & \multirow[t]{2}{*}{\textbf{$\pi=0.2$}} & \textbf{$\rho=0.05$} & 95 & 0.151 & 0.006 & 0.151 & 94 & 0.160 & 0.005 & 0.160 & NA & 0.147 & 0.006 & 0.146 & NA & 0.132 & 0.004 & 0.132 \\
\textbf{} & \textbf{} & \textbf{} & \textbf{$\rho=0.1$} & 96 & 0.151 & 0.010 & 0.150 & 93 & 0.160 & 0.009 & 0.160 & NA & 0.147 & 0.009 & 0.147 & NA & 0.129 & 0.002 & 0.129 \\
\cline{3-20}
\textbf{} & \textbf{} & \multirow[t]{2}{*}{\textbf{$\pi=0.5$}} & \textbf{$\rho=0.05$} & 95 & 0.064 & 0.001 & 0.064 & 94 & 0.067 & 0.001 & 0.067 & NA & 0.059 & 0.002 & 0.059 & NA & 0.074 & 0.003 & 0.074 \\
\textbf{} & \textbf{} & \textbf{} & \textbf{$\rho=0.1

FileNotFoundError: [Errno 2] No such file or directory: 'res_fn_abs_n_500_n_t_1_stregth_0.2_0.05_eps_0.1_clever_True_l2h_0.0004.jbl'

In [None]:
for clever in [False]:
    resd = {}
    for n_t in [1]:
        resd[n_t] = {}
        for fname in ['2dpoly']:
            resd[n_t][fname] = OrderedDict()
            for n in [2000, 20000]:
                lambda_l2_h = .1/n**(.9)
                nkey = f'$n={n}$'
                resd[n_t][fname][nkey] = {}
                for iv_strength in [0.05, 0.1]:
                    ivkey = f'$\pi={iv_strength}$'
                    resd[n_t][fname][nkey][ivkey] = {}
                    for endogeneity_strength in [0.05, 0.1]:
                        true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_{endogeneity_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')
                    
                        ekey = f'$\rho={endogeneity_strength}$'
                        resd[n_t][fname][nkey][ivkey][ekey] = {}
                        for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
                            data = np.array([r[it] for r in results])
                            if method in ['dr', 'tmle']:
                                cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
                            else:
                                cov = 'NA'
                            resd[n_t][fname][nkey][ivkey][ekey][method] = {
                                            'cov': cov,
                                            'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                            'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                            'std': f'{np.std(data[:, 0]):.3f}'}
                        resd[n_t][fname][nkey][ivkey][ekey] = pd.concat({f'${true:.2f}$': pd.DataFrame(resd[n_t][fname][nkey][ivkey][ekey])})
                    resd[n_t][fname][nkey][ivkey] = pd.concat(resd[n_t][fname][nkey][ivkey], sort=False)
                resd[n_t][fname][nkey] = pd.concat(resd[n_t][fname][nkey], sort=False)
            resd[n_t][fname] = pd.concat(resd[n_t][fname], sort=False)
        resd[n_t] = pd.concat(resd[n_t], sort=False)
    display(pd.concat(resd).unstack(level=6))
    print(pd.concat(resd).unstack(level=6).to_latex(bold_rows=True, multirow=True,
                                                    multicolumn=True, escape=False,
                                                    column_format='lll||lll|lll|lll|lll|',
                                                    multicolumn_format='c|'))

In [8]:
endogeneity_strength = 0.3
fname = 'cct'
for clever in [False]:
    resd = {}
    for n_t in [0, 5, 10]:
        resd[f'$n_x={n_t}$'] = {}
        for n in [1000, 5000]:
            lambda_l2_h = .1/n**(.9)
            nkey = f'$n={n}$'
            resd[f'$n_x={n_t}$'][nkey] = {}
            for iv_strength in [0.0, 0.5]:
                if n_t == 0 and iv_strength == 0.5:
                    continue

                true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_{endogeneity_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')
                ivkey = "$\\tilde{\\rho}=" + f"{iv_strength}$"
                resd[f'$n_x={n_t}$'][nkey][ivkey] = {}
                for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
                    if method == 'ipw':
                        continue
                    data = np.array([r[it] for r in results])
                    if method in ['dr', 'tmle']:
                        cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
                    else:
                        cov = 'NA'
                    resd[f'$n_x={n_t}$'][nkey][ivkey][method] = {
                                    'cov': cov,
                                    'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                    'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                    'std': f'{np.std(data[:, 0]):.3f}'}
                resd[f'$n_x={n_t}$'][nkey][ivkey] = pd.DataFrame(resd[f'$n_x={n_t}$'][nkey][ivkey])
            resd[f'$n_x={n_t}$'][nkey] = pd.concat(resd[f'$n_x={n_t}$'][nkey], sort=False)
        resd[f'$n_x={n_t}$'] = pd.concat(resd[f'$n_x={n_t}$'], sort=False)
    display(pd.concat(resd).unstack(level=3))
    print(pd.concat(resd).unstack(level=3).to_latex(bold_rows=True, multirow=True,
                                                    multicolumn=True, escape=False,
                                                    column_format='lll||llll|llll|llll|',
                                                    multicolumn_format='c|'))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,dr,dr,dr,dr,tmle,tmle,tmle,tmle,direct,direct,direct,direct
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,cov,rmse,bias,std,cov,rmse,bias,std,cov,rmse,bias,std
$n_x=0$,$n=1000$,$\tilde{\rho}=0.0$,92,0.084,0.027,0.08,92,0.077,0.029,0.072,,0.235,0.081,0.221
$n_x=0$,$n=5000$,$\tilde{\rho}=0.0$,94,0.032,0.008,0.031,94,0.034,0.01,0.032,,0.027,0.013,0.024
$n_x=5$,$n=1000$,$\tilde{\rho}=0.0$,86,0.181,0.05,0.174,82,0.202,0.039,0.198,,0.313,0.131,0.285
$n_x=5$,$n=1000$,$\tilde{\rho}=0.5$,57,0.324,0.213,0.245,50,0.405,0.262,0.309,,0.331,0.02,0.33
$n_x=5$,$n=5000$,$\tilde{\rho}=0.0$,55,0.143,0.092,0.109,53,0.161,0.105,0.121,,0.065,0.006,0.064
$n_x=5$,$n=5000$,$\tilde{\rho}=0.5$,42,0.142,0.097,0.103,44,0.146,0.084,0.119,,0.173,0.16,0.065
$n_x=10$,$n=1000$,$\tilde{\rho}=0.0$,86,0.139,0.011,0.138,80,0.164,0.025,0.162,,0.274,0.01,0.274
$n_x=10$,$n=1000$,$\tilde{\rho}=0.5$,62,0.241,0.116,0.211,56,0.317,0.155,0.277,,0.352,0.027,0.351
$n_x=10$,$n=5000$,$\tilde{\rho}=0.0$,90,0.027,0.008,0.026,80,0.044,0.026,0.036,,0.074,0.071,0.022
$n_x=10$,$n=5000$,$\tilde{\rho}=0.5$,48,0.12,0.085,0.085,57,0.117,0.058,0.101,,0.18,0.157,0.087


\begin{tabular}{lll||llll|llll|llll|}
\toprule
 &  &  & \multicolumn{4}{c|}{dr} & \multicolumn{4}{c|}{tmle} & \multicolumn{4}{c|}{direct} \\
 &  &  & cov & rmse & bias & std & cov & rmse & bias & std & cov & rmse & bias & std \\
\midrule
\multirow[t]{2}{*}{\textbf{$n_x=0$}} & \textbf{$n=1000$} & \textbf{$\tilde{\rho}=0.0$} & 92 & 0.084 & 0.027 & 0.080 & 92 & 0.077 & 0.029 & 0.072 & NA & 0.235 & 0.081 & 0.221 \\
\cline{2-15}
\textbf{} & \textbf{$n=5000$} & \textbf{$\tilde{\rho}=0.0$} & 94 & 0.032 & 0.008 & 0.031 & 94 & 0.034 & 0.010 & 0.032 & NA & 0.027 & 0.013 & 0.024 \\
\cline{1-15} \cline{2-15}
\multirow[t]{4}{*}{\textbf{$n_x=5$}} & \multirow[t]{2}{*}{\textbf{$n=1000$}} & \textbf{$\tilde{\rho}=0.0$} & 86 & 0.181 & 0.050 & 0.174 & 82 & 0.202 & 0.039 & 0.198 & NA & 0.313 & 0.131 & 0.285 \\
\textbf{} & \textbf{} & \textbf{$\tilde{\rho}=0.5$} & 57 & 0.324 & 0.213 & 0.245 & 50 & 0.405 & 0.262 & 0.309 & NA & 0.331 & 0.020 & 0.330 \\
\cline{2-15}
\textbf{} & \multirow[t]{2}{*}{\textbf{$n=50

In [9]:
endogeneity_strength = 0.3
fname = 'cct'
for clever in [False]:
    resd = {}
    for n_t in [0, 5, 10]:
        resd[f'$n_x={n_t}$'] = {}
        for n in [1000, 5000]:
            lambda_l2_h = .1/n**(.9)
            nkey = f'$n={n}$'
            resd[f'$n_x={n_t}$'][nkey] = {}
            for iv_strength in [0.0, 0.5]:
                if n_t == 0 and iv_strength == 0.5:
                    continue

                true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_{endogeneity_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')
                ivkey = "$\\tilde{\\rho}=" + f"{iv_strength}$"
                resd[f'$n_x={n_t}$'][nkey][ivkey] = {}
                for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
                    if method == 'ipw':
                        continue
                    data = np.array([r[it] for r in results])
                    confidence = .95
                    se = (data[:, 2] - data[:, 0]) / scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
                    confidence = .99
                    data[:, 1] = data[:, 0] - se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
                    data[:, 2] = data[:, 0] + se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
                    if method in ['dr', 'tmle']:
                        cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
                    else:
                        cov = 'NA'
                    resd[f'$n_x={n_t}$'][nkey][ivkey][method] = {
                                    'cov': cov,
                                    'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                    'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                    'std': f'{np.std(data[:, 0]):.3f}'}
                resd[f'$n_x={n_t}$'][nkey][ivkey] = pd.DataFrame(resd[f'$n_x={n_t}$'][nkey][ivkey])
            resd[f'$n_x={n_t}$'][nkey] = pd.concat(resd[f'$n_x={n_t}$'][nkey], sort=False)
        resd[f'$n_x={n_t}$'] = pd.concat(resd[f'$n_x={n_t}$'], sort=False)
    display(pd.concat(resd).unstack(level=3))
    print(pd.concat(resd).unstack(level=3).to_latex(bold_rows=True, multirow=True,
                                                    multicolumn=True, escape=False,
                                                    column_format='lll||llll|llll|llll|',
                                                    multicolumn_format='c|'))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,dr,dr,dr,dr,tmle,tmle,tmle,tmle,direct,direct,direct,direct
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,cov,rmse,bias,std,cov,rmse,bias,std,cov,rmse,bias,std
$n_x=0$,$n=1000$,$\tilde{\rho}=0.0$,98,0.084,0.027,0.08,98,0.077,0.029,0.072,,0.235,0.081,0.221
$n_x=0$,$n=5000$,$\tilde{\rho}=0.0$,99,0.032,0.008,0.031,99,0.034,0.01,0.032,,0.027,0.013,0.024
$n_x=5$,$n=1000$,$\tilde{\rho}=0.0$,95,0.181,0.05,0.174,92,0.202,0.039,0.198,,0.313,0.131,0.285
$n_x=5$,$n=1000$,$\tilde{\rho}=0.5$,73,0.324,0.213,0.245,64,0.405,0.262,0.309,,0.331,0.02,0.33
$n_x=5$,$n=5000$,$\tilde{\rho}=0.0$,68,0.143,0.092,0.109,66,0.161,0.105,0.121,,0.065,0.006,0.064
$n_x=5$,$n=5000$,$\tilde{\rho}=0.5$,54,0.142,0.097,0.103,56,0.146,0.084,0.119,,0.173,0.16,0.065
$n_x=10$,$n=1000$,$\tilde{\rho}=0.0$,94,0.139,0.011,0.138,90,0.164,0.025,0.162,,0.274,0.01,0.274
$n_x=10$,$n=1000$,$\tilde{\rho}=0.5$,74,0.241,0.116,0.211,68,0.317,0.155,0.277,,0.352,0.027,0.351
$n_x=10$,$n=5000$,$\tilde{\rho}=0.0$,97,0.027,0.008,0.026,90,0.044,0.026,0.036,,0.074,0.071,0.022
$n_x=10$,$n=5000$,$\tilde{\rho}=0.5$,61,0.12,0.085,0.085,68,0.117,0.058,0.101,,0.18,0.157,0.087


\begin{tabular}{lll||llll|llll|llll|}
\toprule
 &  &  & \multicolumn{4}{c|}{dr} & \multicolumn{4}{c|}{tmle} & \multicolumn{4}{c|}{direct} \\
 &  &  & cov & rmse & bias & std & cov & rmse & bias & std & cov & rmse & bias & std \\
\midrule
\multirow[t]{2}{*}{\textbf{$n_x=0$}} & \textbf{$n=1000$} & \textbf{$\tilde{\rho}=0.0$} & 98 & 0.084 & 0.027 & 0.080 & 98 & 0.077 & 0.029 & 0.072 & NA & 0.235 & 0.081 & 0.221 \\
\cline{2-15}
\textbf{} & \textbf{$n=5000$} & \textbf{$\tilde{\rho}=0.0$} & 99 & 0.032 & 0.008 & 0.031 & 99 & 0.034 & 0.010 & 0.032 & NA & 0.027 & 0.013 & 0.024 \\
\cline{1-15} \cline{2-15}
\multirow[t]{4}{*}{\textbf{$n_x=5$}} & \multirow[t]{2}{*}{\textbf{$n=1000$}} & \textbf{$\tilde{\rho}=0.0$} & 95 & 0.181 & 0.050 & 0.174 & 92 & 0.202 & 0.039 & 0.198 & NA & 0.313 & 0.131 & 0.285 \\
\textbf{} & \textbf{} & \textbf{$\tilde{\rho}=0.5$} & 73 & 0.324 & 0.213 & 0.245 & 64 & 0.405 & 0.262 & 0.309 & NA & 0.331 & 0.020 & 0.330 \\
\cline{2-15}
\textbf{} & \multirow[t]{2}{*}{\textbf{$n=50