In [1]:
### imports
import warnings
warnings.simplefilter('ignore')
import itertools
import numpy as np
import matplotlib.pyplot as plt 
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from mliv.dgps import get_data, get_tau_fn, fn_dict
from mliv.neuralnet.utilities import mean_ci
from mliv.neuralnet import AGMMEarlyStop as AGMM
from mliv.neuralnet.moments import avg_small_diff
from sklearn.ensemble import RandomForestRegressor
import joblib
import pandas as pd
from collections import OrderedDict


def plot_results(fname, n, iv_strength, dr, tmle, ipw, direct, true):
    plt.title(f'fname={fname}, n={n}, strength={iv_strength}, true={true:.3f}\n'
              f'dr: Cov={np.mean((dr[:, 1] <= true) & (true <= dr[:, 2])):.3f}, '
              f'rmse={np.sqrt(np.mean((dr[:, 0]-true)**2)):.3f}, '
              f'bias={np.mean((dr[:, 0]-true)):.3f}\n'
              f'tmle: Cov={np.mean((tmle[:, 1] <= true) & (true <= tmle[:, 2])):.3f}, '
              f'rmse={np.sqrt(np.mean((tmle[:, 0]-true)**2)):.3f}, '
              f'bias={np.mean((tmle[:, 0]-true)):.3f}\n'
              f'ipw: Cov={np.mean((ipw[:, 1] <= true) & (true <= ipw[:, 2])):.3f}, '
              f'rmse={np.sqrt(np.mean((ipw[:, 0]-true)**2)):.3f}, '
              f'bias={np.mean((ipw[:, 0]-true)):.3f}\n'
              f'direct: Cov={np.mean((direct[:, 1] <= true) & (true <= direct[:, 2])):.3f}, '
              f'rmse={np.sqrt(np.mean((direct[:, 0]-true)**2)):.3f}, '
              f'bias={np.mean((direct[:, 0]-true)):.3f}\n')
    plt.hist(dr[:, 0], label='dr')
    plt.hist(tmle[:, 0], label='tmle', alpha=.4)
    plt.hist(ipw[:, 0], label='ipw', alpha=.4)
    plt.hist(direct[:, 0], label='direct', alpha=.4)
    plt.legend()

n_z = 1
n_t = 1
dgp_num = 5
epsilon = 0.1 # average finite difference epsilon
moment_fn = lambda x, fn, device: avg_small_diff(x, fn, device, epsilon)

In [None]:
for clever in [False, True]:
    resd = {}
    for n_t in [1]:
        resd[n_t] = {}
        for fname in ['abs', '2dpoly', 'sigmoid', 'sin']:
            resd[n_t][fname] = OrderedDict()
            for n in [500, 1000, 2000]:
                lambda_l2_h = .1/n**(.9)
                print(lambda_l2_h)
                nkey = f'$n={n}$'
                resd[n_t][fname][nkey] = {}
                for iv_strength in [0.2, 0.5]:
                    true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')
                    ivkey = f'$\rho={iv_strength}$'
                    resd[n_t][fname][nkey][ivkey] = {}
                    for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
                        data = np.array([r[it] for r in results])
                        if method in ['dr', 'tmle']:
                            cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
                        else:
                            cov = 'NA'
                        resd[n_t][fname][nkey][ivkey][method] = {
                                        'cov': cov,
                                        'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                        'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                        'std': f'{np.std(data[:, 0]):.3f}'}
                    resd[n_t][fname][nkey][ivkey] = pd.concat({f'${true:.2f}$': pd.DataFrame(resd[n_t][fname][nkey][ivkey])})
                resd[n_t][fname][nkey] = pd.concat(resd[n_t][fname][nkey], sort=False)
            resd[n_t][fname] = pd.concat(resd[n_t][fname], sort=False)
        resd[n_t] = pd.concat(resd[n_t], sort=False)
    display(pd.concat(resd).unstack(level=5))
    print(pd.concat(resd).unstack(level=5).to_latex(bold_rows=True, multirow=True,
                                                    multicolumn=True, escape=False,
                                                    column_format='lll||lll|lll|lll|lll|',
                                                    multicolumn_format='c|'))

In [None]:
for clever in [False]:
    resd = {}
    for n_t in [1]:
        resd[n_t] = {}
        for fname in ['2dpoly']:
            resd[n_t][fname] = OrderedDict()
            for n in [2000, 20000]:
                lambda_l2_h = .1/n**(.9)
                nkey = f'$n={n}$'
                resd[n_t][fname][nkey] = {}
                for iv_strength in [0.05, 0.1]:
                    true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')
                    ivkey = f'$\rho={iv_strength}$'
                    resd[n_t][fname][nkey][ivkey] = {}
                    for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
                        data = np.array([r[it] for r in results])
                        if method in ['dr', 'tmle']:
                            cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
                        else:
                            cov = 'NA'
                        resd[n_t][fname][nkey][ivkey][method] = {
                                        'cov': cov,
                                        'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                        'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                        'std': f'{np.std(data[:, 0]):.3f}'}
                    resd[n_t][fname][nkey][ivkey] = pd.concat({f'${true:.2f}$': pd.DataFrame(resd[n_t][fname][nkey][ivkey])})
                resd[n_t][fname][nkey] = pd.concat(resd[n_t][fname][nkey], sort=False)
            resd[n_t][fname] = pd.concat(resd[n_t][fname], sort=False)
        resd[n_t] = pd.concat(resd[n_t], sort=False)
    display(pd.concat(resd).unstack(level=5))
    print(pd.concat(resd).unstack(level=5).to_latex(bold_rows=True, multirow=True,
                                                    multicolumn=True, escape=False,
                                                    column_format='lll||lll|lll|lll|lll|',
                                                    multicolumn_format='c|'))

In [19]:
import pandas as pd
from collections import OrderedDict

for clever in [False]:
    resd = {}
    for n_t in [0, 5, 10]:
        resd[n_t] = {}
        for fname in ['cct']:
            resd[n_t][fname] = OrderedDict()
            for n in [1000, 5000]:
                lambda_l2_h = .1/n**(.9)
                nkey = f'$n={n}$'
                resd[n_t][fname][nkey] = {}
                for iv_strength in [0.0, 0.5]:
                    true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')
                    ivkey = f'$\rho={iv_strength}$'
                    resd[n_t][fname][nkey][ivkey] = {}
                    for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
                        data = np.array([r[it] for r in results])
                        if method in ['dr', 'tmle']:
                            cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
                        else:
                            cov = 'NA'
                        resd[n_t][fname][nkey][ivkey][method] = {
                                        'cov': cov,
                                        'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                        'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                        'std': f'{np.std(data[:, 0]):.3f}'}
                    resd[n_t][fname][nkey][ivkey] = pd.concat({f'${true:.2f}$': pd.DataFrame(resd[n_t][fname][nkey][ivkey])})
                resd[n_t][fname][nkey] = pd.concat(resd[n_t][fname][nkey], sort=False)
            resd[n_t][fname] = pd.concat(resd[n_t][fname], sort=False)
        resd[n_t] = pd.concat(resd[n_t], sort=False)
    display(pd.concat(resd).unstack(level=5))
    print(pd.concat(resd).unstack(level=5).to_latex(bold_rows=True, multirow=True,
                                                    multicolumn=True, escape=False,
                                                    column_format='lll||lll|lll|lll|lll|',
                                                    multicolumn_format='c|'))

0.00019952623149688796
4.687345823184198e-05
0.00019952623149688796
4.687345823184198e-05
0.00019952623149688796
4.687345823184198e-05


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,dr,dr,dr,dr,tmle,tmle,tmle,tmle,ipw,ipw,ipw,ipw,direct,direct,direct,direct
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,cov,rmse,bias,std,cov,rmse,bias,std,cov,rmse,bias,std,cov,rmse,bias,std
0,cct,$n=1000$,$\rho=0.0$,$1.00$,100,0.063,0.032,0.054,100,0.073,0.038,0.062,,0.488,0.088,0.48,,0.192,0.055,0.184
0,cct,$n=1000$,$\rho=0.5$,$1.00$,100,0.059,0.028,0.052,100,0.067,0.034,0.057,,0.45,0.083,0.442,,0.167,0.065,0.154
0,cct,$n=5000$,$\rho=0.0$,$1.00$,90,0.036,0.013,0.034,85,0.04,0.017,0.037,,0.295,0.186,0.229,,0.027,0.008,0.026
0,cct,$n=5000$,$\rho=0.5$,$1.00$,90,0.033,0.011,0.031,90,0.038,0.015,0.035,,0.275,0.187,0.201,,0.021,0.006,0.02
5,cct,$n=1000$,$\rho=0.0$,$1.00$,65,0.223,0.116,0.191,65,0.24,0.105,0.215,,0.807,0.377,0.713,,0.361,0.158,0.325
5,cct,$n=1000$,$\rho=0.5$,$1.00$,35,0.288,0.165,0.236,30,0.337,0.15,0.301,,1.172,0.047,1.171,,0.336,0.136,0.307
5,cct,$n=5000$,$\rho=0.0$,$1.00$,55,0.057,0.043,0.038,50,0.073,0.056,0.047,,0.72,0.53,0.488,,0.077,0.046,0.061
5,cct,$n=5000$,$\rho=0.5$,$1.00$,55,0.089,0.022,0.087,55,0.091,0.0,0.091,,0.884,0.069,0.881,,0.157,0.115,0.107
10,cct,$n=1000$,$\rho=0.0$,$1.00$,70,0.167,0.009,0.166,60,0.168,0.036,0.164,,0.754,0.345,0.67,,0.359,0.02,0.359
10,cct,$n=1000$,$\rho=0.5$,$1.00$,55,0.237,0.163,0.173,50,0.273,0.171,0.213,,1.033,0.072,1.031,,0.403,0.145,0.376


\begin{tabular}{lll||lll|lll|lll|lll|}
\toprule
 &  &  &  &  & \multicolumn{4}{c|}{dr} & \multicolumn{4}{c|}{tmle} & \multicolumn{4}{c|}{ipw} & \multicolumn{4}{c|}{direct} \\
 &  &  &  &  & cov & rmse & bias & std & cov & rmse & bias & std & cov & rmse & bias & std & cov & rmse & bias & std \\
\midrule
\multirow[t]{4}{*}{\textbf{0}} & \multirow[t]{4}{*}{\textbf{cct}} & \multirow[t]{2}{*}{\textbf{$n=1000$}} & \textbf{$ho=0.0$} & \textbf{$1.00$} & 100 & 0.063 & 0.032 & 0.054 & 100 & 0.073 & 0.038 & 0.062 & NA & 0.488 & 0.088 & 0.480 & NA & 0.192 & 0.055 & 0.184 \\
\cline{4-21}
\textbf{} & \textbf{} & \textbf{} & \textbf{$ho=0.5$} & \textbf{$1.00$} & 100 & 0.059 & 0.028 & 0.052 & 100 & 0.067 & 0.034 & 0.057 & NA & 0.450 & 0.083 & 0.442 & NA & 0.167 & 0.065 & 0.154 \\
\cline{3-21} \cline{4-21}
\textbf{} & \textbf{} & \multirow[t]{2}{*}{\textbf{$n=5000$}} & \textbf{$ho=0.0$} & \textbf{$1.00$} & 90 & 0.036 & 0.013 & 0.034 & 85 & 0.040 & 0.017 & 0.037 & NA & 0.295 & 0.186 & 0.229 & NA & 0.

In [27]:
import pandas as pd
from collections import OrderedDict

fname = 'cct'
for clever in [False]:
    resd = {}
    for n_t in [0, 5, 10]:
        resd[n_t] = {}
        for n in [1000, 5000]:
            lambda_l2_h = .1/n**(.9)
            nkey = f'$n={n}$'
            resd[n_t][nkey] = {}
            for iv_strength in [0.0, 0.5]:
                if n_t == 0 and iv_strength == 0.5:
                    continue

                true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')
                ivkey = f'$\\rho={iv_strength}$'
                resd[n_t][nkey][ivkey] = {}
                for it, method in enumerate(['dr', 'tmle', 'direct']):
                    data = np.array([r[it] for r in results])
                    confidence = .95
                    se = (data[:, 2] - data[:, 0]) / scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
                    confidence = .99
                    data[:, 1] = data[:, 0] - se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
                    data[:, 2] = data[:, 0] + se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
                    if method in ['dr', 'tmle']:
                        cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
                    else:
                        cov = 'NA'
                    resd[n_t][nkey][ivkey][method] = {
                                    'cov': cov,
                                    'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                    'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                    'std': f'{np.std(data[:, 0]):.3f}'}
                resd[n_t][nkey][ivkey] = pd.DataFrame(resd[n_t][nkey][ivkey])
            resd[n_t][nkey] = pd.concat(resd[n_t][nkey], sort=False)
        resd[n_t] = pd.concat(resd[n_t], sort=False)
    display(pd.concat(resd).unstack(level=3))
    print(pd.concat(resd).unstack(level=3).to_latex(bold_rows=True, multirow=True,
                                                    multicolumn=True, escape=False,
                                                    column_format='lll||lll|lll|lll|lll|',
                                                    multicolumn_format='c|'))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,dr,dr,dr,dr,tmle,tmle,tmle,tmle,direct,direct,direct,direct
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,cov,rmse,bias,std,cov,rmse,bias,std,cov,rmse,bias,std
0,$n=1000$,$\rho=0.0$,98,0.088,0.022,0.085,97,0.082,0.023,0.079,,0.431,0.115,0.416
0,$n=5000$,$\rho=0.0$,99,0.033,0.011,0.031,99,0.035,0.013,0.033,,0.285,0.17,0.229
5,$n=1000$,$\rho=0.0$,95,0.177,0.027,0.175,89,0.202,0.011,0.201,,0.821,0.252,0.782
5,$n=1000$,$\rho=0.5$,73,0.358,0.263,0.243,58,0.427,0.308,0.295,,1.235,0.144,1.227
5,$n=5000$,$\rho=0.0$,66,0.135,0.092,0.099,69,0.148,0.102,0.107,,0.948,0.812,0.489
5,$n=5000$,$\rho=0.5$,58,0.132,0.089,0.098,64,0.134,0.074,0.112,,0.889,0.209,0.864
10,$n=1000$,$\rho=0.0$,90,0.133,0.027,0.13,90,0.172,0.01,0.172,,0.763,0.217,0.732
10,$n=1000$,$\rho=0.5$,70,0.204,0.157,0.131,60,0.282,0.222,0.175,,1.215,0.157,1.205
10,$n=5000$,$\rho=0.0$,95,0.024,0.011,0.022,90,0.045,0.031,0.032,,0.912,0.793,0.451
10,$n=5000$,$\rho=0.5$,70,0.096,0.054,0.08,75,0.098,0.03,0.094,,0.893,0.04,0.892


\begin{tabular}{lll||lll|lll|lll|lll|}
\toprule
 &  &  & \multicolumn{4}{c|}{dr} & \multicolumn{4}{c|}{tmle} & \multicolumn{4}{c|}{direct} \\
 &  &  & cov & rmse & bias & std & cov & rmse & bias & std & cov & rmse & bias & std \\
\midrule
\multirow[t]{2}{*}{\textbf{0}} & \textbf{$n=1000$} & \textbf{$\rho=0.0$} & 98 & 0.088 & 0.022 & 0.085 & 97 & 0.082 & 0.023 & 0.079 & NA & 0.431 & 0.115 & 0.416 \\
\cline{2-15}
\textbf{} & \textbf{$n=5000$} & \textbf{$\rho=0.0$} & 99 & 0.033 & 0.011 & 0.031 & 99 & 0.035 & 0.013 & 0.033 & NA & 0.285 & 0.170 & 0.229 \\
\cline{1-15} \cline{2-15}
\multirow[t]{4}{*}{\textbf{5}} & \multirow[t]{2}{*}{\textbf{$n=1000$}} & \textbf{$\rho=0.0$} & 95 & 0.177 & 0.027 & 0.175 & 89 & 0.202 & 0.011 & 0.201 & NA & 0.821 & 0.252 & 0.782 \\
\textbf{} & \textbf{} & \textbf{$\rho=0.5$} & 73 & 0.358 & 0.263 & 0.243 & 58 & 0.427 & 0.308 & 0.295 & NA & 1.235 & 0.144 & 1.227 \\
\cline{2-15}
\textbf{} & \multirow[t]{2}{*}{\textbf{$n=5000$}} & \textbf{$\rho=0.0$} & 66 & 0.135 & 