In [7]:
### imports
import warnings
warnings.simplefilter('ignore')
import itertools
import numpy as np
import matplotlib.pyplot as plt 
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from mliv.dgps import get_data, get_tau_fn, fn_dict
from mliv.neuralnet.utilities import mean_ci
from mliv.neuralnet import AGMMEarlyStop as AGMM
from mliv.neuralnet.moments import avg_small_diff
from sklearn.ensemble import RandomForestRegressor
import joblib
import pandas as pd
from collections import OrderedDict
import scipy


def plot_results(fname, n, iv_strength, dr, tmle, ipw, direct, true):
    """Overlay histograms of the four estimators' point estimates.

    ``dr``, ``tmle``, ``ipw`` and ``direct`` are each indexed as 2-D arrays:
    column 0 is the point estimate, and columns 1 and 2 are compared against
    ``true`` as the lower and upper interval bounds.  The figure title lists,
    for every estimator, the fraction of rows whose interval contains ``true``
    (Cov), the root mean squared error, and the mean signed error (bias).

    ``fname``, ``n`` and ``iv_strength`` are only echoed in the title.
    """
    estimators = [('dr', dr), ('tmle', tmle), ('ipw', ipw), ('direct', direct)]

    # Assemble the complete title text before anything is drawn.
    header = f'fname={fname}, n={n}, strength={iv_strength}, true={true:.3f}\n'
    summaries = []
    for label, est in estimators:
        covered = (est[:, 1] <= true) & (true <= est[:, 2])
        error = est[:, 0] - true
        summaries.append(f'{label}: Cov={np.mean(covered):.3f}, '
                         f'rmse={np.sqrt(np.mean(error**2)):.3f}, '
                         f'bias={np.mean(error):.3f}\n')
    plt.title(header + ''.join(summaries))

    # The first histogram is drawn opaque; the rest are translucent overlays.
    for position, (label, est) in enumerate(estimators):
        opacity = {} if position == 0 else {'alpha': .4}
        plt.hist(est[:, 0], label=label, **opacity)
    plt.legend()

# Notebook-wide experiment settings.
n_z, n_t = 1, 1
dgp_num = 5
epsilon = 0.1  # average finite difference epsilon


def moment_fn(x, fn, device):
    """Forward to ``avg_small_diff`` using the module-level ``epsilon``."""
    return avg_small_diff(x, fn, device, epsilon)

In [None]:
# Build one summary table per value of `clever`: for every design function,
# sample size, instrument strength and endogeneity strength, load the saved
# experiment results and reduce them to coverage / rmse / |bias| / std for
# each method.  The table is shown in the notebook and printed as LaTeX.
for clever in [False, True]:
    resd = {}
    for n_t in [1]:
        resd[n_t] = {}
        for fname in ['abs', '2dpoly', 'sigmoid', 'sin']:
            resd[n_t][fname] = OrderedDict()
            for n in [500, 1000, 2000]:
                # Must reproduce the value used when the result files were
                # written, because it is part of the file name below.
                lambda_l2_h = .1/n**(.9)
                print(lambda_l2_h)
                nkey = f'$n={n}$'
                resd[n_t][fname][nkey] = {}
                for iv_strength in [0.2, 0.5]:
                    # Raw f-strings keep the LaTeX backslashes literal; in a
                    # plain string '\r' (as in '\rho') is a carriage return.
                    ivkey = rf'$\pi={iv_strength}$'
                    # This level has to exist before the per-endogeneity
                    # entries are stored in it (previously a KeyError).
                    resd[n_t][fname][nkey][ivkey] = {}
                    for endogeneity_strength in [0.3, 0.8]:
                        # NOTE: joblib.load unpickles its input; only load result
                        # files produced by your own experiment runs.
                        true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_{endogeneity_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')
                        ekey = rf'$\rho={endogeneity_strength}$'
                        resd[n_t][fname][nkey][ivkey][ekey] = {}
                        for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
                            # One row per experiment repetition; column 0 is the
                            # point estimate, columns 1/2 are used as CI bounds.
                            data = np.array([r[it] for r in results])
                            if method in ['dr', 'tmle']:
                                cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
                            else:
                                # Coverage is only reported for dr and tmle.
                                cov = 'NA'
                            resd[n_t][fname][nkey][ivkey][ekey][method] = {
                                            'cov': cov,
                                            'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                            'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                            'std': f'{np.std(data[:, 0]):.3f}'}
                        # Add the true value as an extra index level above the stats.
                        resd[n_t][fname][nkey][ivkey][ekey] = pd.concat({f'${true:.2f}$': pd.DataFrame(resd[n_t][fname][nkey][ivkey][ekey])})
                    resd[n_t][fname][nkey][ivkey] = pd.concat(resd[n_t][fname][nkey][ivkey], sort=False)
                resd[n_t][fname][nkey] = pd.concat(resd[n_t][fname][nkey], sort=False)
            resd[n_t][fname] = pd.concat(resd[n_t][fname], sort=False)
        resd[n_t] = pd.concat(resd[n_t], sort=False)
    # Index levels are (n_t, fname, n, pi, rho, true, stat); level 6 (the stat
    # names) is moved into the columns, giving (method, stat) column pairs.
    table = pd.concat(resd).unstack(level=6)
    display(table)
    # 6 index columns followed by 4 methods x 4 stats.
    print(table.to_latex(bold_rows=True, multirow=True,
                         multicolumn=True, escape=False,
                         column_format='llllll||llll|llll|llll|llll|',
                         multicolumn_format='c|'))

In [None]:
# Summary table for the '2dpoly' design with weak instruments (0.05, 0.1) at
# n = 2000 and n = 20000: load the saved results for every configuration and
# reduce them to coverage / rmse / |bias| / std for each method.
for clever in [False]:
    resd = {}
    for n_t in [1]:
        resd[n_t] = {}
        for fname in ['2dpoly']:
            resd[n_t][fname] = OrderedDict()
            for n in [2000, 20000]:
                # Must reproduce the value used when the result files were
                # written, because it is part of the file name below.
                lambda_l2_h = .1/n**(.9)
                nkey = f'$n={n}$'
                resd[n_t][fname][nkey] = {}
                for iv_strength in [0.05, 0.1]:
                    # Raw f-strings keep the LaTeX backslashes literal; in a
                    # plain string '\r' (as in '\rho') is a carriage return.
                    ivkey = rf'$\pi={iv_strength}$'
                    # This level has to exist before the per-endogeneity
                    # entries are stored in it (previously a KeyError).
                    resd[n_t][fname][nkey][ivkey] = {}
                    for endogeneity_strength in [0.3, 0.8]:
                        # NOTE: joblib.load unpickles its input; only load result
                        # files produced by your own experiment runs.
                        true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_{endogeneity_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')

                        ekey = rf'$\rho={endogeneity_strength}$'
                        resd[n_t][fname][nkey][ivkey][ekey] = {}
                        for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
                            # One row per experiment repetition; column 0 is the
                            # point estimate, columns 1/2 are used as CI bounds.
                            data = np.array([r[it] for r in results])
                            if method in ['dr', 'tmle']:
                                cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
                            else:
                                # Coverage is only reported for dr and tmle.
                                cov = 'NA'
                            resd[n_t][fname][nkey][ivkey][ekey][method] = {
                                            'cov': cov,
                                            'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                            'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                            'std': f'{np.std(data[:, 0]):.3f}'}
                        # Add the true value as an extra index level above the stats.
                        resd[n_t][fname][nkey][ivkey][ekey] = pd.concat({f'${true:.2f}$': pd.DataFrame(resd[n_t][fname][nkey][ivkey][ekey])})
                    resd[n_t][fname][nkey][ivkey] = pd.concat(resd[n_t][fname][nkey][ivkey], sort=False)
                resd[n_t][fname][nkey] = pd.concat(resd[n_t][fname][nkey], sort=False)
            resd[n_t][fname] = pd.concat(resd[n_t][fname], sort=False)
        resd[n_t] = pd.concat(resd[n_t], sort=False)
    # Index levels are (n_t, fname, n, pi, rho, true, stat); level 6 (the stat
    # names) is moved into the columns, giving (method, stat) column pairs.
    table = pd.concat(resd).unstack(level=6)
    display(table)
    # 6 index columns followed by 4 methods x 4 stats.
    print(table.to_latex(bold_rows=True, multirow=True,
                         multicolumn=True, escape=False,
                         column_format='llllll||llll|llll|llll|llll|',
                         multicolumn_format='c|'))

In [11]:
# Summary table for the 'cct' design: for each n_t (labelled n_x in the table),
# sample size and strength value, load the saved results and reduce them to
# coverage / rmse / |bias| / std for the dr, tmle and direct methods.
endogeneity_strength = 0.3
fname = 'cct'
for clever in [False]:
    resd = {}
    for n_t in [0, 5, 10]:
        xkey = f'$n_x={n_t}$'
        resd[xkey] = {}
        for n in [1000, 5000]:
            # Must reproduce the value used when the result files were written,
            # because it is part of the file name below.
            lambda_l2_h = .1/n**(.9)
            nkey = f'$n={n}$'
            resd[xkey][nkey] = {}
            for iv_strength in [0.0, 0.5]:
                # The (n_t=0, strength=0.5) combination is not tabulated.
                if n_t == 0 and iv_strength == 0.5:
                    continue

                # NOTE: joblib.load unpickles its input; only load result files
                # produced by your own experiment runs.
                true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_{endogeneity_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')
                # Raw f-string with doubled braces: yields the literal LaTeX
                # '$\tilde{\rho}=<value>$'.  Un-doubled braces make '\rho' an
                # f-string expression (a SyntaxError) and '\t' would be a tab.
                ivkey = rf'$\tilde{{\rho}}={iv_strength}$'
                resd[xkey][nkey][ivkey] = {}
                for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
                    # ipw is left out of this table; `it` still indexes the
                    # stored results by the method's original position.
                    if method == 'ipw':
                        continue
                    # One row per experiment repetition; column 0 is the point
                    # estimate, columns 1/2 are used as CI bounds.
                    data = np.array([r[it] for r in results])
                    if method in ['dr', 'tmle']:
                        cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
                    else:
                        # Coverage is only reported for dr and tmle.
                        cov = 'NA'
                    resd[xkey][nkey][ivkey][method] = {
                                    'cov': cov,
                                    'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                    'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                    'std': f'{np.std(data[:, 0]):.3f}'}
                resd[xkey][nkey][ivkey] = pd.DataFrame(resd[xkey][nkey][ivkey])
            resd[xkey][nkey] = pd.concat(resd[xkey][nkey], sort=False)
        resd[xkey] = pd.concat(resd[xkey], sort=False)
    # Index levels are (n_x, n, strength, stat); level 3 (the stat names) is
    # moved into the columns, giving (method, stat) column pairs.
    table = pd.concat(resd).unstack(level=3)
    display(table)
    print(table.to_latex(bold_rows=True, multirow=True,
                         multicolumn=True, escape=False,
                         column_format='lll||llll|llll|llll|',
                         multicolumn_format='c|'))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,dr,dr,dr,dr,tmle,tmle,tmle,tmle,direct,direct,direct,direct
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,cov,rmse,bias,std,cov,rmse,bias,std,cov,rmse,bias,std
$n_x=0$,$n=1000$,$\rho=0.0$,96,0.086,0.022,0.083,97,0.078,0.022,0.075,,0.212,0.07,0.2
$n_x=0$,$n=5000$,$\rho=0.0$,98,0.033,0.011,0.031,98,0.035,0.013,0.033,,0.028,0.013,0.025
$n_x=5$,$n=1000$,$\rho=0.0$,91,0.179,0.034,0.176,92,0.191,0.015,0.191,,0.333,0.156,0.295
$n_x=5$,$n=1000$,$\rho=0.5$,65,0.366,0.261,0.257,55,0.427,0.302,0.301,,0.35,0.046,0.347
$n_x=5$,$n=5000$,$\rho=0.0$,66,0.144,0.099,0.105,64,0.16,0.112,0.114,,0.064,0.006,0.064
$n_x=5$,$n=5000$,$\rho=0.5$,63,0.135,0.089,0.102,64,0.138,0.075,0.116,,0.173,0.16,0.065
$n_x=10$,$n=1000$,$\rho=0.0$,95,0.141,0.005,0.141,90,0.151,0.003,0.151,,0.284,0.017,0.283
$n_x=10$,$n=1000$,$\rho=0.5$,71,0.242,0.16,0.182,63,0.351,0.234,0.261,,0.36,0.005,0.36
$n_x=10$,$n=5000$,$\rho=0.0$,95,0.027,0.01,0.025,90,0.046,0.03,0.035,,0.077,0.074,0.02
$n_x=10$,$n=5000$,$\rho=0.5$,61,0.127,0.088,0.091,66,0.126,0.06,0.111,,0.186,0.165,0.084


\begin{tabular}{lll||llll|llll|llll|}
\toprule
 &  &  & \multicolumn{4}{c|}{dr} & \multicolumn{4}{c|}{tmle} & \multicolumn{4}{c|}{direct} \\
 &  &  & cov & rmse & bias & std & cov & rmse & bias & std & cov & rmse & bias & std \\
\midrule
\multirow[t]{2}{*}{\textbf{$n_x=0$}} & \textbf{$n=1000$} & \textbf{$\rho=0.0$} & 96 & 0.086 & 0.022 & 0.083 & 97 & 0.078 & 0.022 & 0.075 & NA & 0.212 & 0.070 & 0.200 \\
\cline{2-15}
\textbf{} & \textbf{$n=5000$} & \textbf{$\rho=0.0$} & 98 & 0.033 & 0.011 & 0.031 & 98 & 0.035 & 0.013 & 0.033 & NA & 0.028 & 0.013 & 0.025 \\
\cline{1-15} \cline{2-15}
\multirow[t]{4}{*}{\textbf{$n_x=5$}} & \multirow[t]{2}{*}{\textbf{$n=1000$}} & \textbf{$\rho=0.0$} & 91 & 0.179 & 0.034 & 0.176 & 92 & 0.191 & 0.015 & 0.191 & NA & 0.333 & 0.156 & 0.295 \\
\textbf{} & \textbf{} & \textbf{$\rho=0.5$} & 65 & 0.366 & 0.261 & 0.257 & 55 & 0.427 & 0.302 & 0.301 & NA & 0.350 & 0.046 & 0.347 \\
\cline{2-15}
\textbf{} & \multirow[t]{2}{*}{\textbf{$n=5000$}} & \textbf{$\rho=0.0$} & 66

In [None]:
# Same 'cct' summary table as the 95% version, but coverage is evaluated on
# intervals widened to the 99% level.  The stored bounds are treated as 95%
# t-intervals with n - 1 degrees of freedom (assumption carried over from the
# original cell -- confirm against the code that produced the result files).
endogeneity_strength = 0.3
fname = 'cct'
for clever in [False]:
    resd = {}
    for n_t in [0, 5, 10]:
        xkey = f'$n_x={n_t}$'
        resd[xkey] = {}
        for n in [1000, 5000]:
            # Must reproduce the value used when the result files were written,
            # because it is part of the file name below.
            lambda_l2_h = .1/n**(.9)
            nkey = f'$n={n}$'
            resd[xkey][nkey] = {}
            # The t quantiles depend only on n, so compute them once per n.
            t_stored = scipy.stats.t.ppf((1 + .95) / 2., n - 1)
            t_widened = scipy.stats.t.ppf((1 + .99) / 2., n - 1)
            for iv_strength in [0.0, 0.5]:
                # The (n_t=0, strength=0.5) combination is not tabulated.
                if n_t == 0 and iv_strength == 0.5:
                    continue

                # NOTE: joblib.load unpickles its input; only load result files
                # produced by your own experiment runs.
                true, results = joblib.load(f'res_fn_{fname}_n_{n}_n_t_{n_t}_stregth_{iv_strength}_{endogeneity_strength}_eps_{0.1}_clever_{clever}_l2h_{lambda_l2_h:.4f}.jbl')
                # Raw f-string with doubled braces: yields the literal LaTeX
                # '$\tilde{\rho}=<value>$'.  Un-doubled braces make '\rho' an
                # f-string expression (a SyntaxError) and '\t' would be a tab.
                ivkey = rf'$\tilde{{\rho}}={iv_strength}$'
                resd[xkey][nkey][ivkey] = {}
                for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
                    # ipw is left out of this table; `it` still indexes the
                    # stored results by the method's original position.
                    if method == 'ipw':
                        continue
                    # One row per experiment repetition; column 0 is the point
                    # estimate, columns 1/2 are used as CI bounds.
                    data = np.array([r[it] for r in results])
                    if method in ['dr', 'tmle']:
                        # Recover the standard error from the stored upper
                        # bound, then rebuild symmetric 99% bounds around the
                        # point estimate.  Only done where coverage is
                        # reported, so 'direct' no longer needs bound columns.
                        se = (data[:, 2] - data[:, 0]) / t_stored
                        lower = data[:, 0] - se * t_widened
                        upper = data[:, 0] + se * t_widened
                        cov = f'{100*np.mean((lower <= true) & (true <= upper)):.0f}'
                    else:
                        # Coverage is only reported for dr and tmle.
                        cov = 'NA'
                    resd[xkey][nkey][ivkey][method] = {
                                    'cov': cov,
                                    'rmse': f'{np.sqrt(np.mean((data[:, 0]-true)**2)):.3f}',
                                    'bias': f'{np.abs(np.mean((data[:, 0]-true))):.3f}',
                                    'std': f'{np.std(data[:, 0]):.3f}'}
                resd[xkey][nkey][ivkey] = pd.DataFrame(resd[xkey][nkey][ivkey])
            resd[xkey][nkey] = pd.concat(resd[xkey][nkey], sort=False)
        resd[xkey] = pd.concat(resd[xkey], sort=False)
    # Index levels are (n_x, n, strength, stat); level 3 (the stat names) is
    # moved into the columns, giving (method, stat) column pairs.
    table = pd.concat(resd).unstack(level=3)
    display(table)
    print(table.to_latex(bold_rows=True, multirow=True,
                         multicolumn=True, escape=False,
                         column_format='lll||llll|llll|llll|',
                         multicolumn_format='c|'))