In [1]:
!git clone https://github.com/syrgkanislab/npiv_functionals.git

fatal: destination path 'npiv_functionals' already exists and is not an empty directory.


In [2]:
%cd npiv_functionals

/mnt/data/vasilis/npiv_functionals/notebooks/npiv_functionals


In [3]:
!python setup.py install

!!

        ********************************************************************************
        Please consider removing the following classifiers in favor of a SPDX license expression:

        License :: OSI Approved :: MIT License

        See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#license for details.
        ********************************************************************************

!!
  self._finalize_license_expression()
running install
!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html for details.
        ********************************************************************************

!!
  self.initialize_options()
!!

        ****************************************************

In [1]:
import os
import sys
sys.path.append(os.path.abspath('.'))

In [16]:
import warnings
warnings.simplefilter('ignore')
import itertools
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from mliv.dgps import get_data, get_tau_fn, fn_dict
from mliv.neuralnet.utilities import mean_ci
from mliv.neuralnet import AGMMEarlyStop as AGMM
from mliv.neuralnet.moments import avg_small_diff
from sklearn.ensemble import RandomForestRegressor
import joblib
from joblib import Parallel, delayed
from mliv.cct.mc2 import MC2
from mliv.rkhs import ApproxRKHSIVCV
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import FeatureUnion, Pipeline
import scipy
import pandas as pd

In [3]:
# average finite difference moment
def moment_fn(x, test_fn):
    epsilon = 0.1
    t1 = np.hstack([x[:, [0]] + epsilon, x[:, 1:]])
    t0 = np.hstack([x[:, [0]] - epsilon, x[:, 1:]])
    return (test_fn(t1) - test_fn(t0)) / (2 * epsilon)

In [4]:
def moment_evals(x):
    epsilon = 0.1
    t1 = np.hstack([x[:, [0]] + epsilon, x[:, 1:]])
    t0 = np.hstack([x[:, [0]] - epsilon, x[:, 1:]])
    return t0, t1

In [5]:
it = 0
n = 5000
mc2_gen = MC2(n, 100, None, dimension=10, corr=0.5)
npvec, *_ = mc2_gen.data(it)
Z, X, Y = npvec['instrument'], npvec['endogenous'], npvec['response']
n_z = Z.shape[1]
n_x = X.shape[1]

In [6]:
Z_train, Z_val, X_train, X_val, Y_train, Y_val = train_test_split(
        Z, X, Y, test_size=.5, shuffle=True)

ztrans = Nystroem(n_components=100)
xtrans = Nystroem(n_components=100)
# ztrans = PolynomialFeatures(degree=2)
# xtrans = PolynomialFeatures(degree=2)
# ztrans = FeatureUnion([('poly', PolynomialFeatures(degree=2)), ('nys', Nystroem(n_components=10))])
# xtrans = FeatureUnion([('poly', PolynomialFeatures(degree=2)), ('nys', Nystroem(n_components=10))])
ztrans = Pipeline([('trans', ztrans), ('scale', StandardScaler())])
xtrans = Pipeline([('trans', xtrans), ('scale', StandardScaler())])
Psi = ztrans.fit_transform(Z_train)
xtrans.fit(np.vstack((X_train,) + moment_evals(X_train)))
Phi = xtrans.transform(X_train)
mPhi = moment_fn(X_train, xtrans.transform)

In [7]:
CovPsi = Psi.T @ Psi
CovPhiPsi = Phi.T @ Psi
Phival = xtrans.transform(X_val)
Psival = ztrans.transform(Z_val)
mPhival = moment_fn(X_val, xtrans.transform)
moment_val = np.mean(mPhival, axis=0)

best_violation = np.inf
for alpha in np.logspace(-6, 1, 5):
    regCov = scipy.linalg.pinv(CovPsi + alpha * n * np.eye(Psi.shape[1]))
    Sigma = CovPhiPsi @ regCov @ CovPsi @ regCov @  CovPhiPsi.T
    for beta in np.logspace(-6, 1, 5):
        xi = scipy.linalg.pinv(Sigma + beta * n * np.eye(Phi.shape[1])) @ np.sum(mPhi, axis=0)
        for gamma in np.logspace(-6, 1, 5):
            qparam = scipy.linalg.pinv(CovPsi + gamma * n * np.eye(Psi.shape[1])) @ CovPhiPsi.T @ xi

            representer_val = np.mean((Psival @ qparam).reshape(-1, 1) * Phival, axis=0)
            violation = np.linalg.norm(moment_val - representer_val, ord=2)
            if violation <= best_violation:
                best_alpha = alpha
                best_beta = beta
                best_gamma = gamma
                best_violation = violation

In [8]:
alpha = best_alpha
beta = best_beta
gamma = best_gamma
regCov = scipy.linalg.pinv(CovPsi + alpha * n * np.eye(Psi.shape[1]))
Sigma = CovPhiPsi @ regCov @ CovPsi @ regCov @  CovPhiPsi.T
xi = scipy.linalg.pinv(Sigma + beta * n * np.eye(Phi.shape[1])) @ np.sum(mPhi, axis=0)
qparam = scipy.linalg.pinv(CovPsi + gamma * n * np.eye(Psi.shape[1])) @ CovPhiPsi.T @ xi

In [9]:
best_alpha, best_beta, best_gamma, best_violation

(np.float64(0.0031622776601683794),
 np.float64(5.623413251903491e-05),
 np.float64(0.1778279410038923),
 np.float64(1.0505554273261328))

In [10]:
agmm = ApproxRKHSIVCV(n_components=200)
agmm.fit(Z_train, X_train, Y_train)

<mliv.rkhs.rkhsiv.ApproxRKHSIVCV at 0x7d92c136e3c0>

In [11]:
direct = moment_fn(X_val, agmm.predict).flatten()
residual = (Y_val - agmm.predict(X_val)).flatten()
qvalues = Psival @ qparam
pseudo = direct + qvalues * residual

reg = mean_ci(direct)
dr = mean_ci(pseudo)
ipw = mean_ci(qvalues * Y_val.flatten())
reg, ipw, dr

((np.float64(1.1503100471131114),
  np.float64(1.0262727674858683),
  np.float64(1.2743473267403544)),
 (np.float64(0.844874320849387),
  np.float64(0.07443483584815724),
  np.float64(1.6153138058506167)),
 (np.float64(1.2636576493871527),
  np.float64(0.9907476341217467),
  np.float64(1.5365676646525588)))

In [12]:
xivalues = xtrans.transform(X_val) @ xi
coef = np.mean(qvalues * residual) / np.mean(qvalues * xivalues)
pseudo_tmle = direct + coef * (mPhival @ xi)
pseudo_tmle += qvalues * (residual - coef * xivalues)
tmle = mean_ci(pseudo_tmle)
tmle

(np.float64(1.324173736562062),
 np.float64(1.0368812720008782),
 np.float64(1.611466201123246))

In [13]:
from sklearn.model_selection import KFold

def exp(it, n, dim, corr, fname='cct', iv_strength=None, endogeneity_strength=None):
    np.random.seed(it)
    if fname == 'cct':
        mc2_gen = MC2(n, 100, None, dimension=dim, corr=corr)
        npvec, *_ = mc2_gen.data(it)
        Z, X, Y = npvec['instrument'], npvec['endogenous'], npvec['response']
    else:
        Z, X, Y, _ = get_data(n, 1, iv_strength, get_tau_fn(fn_dict[fname]), 5, endogeneity_strength=endogeneity_strength)

    direct = np.zeros(n)
    residual = np.zeros(n)
    qvalues = np.zeros(n)
    xivalues = np.zeros(n)
    mxivalues = np.zeros(n)

    for train, test in KFold(n_splits=5, shuffle=True).split(Z):
        Z_train, Z_val, X_train, X_val, Y_train, Y_val = Z[train], Z[test], X[train], X[test], Y[train], Y[test]

        ztrans = Nystroem(n_components=200)
        xtrans = Nystroem(n_components=200)
        ztrans = Pipeline([('trans', ztrans), ('scale', StandardScaler())])
        xtrans = Pipeline([('trans', xtrans), ('scale', StandardScaler())])

        Psi = ztrans.fit_transform(Z_train)
        xtrans.fit(np.vstack((X_train,) + moment_evals(X_train)))
        Phi = xtrans.transform(X_train)
        mPhi = moment_fn(X_train, xtrans.transform)

        CovPsi = Psi.T @ Psi
        CovPhiPsi = Phi.T @ Psi
        Phival = xtrans.transform(X_val)
        Psival = ztrans.transform(Z_val)
        mPhival = moment_fn(X_val, xtrans.transform)
        moment_val = np.mean(mPhival, axis=0)

        best_violation = np.inf
        for alpha in np.logspace(-7, 1, 10):
            regCov = scipy.linalg.inv(CovPsi + alpha * n * np.eye(Psi.shape[1]))
            Sigma = CovPhiPsi @ regCov @ CovPsi @ regCov @  CovPhiPsi.T
            for beta in np.logspace(-7, 1, 10):
                xi = scipy.linalg.inv(Sigma + beta * n * np.eye(Phi.shape[1])) @ np.sum(mPhi, axis=0)
                for gamma in np.logspace(-7, 1, 10):
                    qparam = scipy.linalg.inv(CovPsi + gamma * n * np.eye(Psi.shape[1])) @ CovPhiPsi.T @ xi

                    # calculating the violation in the riesz representation property for each feature
                    #  E[m(W; phi)] = E[q(Z) * phi(X)]
                    # for every feature phi.
                    representer_val = np.mean((Psival @ qparam).reshape(-1, 1) * Phival, axis=0)
                    violation = np.linalg.norm(moment_val - representer_val, ord=2)
                    if violation <= best_violation:
                        best_alpha = alpha
                        best_beta = beta
                        best_gamma = gamma
                        best_violation = violation

        alpha = best_alpha
        beta = best_beta
        gamma = best_gamma
        regCov = scipy.linalg.inv(CovPsi + alpha * n * np.eye(Psi.shape[1]))
        Sigma = CovPhiPsi @ regCov @ CovPsi @ regCov @  CovPhiPsi.T
        xi = scipy.linalg.inv(Sigma + beta * n * np.eye(Phi.shape[1])) @ np.sum(mPhi, axis=0)
        qparam = scipy.linalg.inv(CovPsi + gamma * n * np.eye(Psi.shape[1])) @ CovPhiPsi.T @ xi

        agmm = ApproxRKHSIVCV(n_components=200)
        agmm.fit(Z_train, X_train, Y_train)

        direct[test] = moment_fn(X_val, agmm.predict).flatten()
        residual[test] = (Y_val - agmm.predict(X_val)).flatten()
        qvalues[test] = Psival @ qparam
        xivalues[test] = Phival @ xi
        mxivalues[test] = mPhival @ xi

    pseudo = direct + qvalues * residual

    reg = mean_ci(direct)
    dr = mean_ci(pseudo)
    ipw = mean_ci(qvalues * Y.flatten())

    coef = np.mean(qvalues * residual) / np.mean(qvalues * xivalues)
    pseudo_tmle = direct + coef * mxivalues
    pseudo_tmle += qvalues * (residual - coef * xivalues)
    tmle = mean_ci(pseudo_tmle)

    return dr, tmle, ipw, reg

In [None]:
def get_result_dict(results, true, alpha=0.95):
    df = {}
    for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
        if method == 'ipw':
            continue
        data = np.array([r[it] for r in results])
        confidence = .95
        se = (data[:, 2] - data[:, 0]) / scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
        if method in ['dr', 'tmle']:
            confidence = 0.95
            data[:, 1] = data[:, 0] - se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
            data[:, 2] = data[:, 0] + se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
            cov95 = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
            confidence = 0.99
            data[:, 1] = data[:, 0] - se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
            data[:, 2] = data[:, 0] + se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
            cov99 = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
        else:
            cov = 'NA'
        df[method] =  {'cov95': cov95, 'cov99': cov99,
                'rmse': f'{np.sqrt(np.mean((data[:, 0] - true)**2)):.3f}',
                'bias': f'{np.abs(np.mean((data[:, 0] - true))):.3f}'}
    return df

In [None]:
true = 1.0

for n in [1000, 5000]:
    for n_x in [0, 5, 10]:
        for corr in [0.0, 0.5]:
            if n_x == 0 and corr == 0.5:
                continue
            print(n, n_x, corr)
            results = Parallel(n_jobs=-1, verbose=3)(delayed(exp)(it, n, n_x, corr)
                                                            for it in range(1000))
            joblib.dump(results, f'rkhs_cct_n_{n}_n_x_{n_x}_corr_{corr}.jbl')
            df = pd.DataFrame(get_result_dict(results, true))
            display(df)

In [None]:
true = 1.0
res = {}
for n_x in [0, 5, 10]:
    res[f'${n_x}$'] = {}
    for n in [1000, 5000]:
        res[f'${n_x}$'][f'${n}$'] = {}
        for corr in [0.0, 0.5]:
            if n_x == 0 and corr == 0.5:
                continue
            results = joblib.load(f'rkhs_cct_n_{n}_n_x_{n_x}_corr_{corr}.jbl')
            res[f'${n_x}$'][f'${n}$'][f'${corr}$'] = pd.DataFrame(get_result_dict(results, true))
        res[f'${n_x}$'][f'${n}$'] = pd.concat(res[f'${n_x}$'][f'${n}$'], sort=False)
    res[f'${n_x}$'] = pd.concat(res[f'${n_x}$'], sort=False)
res = pd.concat(res, sort=False).unstack(level=3)
print(res.to_latex(bold_rows=True, multirow=True,
                   multicolumn=True, escape=False,
                   column_format='lll||llll|llll|llll|',
                   multicolumn_format='c|'))

In [None]:
for fname in ['abs', '2dpoly', 'sigmoid', 'sin']:
    for iv_strength in [.2, .5]:
        for endogeneity_strength in [0.1, 0.3]:
            Z, X, Y, true_fn = get_data(
                1000000, 1, iv_strength, get_tau_fn(fn_dict[fname]), 5, endogeneity_strength=endogeneity_strength)
            true = np.mean(moment_fn(X, true_fn))
            for n in [500, 1000, 2000]:
                print(n, fname, iv_strength, endogeneity_strength, true)
                results = Parallel(n_jobs=-1, verbose=3)(delayed(exp)(it, n, None, None,
                                                                      fname=fname,
                                                                      iv_strength=iv_strength,
                                                                      endogeneity_strength=endogeneity_strength)
                                                                for it in range(1000))
                joblib.dump((true, results), f'rkhs_fname_{fname}_n_{n}_iv_strength_{iv_strength}_{endogeneity_strength}.jbl')
                df = pd.DataFrame(get_result_dict(results, true))
                display(df)

500 abs 0.2 0.1 -0.0007951069338182391


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   26.3s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.6min finished


Unnamed: 0,dr,tmle,direct
cov95,94.0,95.0,95.0
cov99,99.0,99.0,99.0
rmse,0.176,3.514,0.107
bias,0.009,0.066,0.021


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


1000 abs 0.2 0.1 -0.0007951069338182391


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   25.1s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.7min finished


Unnamed: 0,dr,tmle,direct
cov95,96.0,94.0,94.0
cov99,99.0,99.0,99.0
rmse,0.112,1.148,0.082
bias,0.002,0.015,0.01


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


2000 abs 0.2 0.1 -0.0007951069338182391


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   27.0s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.9min finished


Unnamed: 0,dr,tmle,direct
cov95,95.0,94.0,94.0
cov99,99.0,99.0,99.0
rmse,0.077,0.452,0.059
bias,0.0,0.002,0.002


500 abs 0.2 0.3 0.0002801681965169937


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   23.9s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.6min finished


Unnamed: 0,dr,tmle,direct
cov95,95.0,96.0,96.0
cov99,98.0,99.0,99.0
rmse,0.179,2.71,0.112
bias,0.002,0.044,0.059


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


1000 abs 0.2 0.3 0.0002801681965169937


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   25.1s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.7min finished


Unnamed: 0,dr,tmle,direct
cov95,94.0,94.0,94.0
cov99,98.0,99.0,99.0
rmse,0.126,1.563,0.087
bias,0.003,0.062,0.031


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


2000 abs 0.2 0.3 0.0002801681965169937


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   27.1s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.9min finished


Unnamed: 0,dr,tmle,direct
cov95,95.0,95.0,95.0
cov99,99.0,99.0,99.0
rmse,0.086,1.152,0.061
bias,0.006,0.025,0.015


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


500 abs 0.5 0.1 0.0003265057188871935


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   24.2s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.6min finished


Unnamed: 0,dr,tmle,direct
cov95,94.0,97.0,97.0
cov99,98.0,99.0,99.0
rmse,0.08,1.997,0.049
bias,0.002,0.057,0.001


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


1000 abs 0.5 0.1 0.0003265057188871935


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   25.2s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.7min finished


Unnamed: 0,dr,tmle,direct
cov95,93.0,96.0,96.0
cov99,98.0,99.0,99.0
rmse,0.054,4.351,0.038
bias,0.003,0.154,0.002


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


2000 abs 0.5 0.1 0.0003265057188871935


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   27.1s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.9min finished


Unnamed: 0,dr,tmle,direct
cov95,92.0,96.0,96.0
cov99,98.0,99.0,99.0
rmse,0.037,1.292,0.028
bias,0.004,0.044,0.003


500 abs 0.5 0.3 0.00066860057958586


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   24.4s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.6min finished


Unnamed: 0,dr,tmle,direct
cov95,94.0,97.0,97.0
cov99,99.0,100.0,100.0
rmse,0.092,2.076,0.052
bias,0.006,0.007,0.006


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


1000 abs 0.5 0.3 0.00066860057958586


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   25.2s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.7min finished


Unnamed: 0,dr,tmle,direct
cov95,94.0,96.0,96.0
cov99,98.0,100.0,100.0
rmse,0.061,0.673,0.038
bias,0.004,0.005,0.002


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


2000 abs 0.5 0.3 0.00066860057958586


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   27.1s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.9min finished


Unnamed: 0,dr,tmle,direct
cov95,96.0,98.0,98.0
cov99,99.0,99.0,99.0
rmse,0.042,0.931,0.029
bias,0.001,0.005,0.001


500 2dpoly 0.2 0.1 -0.8185401941888014


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   24.3s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.6min finished


Unnamed: 0,dr,tmle,direct
cov95,94.0,94.0,94.0
cov99,98.0,99.0,99.0
rmse,0.188,2.941,0.222
bias,0.025,0.185,0.192


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


1000 2dpoly 0.2 0.1 -0.8185401941888014


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   24.9s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.7min finished


Unnamed: 0,dr,tmle,direct
cov95,95.0,94.0,94.0
cov99,99.0,99.0,99.0
rmse,0.118,1.855,0.111
bias,0.013,0.045,0.074


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


2000 2dpoly 0.2 0.1 -0.8185401941888014


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   27.1s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.9min finished


Unnamed: 0,dr,tmle,direct
cov95,96.0,95.0,95.0
cov99,99.0,99.0,99.0
rmse,0.082,0.514,0.066
bias,0.008,0.008,0.025


500 2dpoly 0.2 0.3 -1.0462858494618295


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   24.4s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.6min finished


Unnamed: 0,dr,tmle,direct
cov95,92.0,94.0,94.0
cov99,97.0,99.0,99.0
rmse,0.261,5.065,0.406
bias,0.055,0.014,0.39


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


1000 2dpoly 0.2 0.3 -1.0462858494618295


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   25.2s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.7min finished


Unnamed: 0,dr,tmle,direct
cov95,93.0,93.0,93.0
cov99,97.0,98.0,98.0
rmse,0.186,3.051,0.187
bias,0.026,0.126,0.144


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


2000 2dpoly 0.2 0.3 -1.0462858494618295


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   27.1s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.9min finished


Unnamed: 0,dr,tmle,direct
cov95,95.0,94.0,94.0
cov99,98.0,98.0,98.0
rmse,0.126,1.182,0.108
bias,0.007,0.042,0.062


500 2dpoly 0.5 0.1 -0.5675457703338429


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   24.5s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.6min finished


Unnamed: 0,dr,tmle,direct
cov95,94.0,96.0,96.0
cov99,99.0,100.0,100.0
rmse,0.079,2.114,0.059
bias,0.024,0.074,0.023


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


1000 2dpoly 0.5 0.1 -0.5675457703338429


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   25.1s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.7min finished


Unnamed: 0,dr,tmle,direct
cov95,92.0,94.0,94.0
cov99,98.0,99.0,99.0
rmse,0.053,4.161,0.042
bias,0.014,0.176,0.013


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


2000 2dpoly 0.5 0.1 -0.5675457703338429


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   27.2s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.9min finished


Unnamed: 0,dr,tmle,direct
cov95,92.0,95.0,95.0
cov99,98.0,99.0,99.0
rmse,0.036,0.46,0.028
bias,0.01,0.029,0.009


500 2dpoly 0.5 0.3 -0.5875408904428301


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   24.4s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.6min finished


Unnamed: 0,dr,tmle,direct
cov95,92.0,96.0,96.0
cov99,97.0,99.0,99.0
rmse,0.106,1.587,0.087
bias,0.023,0.002,0.052


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


1000 2dpoly 0.5 0.3 -0.5875408904428301


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   25.0s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.7min finished


Unnamed: 0,dr,tmle,direct
cov95,92.0,95.0,95.0
cov99,97.0,99.0,99.0
rmse,0.069,0.905,0.059
bias,0.011,0.03,0.031


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


2000 2dpoly 0.5 0.3 -0.5875408904428301


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   27.2s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.9min finished


Unnamed: 0,dr,tmle,direct
cov95,92.0,96.0,96.0
cov99,98.0,99.0,99.0
rmse,0.049,1.042,0.04
bias,0.009,0.022,0.018


500 sigmoid 0.2 0.1 0.3843573070801452


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   24.4s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.6min finished


Unnamed: 0,dr,tmle,direct
cov95,95.0,96.0,96.0
cov99,99.0,99.0,99.0
rmse,0.164,3.537,0.106
bias,0.013,0.065,0.017


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


1000 sigmoid 0.2 0.1 0.3843573070801452


[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:   25.0s
[Parallel(n_jobs=-1)]: Done 352 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.7min finished


Unnamed: 0,dr,tmle,direct
cov95,96.0,94.0,94.0
cov99,99.0,99.0,99.0
rmse,0.103,0.911,0.074
bias,0.007,0.009,0.009


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 80 concurrent workers.


2000 sigmoid 0.2 0.1 0.3843573070801452


In [None]:
def get_result_dict(results, true, alpha=0.95):
    df = {}
    for it, method in enumerate(['dr', 'tmle', 'ipw', 'direct']):
        data = np.array([r[it] for r in results])
        confidence = .95
        se = (data[:, 2] - data[:, 0]) / scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
        confidence = alpha
        data[:, 1] = data[:, 0] - se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
        data[:, 2] = data[:, 0] + se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
        if method in ['dr', 'tmle']:
            cov = f'{100*np.mean((data[:, 1] <= true) & (true <= data[:, 2])):.0f}'
        else:
            cov = 'NA'
        df[method] =  {'cov': cov,
                'rmse': f'{np.sqrt(np.mean((data[:, 0] - true)**2)):.3f}',
                'bias': f'{np.abs(np.mean((data[:, 0] - true))):.3f}'}
    return df

In [None]:
for alpha in [0.95, 0.99]:
    res = {}
    for fname in ['abs', '2dpoly', 'sigmoid', 'sin']:
        res[fname] = {}
        for n in [500, 1000, 2000]:
            res[fname][f'${n}$'] = {}
            for iv_strength in [.2, .5]:
                for endogeneity_strength in [0.05, 0.1]:
                    true, results = joblib.load(f'rkhs_fname_{fname}_n_{n}_iv_strength_{iv_strength}_{endogeneity_strength}.jbl')
                    df = pd.DataFrame(get_result_dict(results, true, alpha=alpha))
                    res[fname][f'${n}$'][f'${iv_strength}$'][f'${endogeneity_strength}$'] = df
                res[fname][f'${n}$'][f'${iv_strength}$'] = pd.concat(res[fname][f'${n}$'][f'${iv_strength}$'], sort=False)
            res[fname][f'${n}$'] = pd.concat(res[fname][f'${n}$'], sort=False)
        res[fname] = pd.concat(res[fname], sort=False).unstack(level=3)
    res = pd.concat(res, sort=False).unstack(level=4)
    print(res.to_latex(bold_rows=True, multirow=True,
                       multicolumn=True, escape=False,
                       column_format='llll||lll|lll|lll|lll|',
                       multicolumn_format='c|'))