In [None]:
import pandas as pd
from pathlib import Path

# Collate ACIC

In [None]:
# Collate the per-seed ACIC2016 result files into one frame, tagging every row
# with the seed of the file it was read from.
experiment_name = input("Experiment_name. This is the file_name with which u issued the command to run.")
acic_dir = Path("results/experiments_benchmarking/acic2016")

acic_dfs = []
seed_column = []
for seed in [2, 7, 26]:
    # Build the file name directly rather than splicing the user-supplied name
    # into a str.format template: braces typed into the experiment name would
    # otherwise be parsed as replacement fields.
    acic_file = f"v{seed}_{experiment_name}.csv"
    df = pd.read_csv(acic_dir / acic_file)
    acic_dfs.append(df)
    seed_column.extend([seed] * len(df))

# Rows keep the order in which the files were read, so the seed labels line up
# positionally with the concatenated rows.
acic_df = pd.concat(acic_dfs)
acic_df["seed"] = seed_column
print(acic_df)

In [None]:
# Write the collated ACIC frame into the same directory the per-seed files were read from.
# NOTE(review): to_csv is called without index=False, so the row index is written
# as an extra unnamed first column — confirm that downstream readers expect it.
acic_df.to_csv(acic_dir / f"collated_results-{experiment_name}.csv")

# Collate Twins

In [None]:
# Collate the Twins result files (one per treated-proportion / training-size
# combination) into one frame, tagging every row with the combination of the
# file it was read from.
twins_dir = Path("results/experiments_inductive_bias/twins")
experiment_name = input("Experiment name. This is the file_name with which u issued the command to run.")

# The list of per-file frames gets its own name so that `twins_df` only ever
# refers to the final DataFrame, even if the cell is interrupted half-way.
twins_dfs = []
prop_t_column = []
num_trn_column = []
for prop_treated in [0.1, 0.25, 0.5, 0.75, 0.9]:
    # The string "None" appears verbatim in the file names.
    # NOTE(review): presumably the run without a training-size cap — confirm.
    for num_trn in [500, 1000, 4000, "None"]:
        # Build the file name directly rather than splicing the user-supplied
        # name into a str.format template: braces typed into the experiment
        # name would otherwise be parsed as replacement fields.
        twins_file = twins_dir / f"{experiment_name}_{prop_treated}_{num_trn}.csv"
        df = pd.read_csv(twins_file)

        prop_t_column.extend([prop_treated] * len(df))
        num_trn_column.extend([num_trn] * len(df))
        twins_dfs.append(df)

# Rows keep the order in which the files were read, so the labels line up
# positionally with the concatenated rows.
twins_df = pd.concat(twins_dfs)
twins_df["prop_t"] = prop_t_column
twins_df["num_trn"] = num_trn_column
print(twins_df)

In [None]:
# Write the collated Twins frame into the same directory the per-setting files were read from.
# NOTE(review): to_csv is called without index=False, so the row index is written
# as an extra unnamed first column — confirm that downstream readers expect it.
twins_df.to_csv(twins_dir / f"collated_results-{experiment_name}.csv")

# Final Table

## Discrete

In [None]:
import pandas as pd
import torch
import numpy as np
from scipy.stats import ttest_ind
from pathlib import Path
from copy import deepcopy

In [None]:
# Name of the experiment to tabulate; it is interpolated into the IHDP, ACIC and
# Twins result-file paths that are read further down.
experiment_name = input("Experiment name")

In [None]:
def t_test(x, y, alternative="lesser"):
    """Run Welch's two-sample t-test and return a one- or two-sided p-value.

    Args:
        x: First sample; an array-like or a ``torch.Tensor``.
        y: Second sample; an array-like or a ``torch.Tensor``.
        alternative: Which alternative hypothesis the p-value refers to:
            ``"both-sided"`` (means differ), ``"greater"`` (mean of ``x`` is
            larger than mean of ``y``) or ``"lesser"`` (mean of ``x`` is
            smaller than mean of ``y``).

    Returns:
        The p-value for the requested alternative.

    Raises:
        ValueError: If ``alternative`` is not one of the supported names.
    """
    # Code adapted from: https://stackoverflow.com/questions/15984221/how-to-perform-two-sample-one-tailed-t-test-with-numpy-scipy
    valid_alternatives = ("both-sided", "greater", "lesser")
    if alternative not in valid_alternatives:
        # Fail early with a clear message instead of reaching the return
        # statement with no p-value assigned.
        raise ValueError(
            f"alternative must be one of {valid_alternatives}, got {alternative!r}"
        )

    # isinstance also covers Tensor subclasses; detach/cpu make the conversion
    # work for tensors that track gradients or live on an accelerator.
    if isinstance(x, torch.Tensor):
        x = x.detach().cpu().numpy()
    if isinstance(y, torch.Tensor):
        y = y.detach().cpu().numpy()

    # equal_var=False selects Welch's test (no equal-variance assumption).
    _, double_p = ttest_ind(x, y, equal_var=False)
    if alternative == "both-sided":
        return double_p

    # Halve the two-sided p-value when the observed difference points in the
    # direction of the alternative; otherwise take the complementary tail.
    mean_x, mean_y = np.mean(x), np.mean(y)
    if alternative == "greater":
        in_direction = mean_x > mean_y
    else:
        in_direction = mean_x < mean_y
    return double_p / 2.0 if in_direction else 1.0 - double_p / 2.0

In [None]:
# Algorithm names to tabulate. Each name is used as the prefix of the result
# columns looked up per dataset and as a row label of the final table.
algos = ['TNet', 'RNet', 'DRNet', 'XNet', 'TARNet', 'CFRNet', 'DragonNet', 'FlexTENet', 'PairNet']
print(algos)

In [None]:
# Results of the experiment being evaluated, keyed by dataset label. The file
# locations depend on the experiment name entered above.
csvs = {
    "IHDP": pd.read_csv(f"results/experiments_benchmarking/ihdp/{experiment_name}.csv"),
    "ACIC": pd.read_csv(f"results/experiments_benchmarking/acic2016/collated_results-{experiment_name}.csv"),
    "Twins": pd.read_csv(f"results/experiments_inductive_bias/twins/collated_results-{experiment_name}.csv"),
}
# Keep the individual frames reachable under their own names as well.
ihdp_csv, acic_csv, twins_csv = (csvs[label] for label in ("IHDP", "ACIC", "Twins"))

# "Main" result files, read from fixed paths that do not depend on the
# experiment name; keyed by the same dataset labels as `csvs`.
main_csvs = {
    "IHDP": pd.read_csv("results/experiments_benchmarking/ihdp/collated_results_main-ihdp.csv"),
    "ACIC": pd.read_csv("results/experiments_benchmarking/acic2016/collated_results_main-acic.csv"),
    "Twins": pd.read_csv("results/experiments_inductive_bias/twins/collated_results_cate_in_out.csv"),
}
ihdp_csv_main, acic_csv_main, twins_csv_main = (
    main_csvs[label] for label in ("IHDP", "ACIC", "Twins")
)

In [None]:
# Display the column names available in the IHDP "main" results frame.
ihdp_csv_main.columns

In [None]:
# Column template for one dataset's slice of the table; copied per dataset so
# the lists are never shared between datasets.
df_dict = {
    "ITE in": [],
    "ITE out": [],
}

# One DataFrame per dataset, in the iteration order of `csvs`.
results_df = []

for dataset, exp_df in csvs.items():
    print(dataset)

    ds_df = deepcopy(df_dict)
    ref_df = main_csvs[dataset]

    # Twins columns carry a "_cate" infix; the suffixes only depend on the
    # dataset, so they are chosen once per dataset.
    if dataset == 'Twins':
        suffix_in, suffix_out = '_cate_in', '_cate_out'
    else:
        suffix_in, suffix_out = '_in', '_out'

    for algo in algos:
        # In the reference frames the PairNet columns use the prefix "FCTNBRNet".
        ref_algo = "FCTNBRNet" if algo == "PairNet" else algo

        ref_in = ref_df[ref_algo + suffix_in].values
        ref_out = ref_df[ref_algo + suffix_out].values

        new_in = exp_df[algo + suffix_in].values
        new_out = exp_df[algo + suffix_out].values

        # p-values from t_test with its default alternative, reference sample first.
        p_in = t_test(ref_in, new_in)
        p_out = t_test(ref_out, new_out)

        # Difference of means: reference minus evaluated experiment.
        delta_in = np.mean(ref_in) - np.mean(new_in)
        delta_out = np.mean(ref_out) - np.mean(new_out)

        # Each table cell reads "<mean difference> (<p-value>)", two decimals each.
        ds_df["ITE in"].append(f"{round(delta_in, 2):.2f} ({round(p_in, 2):.2f})")
        ds_df["ITE out"].append(f"{round(delta_out, 2):.2f} ({round(p_out, 2):.2f})")

    results_df.append(pd.DataFrame(ds_df))
        

In [None]:
# Place the per-dataset tables side by side, one row per algorithm. Every
# dataset contributes identically named "ITE in" / "ITE out" columns, so each
# group is labelled with its dataset (in the same order the per-dataset frames
# were built) to keep the exported table unambiguous.
results_all = pd.concat(results_df, axis=1, keys=list(csvs))
results_all.index = algos
print(results_all.to_latex())