In [1]:
import numpy as np
import pandas as pd

from itertools import product

In [2]:
# Seed NumPy's legacy global RNG so that the per-task seeds drawn with
# np.random.randint in the task-building cells are reproducible on re-run.
np.random.seed(seed=42)

In [3]:
# Parameters copied verbatim into every task's settings under the keys of the
# same name ("d", "s_prog", "s_effect", "theta_prog", "theta_effect", "tau").
d = s_prog = s_effect = 20
theta_prog, theta_effect = 0, 0.4
tau = 0

# Stored under the "alpha" key of every task.
alpha = 0.05

# NOTE(review): not referenced by any other visible cell; the global RNG is
# seeded with a literal 42 and each task gets its own drawn seed.
random_seed = 42

# Number of tasks generated per parameter setting, and the "n_sims" value
# written into each task, for the two swept strategies.
chiseling_n_sims_per_task = chiseling_n_tasks_per_setting = 50
datasplit_n_sims_per_task = datasplit_n_tasks_per_setting = 50

In [4]:
# Values swept for the "n" setting of each task.
n_space = [500, 1500, 4000]
# 19 evenly spaced ratios from 0.05 to 0.95 (step 0.05), rounded to 4 decimals
# to strip floating-point noise from linspace.
train_ratio_space = np.linspace(0.05, 0.95, num=19).round(4)

In [5]:
# Template copied for every task. Keys set to None are placeholders: the
# task-building cells overwrite "task_id", "n", "random_seed", "strategy" and
# "n_sims" for every task, "n_burn_in" only for Chiseling tasks and
# "train_ratio" only for DataSplittingStrategy tasks. Key order determines the
# column order of the resulting task table.
base_settings = {
    "task_id": None,
    "dgp": "BasicLinearRCT",
    "n": None,
    "d": d,
    "s_prog": s_prog,
    "s_effect": s_effect,
    "theta_prog": theta_prog,
    "theta_effect": theta_effect,
    "tau": tau,
    "ipw_transform": True,
    "alpha": alpha,
    "random_seed": None,
    "strategy": None,
    "test_thresh": 0,
    "learner": "linreg_learner",
    "n_burn_in": None,
    "n_min": 2,
    "margin_width": 0,
    "alpha_min": 0,
    "alpha_spending_fn": "instantaneous",
    "substitute_t_test": True,
    "train_ratio": None,
    "n_sims": None,
}

In [6]:
# Next task id to hand out; incremented once per appended task.
CURR_TASK_ID = 0
# One settings dict per task, later turned into the task DataFrame.
task_data = list()

In [7]:
# Build the "Chiseling" tasks: for every (n, train_ratio) combination, append
# `chiseling_n_tasks_per_setting` task dicts to `task_data`, each with a fresh
# task id and its own random seed.
strategy = "Chiseling"
parameter_settings = product(n_space, train_ratio_space)

for n, train_ratio in parameter_settings:
    for i in range(chiseling_n_tasks_per_setting):
        # Per-task seed drawn from the globally seeded NumPy RNG. The explicit
        # 64-bit dtype is needed because the upper bound does not fit in a
        # 32-bit integer, which is randint's default dtype on some platforms
        # (e.g. Windows with NumPy < 2) and raises ValueError there. int()
        # keeps the stored seed a plain Python int, as with the default dtype.
        rs = int(np.random.randint(0, 2**32 - 1, dtype=np.int64))
        # Copy the template and override the per-task fields; existing keys
        # keep their position and "task_label" is added as a new last key.
        task_settings = {
            **base_settings,
            "task_id": CURR_TASK_ID,
            "strategy": strategy,
            "n": n,
            # NOTE(review): the swept train_ratio value is stored under
            # "n_burn_in" (and "train_ratio" stays None) for this strategy;
            # presumably it is read as a burn-in fraction downstream - confirm.
            "n_burn_in": train_ratio,
            "n_sims": chiseling_n_sims_per_task,
            "random_seed": rs,
            "task_label": "chiseling",
        }
        task_data.append(task_settings)
        CURR_TASK_ID += 1

In [8]:
# Build the "DataSplittingStrategy" tasks: for every (n, train_ratio)
# combination, append `datasplit_n_tasks_per_setting` task dicts to
# `task_data`, each with a fresh task id and its own random seed.
strategy = "DataSplittingStrategy"
parameter_settings = product(n_space, train_ratio_space)

for n, train_ratio in parameter_settings:
    for i in range(datasplit_n_tasks_per_setting):
        # Per-task seed drawn from the globally seeded NumPy RNG. The explicit
        # 64-bit dtype is needed because the upper bound does not fit in a
        # 32-bit integer, which is randint's default dtype on some platforms
        # (e.g. Windows with NumPy < 2) and raises ValueError there. int()
        # keeps the stored seed a plain Python int, as with the default dtype.
        rs = int(np.random.randint(0, 2**32 - 1, dtype=np.int64))
        # Copy the template and override the per-task fields; existing keys
        # keep their position and "task_label" is added as a new last key.
        # "n_burn_in" is left at the template's None for this strategy.
        task_settings = {
            **base_settings,
            "task_id": CURR_TASK_ID,
            "strategy": strategy,
            "n": n,
            "train_ratio": train_ratio,
            "n_sims": datasplit_n_sims_per_task,
            "random_seed": rs,
            "task_label": "datasplit",
        }
        task_data.append(task_settings)
        CURR_TASK_ID += 1

In [9]:
# Build the "OracleStrategy" tasks: for every n in `n_space`, append a batch
# of task dicts to `task_data`, each with a fresh task id and its own random
# seed. No train_ratio / n_burn_in is swept here; both stay None.
strategy = "OracleStrategy"
parameter_settings = n_space

for n in parameter_settings:
    # NOTE(review): the oracle reuses the datasplit task/simulation counts;
    # there are no oracle-specific constants - confirm this is intended.
    for i in range(datasplit_n_tasks_per_setting):
        # Per-task seed drawn from the globally seeded NumPy RNG. The explicit
        # 64-bit dtype is needed because the upper bound does not fit in a
        # 32-bit integer, which is randint's default dtype on some platforms
        # (e.g. Windows with NumPy < 2) and raises ValueError there. int()
        # keeps the stored seed a plain Python int, as with the default dtype.
        rs = int(np.random.randint(0, 2**32 - 1, dtype=np.int64))
        # Copy the template and override the per-task fields; existing keys
        # keep their position and "task_label" is added as a new last key.
        task_settings = {
            **base_settings,
            "task_id": CURR_TASK_ID,
            "strategy": strategy,
            "n": n,
            "n_sims": datasplit_n_sims_per_task,
            "random_seed": rs,
            "task_label": "oracle",
        }
        task_data.append(task_settings)
        CURR_TASK_ID += 1

In [10]:
# One row per task dict; columns follow the key order of the dicts, and keys
# left at None in the template become missing values.
task_df = pd.DataFrame(data=task_data)

In [11]:
# Preview the first five tasks (all Chiseling, since those are appended first).
task_df.head(n=5)

Unnamed: 0,task_id,dgp,n,d,s_prog,s_effect,theta_prog,theta_effect,tau,ipw_transform,...,learner,n_burn_in,n_min,margin_width,alpha_min,alpha_spending_fn,substitute_t_test,train_ratio,n_sims,task_label
0,0,BasicLinearRCT,500,20,20,20,0,0.4,0,True,...,linreg_learner,0.05,2,0,0,instantaneous,True,,50,chiseling
1,1,BasicLinearRCT,500,20,20,20,0,0.4,0,True,...,linreg_learner,0.05,2,0,0,instantaneous,True,,50,chiseling
2,2,BasicLinearRCT,500,20,20,20,0,0.4,0,True,...,linreg_learner,0.05,2,0,0,instantaneous,True,,50,chiseling
3,3,BasicLinearRCT,500,20,20,20,0,0.4,0,True,...,linreg_learner,0.05,2,0,0,instantaneous,True,,50,chiseling
4,4,BasicLinearRCT,500,20,20,20,0,0.4,0,True,...,linreg_learner,0.05,2,0,0,instantaneous,True,,50,chiseling


In [12]:
# Sanity check on the task table size. Expected rows:
#   3 n values x 19 train ratios x 50 tasks for Chiseling        = 2850
#   3 n values x 19 train ratios x 50 tasks for DataSplitting    = 2850
#   3 n values x 50 tasks for Oracle                             =  150
# i.e. 5850 rows; columns are the 23 template keys plus "task_label" = 24.
print(task_df.shape)

(5850, 24)


In [13]:
# Write the task table as a tab-separated file without the DataFrame index.
# The path is relative to the notebook's working directory.
output_path = "../../task_arrays/simple_demo_shifted.tasks.tsv"
task_df.to_csv(output_path, sep="\t", index=False)