In [1]:
import seaborn as sns
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
sns.set_style("whitegrid")

from attrdict import AttrDict
from drivers import *
from data_loaders import *


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
exp_config = AttrDict({
    'benchmark': {
        'name': 'synthetic_1D_sinusoidal',
        'NS': 1000,
        'config': {
            'Y0_PDF': 'piecewise_sinusoid',
            'Y1_PDF': 'low_base_rate_sinusoid',
            'PI_PDF': 'linear'
            }
    },
    
    # Experimental setup 
    'learn_weights': True,
    'learn_parameters': True,
    
    # Hyperparameters
    'n_epochs': 3,
    'n_runs': 1,
    
    'target_POs': [0, 1],
    
    'policy_gamma': 0,
    'train_ratio': .7,
    
    'split_erm': True,
    'crossfit_erm': True,
    'split_ccpe': True,
    'crossfit_ccpe': True,
})

error_params = [AttrDict({
    'alpha_0': 0.5,
    'alpha_1': 0,
    'beta_0': 0.1,
    'beta_1': 0 
})]

# baselines = [AttrDict({
#     'model': 'OBS',
#     'target': 'Y',
#     'conditional': False,
#     'sl': False,
#     'reweight': False
# }), AttrDict({
#     'model': 'OBS Oracle',
#     'target': 'YS',
#     'conditional': False,
#     'sl': False,
#     'reweight': False
# }), AttrDict({
#     'model': 'COM',
#     'target': 'Y',
#     'conditional': True,
#     'sl': False,
#     'reweight': False
# }), AttrDict({
#     'model': 'COM-SL',
#     'target': 'Y',
#     'conditional': True,
#     'sl': True,
#     'reweight': False
# }), AttrDict({
#     'model': 'RW',
#     'target': 'Y',
#     'conditional': True,
#     'sl': False,
#     'reweight': True
# }), AttrDict({
#     'model': 'RW-SL',
#     'target': 'Y',
#     'conditional': True,
#     'sl': True,
#     'reweight': True
# }), AttrDict({
#     'model': 'Proxy Oracle',
#     'target': f'Y',
#     'conditional': True,
#     'sl': False,
#     'reweight': False
# }), AttrDict({
#     'model': 'Target Oracle',
#     'target': f'YS',
#     'conditional': True,
#     'sl': True,
#     'reweight': False
# })]
baselines = [AttrDict({
    'model': 'Proxy Oracle',
    'target': f'Y',
    'conditional': True,
    'sl': False,
    'reweight': False
}), AttrDict({
    'model': 'Target Oracle',
    'target': f'YS',
    'conditional': True,
    'sl': True,
    'reweight': False
})]

po_results, te_results =  run_risk_minimization_exp(exp_config, baselines, error_params)



RUN: 0, alpha_0: 0.5, alpha_1: 0, beta_0: 0.1, beta_1: 0



Propensity model: 100%|████████████████████████████████| 3/3 [00:02<00:00,  1.50it/s]
CCPE: 0: 100%|█████████████████████████████████████████| 3/3 [00:01<00:00,  1.69it/s]
CCPE: 0: 100%|█████████████████████████████████████████| 3/3 [00:01<00:00,  1.73it/s]
CCPE: 1: 100%|█████████████████████████████████████████| 3/3 [00:01<00:00,  1.72it/s]
CCPE: 1: 100%|█████████████████████████████████████████| 3/3 [00:01<00:00,  1.76it/s]
ERM: Proxy Oracle: 100%|███████████████████████████████| 3/3 [00:01<00:00,  1.69it/s]
ERM: Proxy Oracle: 100%|███████████████████████████████| 3/3 [00:01<00:00,  1.75it/s]
ERM: Target Oracle: 100%|██████████████████████████████| 3/3 [00:01<00:00,  1.73it/s]
ERM: Target Oracle: 100%|██████████████████████████████| 3/3 [00:01<00:00,  1.70it/s]
Propensity model: 100%|████████████████████████████████| 3/3 [00:01<00:00,  1.69it/s]
CCPE: 0: 100%|█████████████████████████████████████████| 3/3 [00:01<00:00,  1.71it/s]
CCPE: 0: 100%|████████████████████████████████████████

In [3]:
po_results

Unnamed: 0,alpha_0,alpha_1,beta_0,beta_1,benchmark,AU-ROC,ACC,do,baseline
0,0.5,0,0.1,0,synthetic_1D_sinusoidal,0.350462,0.433333,0,Proxy Oracle
1,0.5,0,0.1,0,synthetic_1D_sinusoidal,0.873188,0.46,1,Proxy Oracle
2,0.5,0,0.1,0,synthetic_1D_sinusoidal,0.590437,0.47,0,Target Oracle
3,0.5,0,0.1,0,synthetic_1D_sinusoidal,0.873188,0.46,1,Target Oracle


In [None]:
po_results

In [30]:

n_splits = 3  # (function argument)
te_metrics, po_metrics = compute_crossfit_metrics(crossfit_preds, Y_test, n_splits, config)

In [33]:
pd.DataFrame(po_metrics)

Unnamed: 0,AU-ROC,ACC,do,baseline
0,0.400174,0.0,0,OBS
1,0.447338,0.0,0,OBS Oracle
2,0.540643,0.0,0,COM
3,0.513889,0.0,0,COM-SL
4,0.384615,0.0,0,RW
5,0.48206,0.0,0,RW-SL
6,0.529425,0.0,0,Proxy Oracle
7,0.615385,0.0,0,Target Oracle


In [None]:
X, Y = load_benchmark(exp_config.benchmark, error_params)

split_ix = int(.7*X.shape[0])

ccpe_dataset = AttrDict({
    'X_train': X.iloc[split_ix:, :],
    'Y_train': Y.iloc[split_ix:, :],
    'X_test': X.iloc[:split_ix, :],
    'Y_test': Y.iloc[:split_ix, :],
})



In [12]:
Y['pYS_0'].min()

1.94447574708434e-09

In [21]:
error_params

{'alpha_0': 0.1, 'alpha_1': 0.05, 'beta_0': 0.2, 'beta_1': 0.4}

In [6]:
py, alpha, beta = ccpe(ccpe_dataset, 0, exp_config, q_max=0, q_min=1)

Conditioning


Target: Y: 100%|████████████████████████████████| 20/20 [00:19<00:00,  1.01it/s]


In [7]:
alpha

0.97248465

In [8]:
beta

0.7411392331123352

In [6]:
beta

0.9778537675738335

In [10]:

np.quantile(py_hat, 0)


py.min()

0.28198478

In [12]:
train_ratio = .7
N = X.shape[0]

test_split_ix = int(N*train_ratio)

X_train, X_test = X.iloc[split_ix:, :], X.iloc[:split_ix, :]
Y_train, Y_test = Y.iloc[split_ix:, :], Y.iloc[:split_ix, :]

weight_split_ix

ccpe_train_split_ix


In [14]:
X_train.shape

(1500, 2)

In [6]:
X.shape

(5000, 1)

In [10]:
X.shape[1]

2