In [54]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
# Re-running this cell in a live kernel only prints an "already loaded" notice.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [55]:
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt

import sys
sys.path.append("../../../")

from chiseling.dgps.basic_binary_regression import BasicBinaryRegression
from chiseling.source.learners.baselearners_binary import logreg_learner
from chiseling.source.protocol.IRST import UnitRegistrar, IRSTBinary
from chiseling.source.strategies.chiseling import Chiseling
from chiseling.source.strategies.bonferroni_combiner import BonferroniCombiner

from chiseling.benchmark.benchmark import Benchmark

In [56]:
# Hyperparameters
# Sampler inputs: passed positionally to BasicBinaryRegression as
# (d, s, rho, theta, tau); n is the sample count given to sampler.sample and
# also scales the batch sizes in strategy_settings.
d, s = 100, 5
rho, theta, tau = 0.2, 0.6, 1.365
n = 5000

# Testing inputs: test_thresh and alpha_min are stored in strategy_settings,
# alpha is passed to BonferroniCombiner.
test_thresh, alpha, alpha_min = 0.9, 0.05, 0

# Single seed shared by the sampler and the combiner.
random_seed = 26

In [57]:
# Settings forwarded to the strategy through BonferroniCombiner's
# `strategy_settings` argument.
strategy_settings = {
    # Batch sizes are 1% and 5% of n, truncated to int and never below one.
    "reveal_batch_size": max(1, int(n * 0.01)),
    "refit_batch_size": max(1, int(n * 0.05)),
    "alpha_spending_fn": "uniform",
    "boundary_strategy": "margin",
    "n_min": 1,
    "n_burn_in": 0.5,
    "test_thresh": test_thresh,
    "learner": logreg_learner,
    "use_learner_weights": False,
    "binary": True,
    "alpha_min": alpha_min,
}

In [58]:
# Build the data generator from the hyperparameters and draw n samples; X and Y
# are the inputs later handed to BonferroniCombiner.
# NOTE(review): the meaning and order of the positional arguments are defined by
# BasicBinaryRegression, which is not visible here — confirm against that class.
sampler = BasicBinaryRegression(d, s, rho, theta, tau, random_seed=random_seed)
X, Y = sampler.sample(n)

In [59]:
# Run chiseling: configure a BonferroniCombiner with strategy="Chiseling",
# the settings dict, and three split values, then execute it.
strategy = BonferroniCombiner(
    X=X,
    Y=Y,
    strategy="Chiseling",
    strategy_settings=strategy_settings,
    alpha=alpha,
    splits=[0.2, 0.5, 0.8],
    random_seed=random_seed,
)
strategy.run_strategy()

In [60]:
# Display the rejection flags; the recorded run shows three booleans, matching
# the three values passed as `splits` (presumably one flag per split — confirm).
strategy.split_rejections

[True, True, True]

In [61]:
# Display the split reported for the rejected region; the recorded run shows
# 0.8, one of the three values passed as `splits`.
# NOTE(review): how the combiner picks this split is not visible here.
strategy.rejected_region_split

np.float64(0.8)

In [62]:
# Display the rejected region; the recorded output is a
# chiseling.source.protocol.IRST.Region instance (default object repr).
strategy.rejected_region

<chiseling.source.protocol.IRST.Region at 0x12fb574a0>

In [48]:
import pandas as pd

In [49]:
# Load the tab-separated task array for the binary-regression benchmark.
# The path is relative, so it resolves against the kernel's working directory.
task_df = pd.read_csv("../../task_arrays/binary_regression.tasks.tsv", sep="\t")

In [50]:
# Show the first task row as a plain dict to inspect its settings.
task_df.iloc[0].to_dict()

{'dgp': 'BasicBinaryRegression',
 'n': 5000,
 'd': 100,
 's': 5,
 'rho': 0.2,
 'theta': 0.6,
 'tau': 1.365,
 'alpha': 0.05,
 'test_thresh': 0.9,
 'learner': 'logreg_learner',
 'binary': True,
 'subgroup_size': 0.1,
 'strategy': 'BonferroniCombiner',
 'bonf_strategy': 'Chiseling',
 'n_burn_in': 0.5,
 'reveal_batch_prop': 0.01,
 'refit_batch_prop': 0.05,
 'n_min': 1.0,
 'alpha_spending_fn': 'uniform',
 'boundary_strategy': 'margin',
 'alpha_min': 0.0,
 'use_learner_weights': False,
 'train_ratio': nan,
 'reveal_batch_size': 50.0,
 'refit_batch_size': 250.0,
 'n_sims': 50,
 'task_id': 7650,
 'random_seed': 363733648}

In [51]:
# Configure a Benchmark from the first task row, converted to a plain dict.
benchmark = Benchmark(task_df.iloc[0].to_dict())

In [52]:
# Run the benchmark's batch of simulations for the configured task.
# NOTE(review): the task row has n_sims = 50 — presumably the number of
# simulations run here, with results stored on
# benchmark.simulation_results_df; confirm in Benchmark.
benchmark.simulate_batch()

In [53]:
# Mean of the `rejected` column over the simulation results.
benchmark.simulation_results_df["rejected"].mean()

np.float64(0.16)