# ASBE - Automatic Stopping for Batch Experiments

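> Core API for automatic stopping in batch-mode active learning: a random batch sampling strategy, the `ASLearner` wrapper, and the `ITEEstimator` treatment-effect estimator.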

In [None]:
#hide
from nbdev import *

In [None]:
%nbdev_default_export core

Cells will be exported to asbe.core,
unless a different module is specified after an export flag: `%nbdev_export special.module`


In [None]:
%nbdev_export
from modAL.models.base import BaseLearner
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from typing import Union, Optional
from copy import deepcopy
import numpy as np

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [None]:
%nbdev_export
def random_batch_sampling(classifier, X_pool, n2):
    """Randomly sample a batch of distinct rows from a pool of unlabeled samples.

    Args:
        classifier: Not used by this strategy.
        X_pool: Pool of candidate samples; must support ``len`` and indexing
            with an integer array (e.g. a NumPy array).
        n2: Number of samples to draw.

    Returns:
        A tuple ``(query_idx, X_pool[query_idx])`` with the drawn indices and
        the corresponding pool entries.

    Raises:
        ValueError: If ``n2`` is larger than the pool (raised by
            ``np.random.choice`` when sampling without replacement).
    """
    n_samples = len(X_pool)
    # Sample without replacement so the same pool entry cannot appear twice
    # in one batch (np.random.choice defaults to replace=True).
    query_idx = np.random.choice(n_samples, size=n2, replace=False)
    return query_idx, X_pool[query_idx]

# Type used to annotate estimator/model arguments in this module.
estimator_type = ClassifierMixin


class ASLearner(BaseLearner):
    """A(ctively)S(topping)Learner class for automatic stopping in batch-mode AL.

    Stores an estimator, a query strategy, an assignment function and the
    training/pool data, and copies the data onto the estimator as attributes
    so that the estimator's argument-less ``fit()`` can read it.

    NOTE(review): ``BaseLearner.__init__`` is not invoked here; confirm that
    skipping the base-class initialisation is intended.
    """
    def __init__(self,
                 estimator: estimator_type = None,
                 query_strategy=None,
                 assignment_fc=None,
                 X_training: np.ndarray = None,
                 t_training: np.ndarray = None,
                 y_training: np.ndarray = None,
                 X_pool: np.ndarray = None
                 ) -> None:
        """Store the components and propagate the data to ``estimator``.

        Args:
            estimator: Object that will be fitted; receives the data below as
                attributes ``X_training``, ``y_training``, ``t_training`` and
                ``X_test``.
            query_strategy: Stored as-is on the learner.
            assignment_fc: Stored as-is on the learner.
            X_training: Training covariates.
            t_training: Training treatment indicators.
            y_training: Training outcomes.
            X_pool: Pool data; exposed to the estimator as ``X_test``.
        """
        self.estimator = estimator
        self.query_strategy = query_strategy
        self.assignment_fc = assignment_fc
        self.X_training = X_training
        self.y_training = y_training
        self.t_training = t_training
        self.X_pool = X_pool
        # The default estimator is None; only push data onto a real estimator
        # instead of failing with AttributeError on ``None.__dict__``.
        if self.estimator is not None:
            self.estimator.__dict__.update(X_training=X_training,
                                           y_training=y_training,
                                           t_training=t_training,
                                           X_test=X_pool)

    def teach(self, X, X_query):
        """Fit the estimator on ``X`` and ``X_query`` joined with ``np.hstack``.

        Returns whatever ``self.estimator.fit`` returns.
        """
        # np.hstack expects ONE sequence of arrays; passing the arrays as two
        # positional arguments raises TypeError.
        # NOTE(review): hstack joins 2-D arrays along columns - confirm that
        # this, rather than stacking rows, is what is wanted here.
        return self.estimator.fit(np.hstack((X, X_query)))

    def fit(self):
        """Fit the wrapped estimator on the data it already holds."""
        self.estimator.fit()

In [None]:
%nbdev_export
class ITEEstimator(BaseEstimator):
    """Class for building a naive estimator for ITE estimation.

    Wraps a model exposing ``fit`` and ``predict_proba``. Two modes:

    * ``two_model=False``: one model is fitted on the covariates with the
      treatment indicator appended as an extra column; predictions are made
      with that column set to 1 and to 0.
    * ``two_model=True``: ``model`` is fitted on the rows with ``t == 0`` and
      a deep copy (``m1``) on the remaining rows.
    """
    def __init__(self,
                 model: estimator_type = None,
                 X_training: np.ndarray = None,
                 t_training: np.ndarray = None,
                 y_training: np.ndarray = None,
                 X_test: np.ndarray = None,
                 two_model: bool = False,
                 **kwargs
                 ) -> None:
        """Store the model and data.

        Args:
            model: Model to fit; must provide ``fit`` and ``predict_proba``.
            X_training: Training covariates (2-D; rows are indexed in ``fit``).
            t_training: Training treatment indicators.
            y_training: Training outcomes.
            X_test: Default data used by ``predict`` when no ``X`` is given.
            two_model: Fit separate models for ``t == 0`` rows and the rest.
            **kwargs: Accepted and ignored.
        """
        self.model = model
        self.X_training = X_training
        self.y_training = y_training
        self.t_training = t_training
        self.two_model = two_model
        self.X_test = X_test
        if X_training is not None:
            self.N_training = X_training.shape[0]

    def fit(self):
        """Fit the model(s) on the stored training data."""
        # Always recompute: X_training may have been replaced after
        # construction, which would leave a cached N_training stale.
        self.N_training = self.X_training.shape[0]
        if self.two_model:
            self.m1 = deepcopy(self.model)
            control_ix = np.where(self.t_training == 0)[0]
            # Negating an integer index array does NOT give the complement
            # (it indexes from the end), so select the other rows explicitly.
            treated_ix = np.where(self.t_training != 0)[0]
            self.model.fit(self.X_training[control_ix, :],
                           self.y_training[control_ix])
            self.m1.fit(self.X_training[treated_ix, :],
                        self.y_training[treated_ix])
        else:
            # Single model: treatment becomes one more feature column.
            treatment_col = self.t_training.reshape((self.N_training, -1))
            self.model.fit(np.hstack((self.X_training, treatment_col)),
                           self.y_training)

    def predict(self, X=None):
        """Predict treatment effects for ``X`` (defaults to ``self.X_test``).

        Returns:
            Tuple ``(y1_preds - y0_preds, y1_preds, y0_preds)`` where each
            ``y*_preds`` is column 1 of the model's ``predict_proba`` output
            (presumably the positive-class probability - verify against the
            model's class ordering).
        """
        if X is None:
            X = self.X_test
        if self.two_model:
            self.y1_preds = self.m1.predict_proba(X)[:, 1]
            self.y0_preds = self.model.predict_proba(X)[:, 1]
        else:
            N_test = X.shape[0]
            # Score every row twice: once with treatment = 1, once with 0.
            self.y1_preds = self.model.predict_proba(
                np.hstack((X, np.ones(N_test).reshape(-1, 1))))[:, 1]
            self.y0_preds = self.model.predict_proba(
                np.hstack((X, np.zeros(N_test).reshape(-1, 1))))[:, 1]
        return self.y1_preds - self.y0_preds, self.y1_preds, self.y0_preds

In [None]:
# Smoke test: fit a two-model ITEEstimator on simulated data.
X = np.random.normal(size=1000).reshape((500, 2))
t = np.random.binomial(n=1, p=0.5, size=500)
# Outcome probability is a logistic function of the second covariate and the treatment.
p_outcome = 1 / (1 + np.exp(X[:, 1] * 2 + t * 3))
y = np.random.binomial(n=1, p=p_outcome)
X_test = np.random.normal(size=200).reshape((100, 2))
a = ITEEstimator(LogisticRegression(solver="lbfgs"), X, t, y, two_model=True)
a.fit()
assert isinstance(a.model, LogisticRegression)  # test assigning a model
assert a.X_training.shape == (500, 2)           # test data passing for class
assert a.model.intercept_ is not None           # model has been fitted

In [None]:
# a = ITEEstimator(RandomForestClassifier(), X, t, y, X_test, two_model = False )


In [None]:
# Build a learner around a single-model random-forest ITE estimator,
# using the random batch sampler and the simulated training data above.
asl = ASLearner(
    estimator=ITEEstimator(model=RandomForestClassifier()),
    query_strategy=random_batch_sampling,
    X_training=X,
    t_training=t,
    y_training=y,
)

In [None]:
# ASLearner.fit() simply delegates to the wrapped estimator's fit().
asl.fit()

