In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from amortized_bootstrap import Amortized_bootstrap_learning

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# Breast-cancer data from scikit-learn: simply the first binary
# classification dataset that came to hand.
data = load_breast_cancer()
scaler = StandardScaler()

# Keep only the first two features for simplicity (any number would work),
# standardize them, then append a constant column of ones
# (presumably the intercept term for the logistic model -- confirm).
# NOTE(review): the scaler is fit on the full dataset before the train/test
# split, so test-set statistics influence the scaling.
X_0 = scaler.fit_transform(data.data[:, :2])
X_0 = np.column_stack((X_0, np.ones(len(X_0))))
y = data.target

In [3]:
X_0

array([[ 1.09706398, -2.07333501,  1.        ],
       [ 1.82982061, -0.35363241,  1.        ],
       [ 1.57988811,  0.45618695,  1.        ],
       ...,
       [ 0.70228425,  2.0455738 ,  1.        ],
       [ 1.83834103,  2.33645719,  1.        ],
       [-1.80840125,  1.22179204,  1.        ]])

In [4]:
# Hold out 10% of the samples for testing, stratified on the label so both
# parts keep the same class balance; the seed is fixed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X_0,
    y,
    test_size=0.1,
    stratify=y,
    random_state=42,
)

In [5]:
# Fraction of label-1 samples in each split (labels are summed and divided by the count).
print('Class ratio:\ntrain: {:2.2%}\ntest: {:2.2%}'.format(
    *(labels.sum() / len(labels) for labels in (y_train, y_test))
))

Class ratio:
train: 62.70%
test: 63.16%


In [6]:
class Implicit_model(nn.Module):
    """Fully connected network: two ReLU hidden layers and a linear output layer.

    Args:
        input_size: number of input features.
        output_size: number of output features.
        hid_size: width of both hidden layers (default 256).
    """

    def __init__(self, input_size, output_size, hid_size=256):
        super().__init__()
        # Layers are created in forward order: input -> hidden -> hidden -> output.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hid_size)
        self.fc2 = nn.Linear(in_features=hid_size, out_features=hid_size)
        self.fc3 = nn.Linear(in_features=hid_size, out_features=output_size)

    def forward(self, x):
        """Apply fc1 and fc2 with ReLU activations, then fc3 with no activation."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


# Quick look at the architecture: build an instance and print its layer layout.
# (`net` is not used again in the cells visible here.)
net = Implicit_model(input_size=12, output_size=1)
print(net)

Implicit_model(
  (fc1): Linear(in_features=12, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=1, bias=True)
)


In [7]:
# this is P(y|X, theta)
def logistic_model(x, A):
    """Return sigmoid(x @ A.T), the logistic-model probability for each input row.

    Args:
        x: tensor of inputs; its last dimension must match the last
            dimension of ``A``.
        A: tensor of parameters; assumed 2-D with one parameter vector per
            row, as produced by the implicit model in this notebook.

    Returns:
        Tensor of probabilities with one column per row of ``A``.
    """
    # torch.sigmoid replaces the deprecated functional alias F.sigmoid;
    # the computed values are the same.
    return torch.sigmoid(torch.matmul(x, A.T))

In [8]:
# Amortized_bootstrap_learning is already imported in the first cell,
# so it is not re-imported here.

# The implicit model takes a 1-dimensional input and outputs one value per
# column of X_train; .double() matches the float64 training data.
implicit_model = Implicit_model(1, X_train.shape[1]).double()

# Loss for classification. `size_average=True` is a deprecated argument;
# reduction='mean' is its documented equivalent.
criterion = torch.nn.BCELoss(reduction='mean')

Amortized_bootstrap_learning(
    implicit_model, logistic_model, criterion, X_train, y_train,
    n_bootstrap_sampling=100, n_epochs=100, batch_size=64, learning_rate=0.02,
)



Loss:  0.26582251472593654
Loss:  0.2500077381677759
Loss:  0.2555500744603407
Loss:  0.2675235210440841
Loss:  0.2805038805698827
Loss:  0.21319385006257813
Loss:  0.25692396503759196
Loss:  0.1818853313071589
Loss:  0.2576655498577998
Loss:  0.2851795022070137


In [160]:
# Now we can use the trained weights of the implicit model to perform bagging.

In [9]:
# As an example, consider bagging with 10 logistic classifiers.
# Now that we have a sampler for the bootstrapped statistic, we don't need
# to bootstrap our initial data.

N_BAGGING_MODELS = 10  # number of sampled classifiers to average over

# X_test is the same for every draw, so convert it to a tensor only once.
X_test_tensor = torch.tensor(X_test)

bagging_y = []
with torch.no_grad():  # inference only: no autograd graph is needed
    for i in range(N_BAGGING_MODELS):
        # One standard-normal draw of shape (1, 1) is fed to the implicit
        # model, whose output is used as the logistic-model parameters.
        ksi = np.random.normal(size=(1, 1))
        theta = implicit_model(torch.tensor(ksi).double())
        y_pred = logistic_model(X_test_tensor, theta)
        bagging_y.append(y_pred.numpy())

# Average the predicted probabilities over the sampled classifiers.
y_probs = np.array(bagging_y).mean(axis=0)