In [14]:
!pip install einops

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# TabTransf + naive

In [15]:
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn, einsum
from einops import rearrange
from tqdm import tqdm
# helpers

def exists(val):
    return val is not None


def default(val, d):
    return val if exists(val) else d


# classes

class Residual(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(x, **kwargs) + x


class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)


# attention

class GEGLU(nn.Module):
    def forward(self, x):
        x, gates = x.chunk(2, dim=-1)
        return x * F.gelu(gates)


class FeedForward(nn.Module):
    def __init__(self, dim, mult=4, dropout=0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, dim * mult * 2),
            GEGLU(),
            nn.Dropout(dropout),
            nn.Linear(dim * mult, dim)
        )

    def forward(self, x, **kwargs):
        return self.net(x)


class Attention(nn.Module):
    def __init__(
            self,
            dim,
            heads=8,
            dim_head=16,
            dropout=0.
    ):
        super().__init__()
        inner_dim = dim_head * heads
        self.heads = heads
        self.scale = dim_head ** -0.5

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
        self.to_out = nn.Linear(inner_dim, dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        h = self.heads
        q, k, v = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), (q, k, v))
        sim = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale

        attn = sim.softmax(dim=-1)
        attn = self.dropout(attn)

        out = einsum('b h i j, b h j d -> b h i d', attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)', h=h)
        return self.to_out(out)


# transformer

class Transformer(nn.Module):
    def __init__(self, num_tokens, dim, depth, heads, dim_head, attn_dropout, ff_dropout):
        super().__init__()
        self.embeds = nn.Embedding(num_tokens, dim)     # all number of tokens to hidd dim
        self.layers = nn.ModuleList([])

        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                Residual(PreNorm(dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=attn_dropout))),
                Residual(PreNorm(dim, FeedForward(dim, dropout=ff_dropout))),
            ]))

    def forward(self, x):
        x = self.embeds(x)

        for attn, ff in self.layers:
            x = attn(x)
            x = ff(x)

        return x


# mlp

class MLP(nn.Module):
    def __init__(self, dims, act=None):
        super().__init__()
        dims_pairs = list(zip(dims[:-1], dims[1:]))
        layers = []
        for ind, (dim_in, dim_out) in enumerate(dims_pairs):
            is_last = ind >= (len(dims_pairs) - 1)
            linear = nn.Linear(dim_in, dim_out)
            layers.append(linear)

            # removed last layer to get logits
            # if is_last:
            #     continue
            # act = default(act, nn.ReLU())
            # layers.append(act)

        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        return self.mlp(x)


# main class

class TabTransformer(nn.Module):
    def __init__(
            self,
            *,
            categories,
            num_continuous,
            dim,
            depth,
            heads,
            dim_head=16,
            dim_out=1,
            mlp_hidden_mults=(4, 2),
            mlp_act=None,
            num_special_tokens=2,
            continuous_mean_std=None,
            attn_dropout=0.,
            ff_dropout=0.,
            seed=42
    ):
        super().__init__()
        assert all(map(lambda n: n > 0, categories)), 'number of each category must be positive'

        self.categories = categories
        torch.manual_seed(seed)

        # categories related calculations

        self.num_categories = len(categories)  # len of input sequences
        self.num_unique_categories = sum(categories)  # number of all categories

        # create category embeddings table

        self.num_special_tokens = num_special_tokens  # for missing?
        total_tokens = self.num_unique_categories + num_special_tokens

        # for automatically offsetting unique category ids to the correct position in the categories embedding table

        categories_offset = F.pad(torch.tensor(list(categories)), (1, 0), value=num_special_tokens)  # ??
        categories_offset = categories_offset.cumsum(dim=-1)[:-1]  # cumulative sum
        # if categ = (2, 3, 2, 3), categories_offset = tensor([0, 2, 5, 7]), bias for column categs
        self.register_buffer('categories_offset', categories_offset)  # save it and use in forward

        # continuous

        if exists(continuous_mean_std):
            assert continuous_mean_std.shape == (num_continuous,
                                                 2), f'continuous_mean_std must have a shape of ({num_continuous}, 2) where the last dimension contains the mean and variance respectively'
        self.register_buffer('continuous_mean_std', continuous_mean_std)

        self.norm = nn.LayerNorm(num_continuous)
        self.num_continuous = num_continuous

        # transformer

        self.transformer = Transformer(
            num_tokens=total_tokens,
            dim=dim,
            depth=depth,
            heads=heads,
            dim_head=dim_head,
            attn_dropout=attn_dropout,
            ff_dropout=ff_dropout
        )

        # mlp to logits

        input_size = (dim * self.num_categories) + num_continuous
        l = input_size // 8

        hidden_dimensions = list(map(lambda t: l * t, mlp_hidden_mults))
        all_dimensions = [input_size, *hidden_dimensions, dim_out]

        self.mlp = MLP(all_dimensions, act=mlp_act)

    def forward(self, x_categ, x_cont):
        assert x_categ.shape[
                   -1] == self.num_categories, f'you must pass in {self.num_categories} values for your categories input'
        x_categ += self.categories_offset    #    TODO

        x = self.transformer(x_categ)

        flat_categ = x.flatten(1)

        assert x_cont.shape[
                   1] == self.num_continuous, f'you must pass in {self.num_continuous} values for your continuous input'

        if exists(self.continuous_mean_std):
            mean, std = self.continuous_mean_std.unbind(dim=-1)  # splits tensor into 2 parts
            x_cont = (x_cont - mean) / std

        normed_cont = self.norm(x_cont)  # norm over all batch

        x = torch.cat((flat_categ, normed_cont), dim=-1)
        return self.mlp(x)

    def naive_recover(self, data_nan, nans_pos, device):
        # now x_categ contains NaNs in its rows
        # our task is to find the most suitable token in the corresponding column tokens
        self.eval()
        recovered_labels = []
        for j in tqdm(range(len(nans_pos))):
            i = nans_pos[j]
            # create tensor with many variants for substitution
            row = np.array([data_nan[j]] * self.categories[i])
            row[:, i] = np.arange(self.categories[i])
            # get contextual embeddings
            x_categ = (torch.LongTensor(row).to(device) + self.categories_offset)    # (N, input)
            with torch.no_grad():
                w_categ = self.transformer(x_categ)    # (N, input, hidden)
            # choose the best
            v = w_categ[:, i]
            w_categ = torch.cat((w_categ[:, :i], w_categ[:, i+1:]), dim=1)
            w_pairs = torch.matmul(w_categ, v.unsqueeze(2)).squeeze(2)    # (N,imp,h)x(N,h,1)=(N,inp,1)->(N,inp)
            losses = -nn.LogSoftmax(dim=1)(w_pairs).sum(dim=1)    # (N)
            recovered_labels.append(torch.argmin(losses).detach().cpu().item())

        return np.array(recovered_labels)



# train_simple_tab

In [16]:
import os
import sys
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score

import random


def my_train_test_split(X1, X2, y, test_size=0.2, val_size=0.05, random_state=42):
    # X1 - categorical, X2 - continious, y - labels
    assert X1.shape[0] == X2.shape[0] == y.shape[0]
    n1 = int(np.around(test_size * y.shape[0]))
    n2 = int(np.around((val_size + test_size) * y.shape[0]))
    if random_state is not None:
        np.random.seed(random_state)

    idx = np.random.permutation(y.shape[0])

    X1_test = X1[idx][:n1]
    X2_test = X2[idx][:n1]
    y_test = y[idx][:n1]

    X1_val = X1[idx][n1:n2]
    X2_val = X2[idx][n1:n2]
    y_val = y[idx][n1:n2]

    X1_train = X1[idx][n2:]
    X2_train = X2[idx][n2:]
    y_train = y[idx][n2:]

    return X1_train, X2_train, y_train, X1_val, X2_val, y_val, X1_test, X2_test, y_test


class MyDataset(Dataset):
    def __init__(self, X1, X2, y):
        self.X1 = X1
        self.X2 = X2
        self.y = y

    def __len__(self):
        return self.y.shape[0]

    def __getitem__(self, idx):
        return self.X1[idx], self.X2[idx], self.y[idx]


def count_acc(y_true, y_pred):
    # x = (torch.round(nn.Sigmoid()(y_pred)) == y_true).sum().float().item() / y_true.shape[0] / y_true.shape[1]
    # y = accuracy_score(torch.round(nn.Sigmoid()(y_pred)).detach().numpy(), y_true.detach().numpy())
    # assert x == y
    return (torch.round(nn.Sigmoid()(y_pred)) == y_true).sum().float().item() / y_true.shape[0]
    # return accuracy_score(torch.round(nn.Sigmoid()(y_pred)).detach().numpy(), y_true.detach().numpy())
    # return x


def count_auc(model, device, *dataloaders):
    y_true, y_pred = [], []
    with torch.no_grad():
        for dataloader in dataloaders:
            for i, (x_categ, x_cont, labels) in enumerate(dataloader):
                x_categ, x_cont, labels = x_categ.long().to(device), x_cont.float().to(device), labels.float().to(
                    device)
                pred = nn.Sigmoid()(model.forward(x_categ, x_cont))
                y_true.extend(list(labels.detach().cpu().numpy()))
                y_pred.extend(list(pred.detach().cpu().numpy()))

    return roc_auc_score(y_true, y_pred)


def count_pres_rec_f1(model, device, *dataloaders):
    y_true, y_pred = [], []
    with torch.no_grad():
        for dataloader in dataloaders:
            for i, (x_categ, x_cont, labels) in enumerate(dataloader):
                x_categ, x_cont, labels = x_categ.long().to(device), x_cont.float().to(device), labels.float().to(
                    device)
                pred = nn.Sigmoid()(model.forward(x_categ, x_cont))
                y_true.extend(list(labels.detach().cpu().numpy()))
                y_pred.extend(list(torch.round(pred).detach().cpu().numpy()))

    return precision_score(y_true, y_pred, zero_division=0), \
           recall_score(y_true, y_pred, zero_division=0), \
           f1_score(y_true, y_pred, zero_division=0)


def single_pass(model, dataloader, loss_func, device, optim=None):
    loss_count, acc_count = 0, 0
    for i, (x_categ, x_cont, labels) in enumerate(dataloader):
        x_categ, x_cont, labels = x_categ.long().to(device), x_cont.float().to(device), labels.float().to(device)
        pred = model.forward(x_categ, x_cont)
        loss = loss_func(pred, labels)
        loss_count += loss.item()
        acc_count += count_acc(labels, pred)
        # roc_auc += roc_auc_score(labels, pred, average='macro')
        if optim is not None:
            loss.backward()
            optim.step()
    return loss_count / len(dataloader), acc_count / len(dataloader)


def plot_results(ax, train_results: list, val_results: list, test_result, label):
    epochs = np.arange(1, len(train_results) + 1)
    ax.plot(epochs, train_results, label='train')
    ax.plot(epochs, val_results, label='validation')
    ax.plot(epochs[-1], test_result,
            marker='o', linestyle='none', label='test')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(label)
    ax.grid(linestyle=':')
    ax.legend()


def train_model(model, loss, optim, epochs, device, dataloaders, single_pass=single_pass):
    dataloader_train, dataloader_val, dataloader_test = dataloaders
    train_loss_all, val_loss_all, train_acc_all, val_acc_all = [], [], [], []
    # training loop
    for epoch in range(epochs):
        # train
        # print('train')
        train_loss, train_acc = single_pass(model, dataloader_train, loss, device, optim)
        # print('val')
        # validation
        with torch.no_grad():
            val_loss, val_acc = single_pass(model, dataloader_val, loss, device)

        print(
            f'epoch {epoch}, train_loss={train_loss}, validation_loss={val_loss}, train_acc={train_acc}, val_acc={val_acc}')

        train_loss_all.append(train_loss)
        val_loss_all.append(val_loss)
        train_acc_all.append(train_acc)
        val_acc_all.append(val_acc)

    # test
    model.eval()
    with torch.no_grad():
        test_loss, test_acc = single_pass(model, dataloader_test, loss, device)
        test_AUC = count_auc(model, device, dataloader_test)
        pres, rec, f1 = count_pres_rec_f1(model, device, dataloader_test)

        print(f'test_loss={test_loss}, test_acc={test_acc}')
        print('test_AUC=', test_AUC)
        print('pres=', pres, 'rec=', rec, 'f1=', f1)
    model.train()
    # print(count_auc(model, device, dataloader_train, dataloader_val, dataloader_test))
    return train_loss_all, val_loss_all, train_acc_all, val_acc_all, test_loss, test_acc


def my_subplots(train_loss, val_loss, train_acc, val_acc, test_loss, test_acc):
    _, ax1 = plt.subplots()
    plot_results(ax1, train_loss, val_loss, test_loss, 'Loss')
    _, ax2 = plt.subplots()
    plot_results(ax2, train_acc, val_acc, test_acc, 'Accuracy')
    plt.show()



## tabtrain

In [17]:
def tab_train(filename, mode='mlm_single', seed=42):  # mlm_single / mlm_different
    if mode == 'ordinary':
        path = os.path.join('data/preprocessed', filename)
    elif mode == 'naive':
        path = os.path.join('data/recovered', f'{filename}_naive')
    elif mode == 'mlm_single':
        path = os.path.join('data/recovered', f'{filename}_mlm_single')
    elif mode == 'mlm_different':
        path = os.path.join('data/recovered', f'{filename}_mlm_different')

    data_categ = pd.read_csv(os.path.join(path, 'categ.csv')).to_numpy()
    data_cont = pd.read_csv(os.path.join(path, 'cont.csv')).to_numpy()
    data_labels = pd.read_csv(os.path.join(path, 'labels.csv')).to_numpy()
    # print(data_cont.shape)

    X1_train, X2_train, y_train, X1_val, X2_val, y_val, X1_test, X2_test, y_test = my_train_test_split(data_categ,
                                                                                                       data_cont,
                                                                                                       data_labels,
                                                                                                       test_size=0.2,
                                                                                                       val_size=0.05,
                                                                                                       random_state=seed)

    cont_mean_std = np.array([X2_train.mean(axis=0), X2_train.std(axis=0)]).transpose(1, 0)
    cont_mean_std = torch.Tensor(cont_mean_std)

    categories = tuple(len(np.unique(data_categ[:, i])) for i in range(data_categ.shape[1]))
    # print(categories)
    model = TabTransformer(
        categories=categories,  # tuple containing the number of unique values within each category
        num_continuous=data_cont.shape[-1],  # number of continuous values
        dim=32,  # dimension, paper set at 32
        dim_out=1,  # binary prediction, but could be anything
        depth=6,  # depth, paper recommended 6
        heads=8,  # heads, paper recommends 8
        attn_dropout=0.1,  # post-attention dropout
        ff_dropout=0.1,  # feed forward dropout
        mlp_hidden_mults=(4, 2),  # relative multiples of each hidden dimension of the last mlp to logits
        mlp_act=None,  # activation for final mlp, defaults to relu, but could be anything else (selu etc)
        continuous_mean_std=cont_mean_std,  # (optional) - normalize the continuous values before layer norm
        seed=seed
    )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    batch_size = 1900
    epochs = 10
    lr = 1e-4
    optim = Adam(model.parameters(), lr=lr)
    loss = F.binary_cross_entropy_with_logits

    dataset_train = MyDataset(X1_train, X2_train, y_train)
    dataset_val = MyDataset(X1_val, X2_val, y_val)
    dataset_test = MyDataset(X1_test, X2_test, y_test)

    dataloader_train = DataLoader(dataset_train, batch_size=batch_size)
    dataloader_val = DataLoader(dataset_val, batch_size=64)
    dataloader_test = DataLoader(dataset_test, batch_size=64)
    dataloaders = [dataloader_train, dataloader_val, dataloader_test]

    train_loss_all, val_loss_all, train_acc_all, val_acc_all, test_loss, test_acc = \
        train_model(model, loss, optim, epochs, device, dataloaders)

    return train_loss_all, val_loss_all, train_acc_all, val_acc_all, test_loss, test_acc

# naive recover

In [18]:
  def train(model, data_categ, data_cont, data_labels, seed=42):

    X1_train, X2_train, y_train, X1_val, X2_val, y_val, X1_test, X2_test, y_test = my_train_test_split(data_categ,
                                                                                                       data_cont,
                                                                                                       data_labels,
                                                                                                       test_size=0.2,
                                                                                                       val_size=0.05,
                                                                                                       random_state=seed)

    continuous_mean_std = np.array([X2_train.mean(axis=0), X2_train.std(axis=0)]).transpose(1, 0)
    continuous_mean_std = torch.Tensor(continuous_mean_std)
    model.register_buffer('continuous_mean_std', continuous_mean_std)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    batch_size = 10000
    epochs = 20  # 10
    lr = 1e-4
    optim = Adam(model.parameters(), lr=lr)
    loss = F.binary_cross_entropy_with_logits

    dataset_train = MyDataset(X1_train, X2_train, y_train)
    dataset_val = MyDataset(X1_val, X2_val, y_val)
    dataset_test = MyDataset(X1_test, X2_test, y_test)

    # def seed_worker(worker_id):
    #     worker_seed = torch.initial_seed() % 2 ** 32
    #     np.random.seed(worker_seed)
    #     random.seed(worker_seed)
    #
    # g = torch.Generator()
    # g.manual_seed(0)

    dataloader_train = DataLoader(dataset_train, batch_size=batch_size)
    dataloader_val = DataLoader(dataset_val, batch_size=64)
    dataloader_test = DataLoader(dataset_test, batch_size=64)
    dataloaders = [dataloader_train, dataloader_val, dataloader_test]

    train_loss_all, val_loss_all, train_acc_all, val_acc_all, test_loss, test_acc =\
        train_model(model, loss, optim, epochs, device, dataloaders)
    return train_loss_all, val_loss_all, train_acc_all, val_acc_all, test_loss, test_acc


def separate_nans(data_categ, data_cont, data_labels):
    # find rows with NaNs and separate them from pure data
    # Nans rows are used to recover data in the missings
    idx = np.array([i for i, x in enumerate(data_categ) if not any(np.isnan(x))])
    idx = np.in1d(np.arange(data_categ.shape[0]), idx)    # idx for clear rows
    categ_clear = data_categ[idx]
    cont_clear = data_cont[idx]
    labels_clear = data_labels[idx]

    nidx = np.logical_not(idx)
    categ_nan = data_categ[nidx]
    cont_nan = data_cont[nidx]
    labels_nan = data_labels[np.logical_not(idx)]
    nans_pos = np.where(np.isnan(categ_nan))

    # we suppose that we have only one Nan in rows
    return categ_clear, categ_nan, cont_clear, cont_nan, labels_clear, labels_nan, nans_pos


def main(filename, seed=42):
    # filename = sys.argv[1]
    # data_path = '/content/drive/MyDrive/HSE/NIR/data'
    path = os.path.join('/content/drive/MyDrive/HSE/NIR/data/preprocessed', filename)
    path_nans = os.path.join('/content/drive/MyDrive/HSE/NIR/data/with_nans', filename)
    data_categ_all = pd.read_csv(os.path.join(path_nans, 'categ.csv')).to_numpy()    # read with nans file
    true_labels = pd.read_csv(os.path.join(path_nans, 'true_labels.csv')).to_numpy().reshape(-1)
    data_cont = pd.read_csv(os.path.join(path, 'cont.csv')).to_numpy()
    data_labels = pd.read_csv(os.path.join(path, 'labels.csv')).to_numpy()

    categ_clear, categ_nan, cont_clear, cont_nan, labels_clear, labels_nan, nans_pos =\
        separate_nans(data_categ_all, data_cont, data_labels)    # data_categ is pure of NaNs

    categories = tuple(len(np.unique(categ_clear[:, i])) for i in range(categ_clear.shape[1]))

    model = TabTransformer(
        categories=categories,  # tuple containing the number of unique values within each category
        num_continuous=data_cont.shape[-1],  # number of continuous values
        dim=32,  # dimension, paper set at 32
        dim_out=1,  # binary prediction, but could be anything
        depth=6,  # depth, paper recommended 6
        heads=2,  # heads, paper recommends 8
        attn_dropout=0.1,  # post-attention dropout
        ff_dropout=0.1,  # feed forward dropout
        mlp_hidden_mults=(4, 2),  # relative multiples of each hidden dimension of the last mlp to logits
        mlp_act=None,  # activation for final mlp, defaults to relu, but could be anything else (selu etc)
        continuous_mean_std=None,  # (optional) - normalize the continuous values before layer norm
        seed=seed
    )
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    train(model, categ_clear, cont_clear, labels_clear)
    # recover labels
    recovered_labels = model.naive_recover(categ_nan, nans_pos[1], device)
    print('acc', accuracy_score(true_labels, recovered_labels))

    # insert them into dataset
    categ_nan[nans_pos] = recovered_labels
    new_categ = np.vstack((categ_clear, categ_nan))
    new_cont = np.vstack((cont_clear, cont_nan))
    new_labels = np.vstack((labels_clear, labels_nan))

    pd.DataFrame(data=new_categ).to_csv(f'/content/drive/MyDrive/HSE/NIR/data/recovered/{filename}_naive/categ.csv', index=False)
    pd.DataFrame(data=new_cont).to_csv(f'/content/drive/MyDrive/HSE/NIR/data/recovered/{filename}_naive/cont.csv', index=False)
    pd.DataFrame(data=new_labels).to_csv(f'/content/drive/MyDrive/HSE/NIR/data/recovered/{filename}_naive/labels.csv', index=False)

    return accuracy_score(true_labels, recovered_labels)


# adult 15%

In [19]:
metrics = []

In [20]:
metrics.append(main(filename='adult', seed=42))

epoch 0, train_loss=0.5990528166294098, validation_loss=0.5533750635204893, train_acc=0.7541479991204925, val_acc=0.7629193722943722
epoch 1, train_loss=0.5404990911483765, validation_loss=0.526293312961405, train_acc=0.7627979991204925, val_acc=0.7629193722943722
epoch 2, train_loss=0.5253934413194656, validation_loss=0.5218877539490209, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 3, train_loss=0.5201897025108337, validation_loss=0.506365446430264, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 4, train_loss=0.4923008158802986, validation_loss=0.46003398208907154, train_acc=0.7628729991204926, val_acc=0.7643398268398268
epoch 5, train_loss=0.44238707423210144, validation_loss=0.41306940443588025, train_acc=0.7694410290237467, val_acc=0.7897050865800866
epoch 6, train_loss=0.4075961485505104, validation_loss=0.41064611438548926, train_acc=0.8062182717678101, val_acc=0.8173024891774892
epoch 7, train_loss=0.42802419513463974, validation_loss=0.47402931

100%|██████████| 7326/7326 [00:29<00:00, 245.12it/s]


acc 0.12653562653562653


In [21]:
metrics.append(main(filename='adult', seed=10))

epoch 0, train_loss=0.6094576418399811, validation_loss=0.545303304087032, train_acc=0.7432326297273527, val_acc=0.7633928571428571
epoch 1, train_loss=0.5277517586946487, validation_loss=0.5038165359786062, train_acc=0.7628729991204926, val_acc=0.7629193722943722
epoch 2, train_loss=0.507867231965065, validation_loss=0.5103930921265574, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 3, train_loss=0.5228726416826248, validation_loss=0.5265562651735364, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 4, train_loss=0.5314897298812866, validation_loss=0.5137490783676957, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 5, train_loss=0.502691738307476, validation_loss=0.4600656475081588, train_acc=0.7628979991204925, val_acc=0.763866341991342
epoch 6, train_loss=0.44276632368564606, validation_loss=0.3995020732735143, train_acc=0.769785290237467, val_acc=0.7938311688311689
epoch 7, train_loss=0.3932075798511505, validation_loss=0.381013080929264

100%|██████████| 7326/7326 [00:30<00:00, 241.81it/s]


acc 0.07835107835107835


In [23]:
metrics.append(main(filename='adult', seed=100))

epoch 0, train_loss=0.669890746474266, validation_loss=0.5954241030143969, train_acc=0.598668491644679, val_acc=0.763866341991342
epoch 1, train_loss=0.5691679567098618, validation_loss=0.5320686706990907, train_acc=0.7629479991204926, val_acc=0.7629193722943722
epoch 2, train_loss=0.5262462794780731, validation_loss=0.5199894823811271, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 3, train_loss=0.5291000157594681, validation_loss=0.5406700526223038, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 4, train_loss=0.553960993885994, validation_loss=0.5636178715662523, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 5, train_loss=0.5684778839349747, validation_loss=0.5567064691673625, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 6, train_loss=0.5473040044307709, validation_loss=0.5126947625116869, train_acc=0.7628979991204925, val_acc=0.7633928571428571
epoch 7, train_loss=0.4921822100877762, validation_loss=0.447210217967177

100%|██████████| 7326/7326 [00:30<00:00, 241.68it/s]


acc 0.09391209391209392


In [24]:
metrics.append(main(filename='adult', seed=1000))

epoch 0, train_loss=0.6584912240505219, validation_loss=0.5740347360119675, train_acc=0.6258678759894459, val_acc=0.7614989177489178
epoch 1, train_loss=0.5456288456916809, validation_loss=0.5131699325460376, train_acc=0.7627479991204924, val_acc=0.7629193722943722
epoch 2, train_loss=0.5120210498571396, validation_loss=0.5175526458205599, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 3, train_loss=0.5323549062013626, validation_loss=0.5534839097297553, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 4, train_loss=0.5669949948787689, validation_loss=0.572838997299021, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 5, train_loss=0.5714035332202911, validation_loss=0.5467228627566135, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 6, train_loss=0.5276701152324677, validation_loss=0.47955697052406543, train_acc=0.7628229991204925, val_acc=0.7643398268398268
epoch 7, train_loss=0.4566556587815285, validation_loss=0.41257644602

100%|██████████| 7326/7326 [00:30<00:00, 237.86it/s]


acc 0.10128310128310128


In [25]:
metrics.append(main(filename='adult', seed=10000))

epoch 0, train_loss=0.6139593422412872, validation_loss=0.5563425710707, train_acc=0.7574928759894459, val_acc=0.7629193722943722
epoch 1, train_loss=0.5374782979488373, validation_loss=0.5146578333594582, train_acc=0.7628479991204926, val_acc=0.7629193722943722
epoch 2, train_loss=0.5152581632137299, validation_loss=0.5172929501894749, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 3, train_loss=0.5284291654825211, validation_loss=0.5369074218200914, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 4, train_loss=0.5431093573570251, validation_loss=0.5346482983141234, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 5, train_loss=0.5259758979082108, validation_loss=0.49221891616330005, train_acc=0.7628229991204925, val_acc=0.7629193722943722
epoch 6, train_loss=0.4739871248602867, validation_loss=0.4326152909885753, train_acc=0.7628479991204924, val_acc=0.7695481601731602
epoch 7, train_loss=0.41888685524463654, validation_loss=0.400338347211

100%|██████████| 7326/7326 [00:30<00:00, 241.20it/s]


acc 0.1107016107016107


In [26]:
print(np.mean(metrics), np.std(metrics))

0.10215670215670214 0.016141820741678834
