# Objective: Use AFLite to greedily solve for $\text{arg min}_{S \subset \mathcal{D}, ~|S| \geq n}\mathcal{R}(\Phi, ~S, ~\mathcal{M})$

### 1. Imports and Global Settings

In [1]:
from datasets import load_dataset, disable_caching
from transformers import GPT2TokenizerFast, DataCollatorWithPadding, set_seed
import torch
from torch.nn.functional import one_hot
import copy
import numpy as np
import sys
sys.path.append('..')
from utils_ import tokenize, train_classifier, predict, select_k
import pickle
import itertools
# run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# seed the random number generators (via transformers' set_seed) for reproducibility
set_seed(42)

# turn off the datasets library's caching
disable_caching()

### 2. Pre-Processing
- Get SNLI Dataset (Train fold) and shuffle it using the same seed as used for obtaining GPT-2 based Feature Representation (see notebook [Filtering_Part1.ipynb](https://github.com/shashiniyer/adversarial_nli_gpt2/blob/main/gpt2-medium/notebooks_and_scripts/Filtering_Part1.ipynb))
- Remove instances without gold standard labels, i.e., label = -1
- One-hot encoding for labels
- Partition data 10%/90%; use the 90% as `train`
- Tokenise train

In [2]:
# SNLI train fold: shuffle with the fixed seed, drop instances without a gold
# label (label == -1), then replace the integer labels by float32 one-hot vectors
snli_train = (
    load_dataset('snli', split = 'train')
    .shuffle(seed = 42)
    .filter(lambda example: example['label'] != -1)
    .map(
        lambda batch: {'label': one_hot(torch.tensor(batch['label']), 3).type(torch.float32).numpy()},
        batched = True,
    )
)

# the first tenth of the shuffled data is left out; the remaining 90% becomes `train`
train = snli_train.select(range(len(snli_train) // 10, len(snli_train)))

Reusing dataset snli (/home/shana92/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b)


  0%|          | 0/551 [00:00<?, ?ba/s]

  0%|          | 0/550 [00:00<?, ?ba/s]

In [3]:
# set up tokeniser
# GPT2 uses the last token for prediction, so padding goes on the left
tokenizer = GPT2TokenizerFast.from_pretrained(
    "gpt2-medium",
    padding_side = 'left',
    padding = True,
    truncation = True,
)

# use the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token

In [4]:
# tokenize data: premise and hypothesis are joined with '|' and passed to the
# project helper `tokenize`, which is expected to add an 'exclude' flag per instance
train = train.map(lambda x: tokenize(tokenizer, x['premise'] + '|' + x['hypothesis']))
len_bef_exclusion = len(train)

# exclude instances with > 128 tokens (those flagged by `tokenize`)
train = train.filter(lambda x: x['exclude'] == False)
len_aft_exclusion = len(train)

# print message if instances were in fact excluded
# the percentage is reported relative to the number of instances BEFORE exclusion
n_excluded = len_bef_exclusion - len_aft_exclusion
if n_excluded > 0:

    print(f'{n_excluded} ' + \
          f'({n_excluded / len_bef_exclusion * 100:.2f}%) sequences excluded')

  0%|          | 0/494431 [00:00<?, ?ex/s]

  0%|          | 0/495 [00:00<?, ?ba/s]

In [5]:
# expose only the columns needed for training, returned as PyTorch tensors
train.set_format(
    type = 'torch',
    columns = ['label', 'input_ids', 'attention_mask'],
)

### 3. Set up inputs for AFLite

In [6]:
# load in the feature representation, Phi, with linear layer attached
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source
model = torch.load('feature_rep.pth')

# keep the base model on the CPU
model.to('cpu')

# freeze everything except the final parameter tensor
# NOTE(review): this leaves only the LAST parameter tensor trainable; if the final
# layer has both a weight and a bias, the weight is frozen too - confirm this is intended
all_params = list(model.parameters())
num_layers = len(all_params)
for param in all_params[:-1]:

    # freeze
    param.requires_grad = False

In [7]:
# set up data collator - https://huggingface.co/docs/transformers/main_classes/data_collator
# callable helper used as `collate_fn` by the DataLoaders: pads every batch to
# a fixed length of 128 and returns PyTorch tensors
data_collator = DataCollatorWithPadding(
    tokenizer,
    padding = 'max_length',
    max_length = 128,
    return_tensors = 'pt',
)

In [8]:
# hyper-parameters - constrained by training time available
m = 30                           # number of random partitions (classifiers trained) per AFLite iteration
n = 195_000                      # filtering continues only while |S| > n
t = 50_000                       # size of the training part of each random partition
k = 100_000                      # maximum number of instances selected for removal per iteration
tau = 0.75                       # predictability-score threshold for removal
AFLite_seeds = list(range(5))    # one full AFLite run per seed: 0, 1, 2, 3, 4

In [9]:
# hyper-parameters for model training within AFLite implementation
batch_size = 128 # batch size of the training and prediction DataLoaders; constrained by GPU memory
lr = 1e-5 # Adam learning rate; set to match Le et al. (2020) - https://arxiv.org/abs/2002.04108

### 4.  AFLite Procedure

In [None]:
# set up containers to collect outputs
filtered_datasets = {}                        # seed -> filtered dataset S
removed_idx = {x: '' for x in AFLite_seeds}   # seed -> ','-prefixed, comma-separated removed indices

# begin procedure: one independent AFLite run per seed
for seed in AFLite_seeds:

    # first step of AFLite; initialise S
    S = copy.deepcopy(train)

    # initialise iteration index
    it_idx = 0

    while len(S) > n:

        # update iteration index
        it_idx += 1

        # initialise multiset for Out-Of-Sample predictions:
        # E[i] collects every prediction made for instance i of the current S
        E = [[] for _ in range(len(S))]

        for j in range(m):

            # randomly partition S into (S\T_j, T_j) s.t. |S\T_j| = t
            # NOTE(review): the generator is seeded with j only, so for a given |S| the
            # partitions are identical across AFLite seeds and iterations - confirm intended
            tr_idx = set(np.random.default_rng(j).choice(np.arange(len(S)), t, replace = False))
            te_idx = set(range(len(S))) - tr_idx

            # fix the order of the test indices once, so that predictions can be
            # mapped back to positions in S below
            te_order = list(te_idx)
            tr, te = S.select(tr_idx), S.select(te_order)
            print(f'Seed {seed} - Iteration {it_idx} - Model {j + 1} - Begin')

            # train classifier on S\T_j, i.e. tr; every model starts from a fresh copy of `model`
            classifier = copy.deepcopy(model)
            dataloader = torch.utils.data.DataLoader(tr, batch_size=batch_size, \
                                 shuffle=True, collate_fn=data_collator)
            optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, classifier.parameters()), lr = lr)
            trained_classifier = train_classifier(classifier, dataloader, optimizer, device)

            # for all instances i in T_j, add predictions to E(i)
            te_dataloader = torch.utils.data.DataLoader(te, batch_size=batch_size, collate_fn=data_collator)
            preds = predict(trained_classifier, te_dataloader, device)
            print(f'Seed {seed} - Iteration {it_idx} - Model {j + 1} - Done')

            # there are as many predictions as test instances, in the order of `te_order`
            for pred_idx, data_idx in enumerate(te_order):

                E[data_idx].append(preds[pred_idx])

        # for all instances in S, compute predictability score
        # in the corner case that there are no predictions for an instance, we do not filter it out:
        # its length is set to 1 and its (all-padding) row never matches, giving a score of 0
        lengths = torch.tensor([max(len(instance_preds), 1) for instance_preds in E])

        # rows = instances, columns = predictions, right-padded with -1 (never equal to a label)
        preds_padded = torch.tensor(list(itertools.zip_longest(*E, fillvalue=-1))).transpose(0, 1)

        # gold class index per instance as a column vector; broadcasting compares it
        # against every prediction in the same row
        labels = S['label'].argmax(1).unsqueeze(1)

        pred_matches = torch.eq(preds_padded, labels)
        pred_match_totals = torch.sum(pred_matches, axis = 1)
        pred_scores = pred_match_totals / lengths

        # select up to k instances with the highest predictability scores subject to score >= tau
        # NOTE(review): removing up to k instances can take |S| below n on the final iteration
        selected_idx = select_k(pred_scores, tau, k, seed)
        n_selected = selected_idx.shape[0]

        if n_selected > 0:

            # cache instances selected for removal
            # NOTE(review): these indices are positions in the CURRENT S, which shrinks every
            # iteration; they are not indices into the original `train` - verify downstream use
            removed_idx[seed] += ',' + ','.join([str(idx) for idx in selected_idx])

            # filter out selected instances
            # assumes iterating selected_idx yields hashable integers - TODO confirm
            S = S.select(set(range(len(S))) - set(selected_idx))

        # early stopping, as in AFLite: once fewer than k instances reach the threshold,
        # remove them (above) and stop filtering
        if n_selected < k:

            break

    # keep the filtered dataset for this seed
    filtered_datasets[seed] = S

    # print number of instances in S, for creating random baseline
    print(f'Number of instances in S (seed {seed}): {len(S)}')

# write out list of removed indices for further analysis
with open('removed_idx.pkl', 'wb') as f:
    pickle.dump(removed_idx, f)

Seed 0 - Iteration 1 - Model 1 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.351462  [    0/50000]
loss: 0.339478  [ 4992/50000]
loss: 0.323733  [ 9984/50000]
loss: 0.371513  [14976/50000]
loss: 0.327074  [19968/50000]
loss: 0.414310  [24960/50000]
loss: 0.360435  [29952/50000]
loss: 0.316903  [34944/50000]
loss: 0.329192  [39936/50000]
loss: 0.394657  [44928/50000]
loss: 0.372505  [31200/50000]
Epoch average loss: 0.36408787965774536


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.384505  [    0/50000]
loss: 0.419292  [ 4992/50000]
loss: 0.439487  [ 9984/50000]
loss: 0.422065  [14976/50000]
loss: 0.387640  [19968/50000]
loss: 0.368946  [24960/50000]
loss: 0.349701  [29952/50000]
loss: 0.391921  [34944/50000]
loss: 0.347152  [39936/50000]
loss: 0.438856  [44928/50000]
loss: 0.348565  [31200/50000]
Epoch average loss: 0.3647885024547577


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.370970  [    0/50000]
loss: 0.397175  [ 4992/50000]
loss: 0.425714  [ 9984/50000]
loss: 0.365622  [14976/50000]
loss: 0.376260  [19968/50000]
loss: 0.401830  [24960/50000]
loss: 0.397583  [29952/50000]
loss: 0.368095  [34944/50000]
loss: 0.393686  [39936/50000]
loss: 0.346776  [44928/50000]
loss: 0.398886  [31200/50000]
Epoch average loss: 0.3642916977405548
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 1 - Done
Seed 0 - Iteration 1 - Model 2 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.452623  [    0/50000]
loss: 0.356459  [ 4992/50000]
loss: 0.327671  [ 9984/50000]
loss: 0.355331  [14976/50000]
loss: 0.380826  [19968/50000]
loss: 0.385904  [24960/50000]
loss: 0.325877  [29952/50000]
loss: 0.453212  [34944/50000]
loss: 0.400007  [39936/50000]
loss: 0.353646  [44928/50000]
loss: 0.425990  [31200/50000]
Epoch average loss: 0.3637206256389618


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.401627  [    0/50000]
loss: 0.381684  [ 4992/50000]
loss: 0.378271  [ 9984/50000]
loss: 0.373003  [14976/50000]
loss: 0.325675  [19968/50000]
loss: 0.369392  [24960/50000]
loss: 0.430628  [29952/50000]
loss: 0.337135  [34944/50000]
loss: 0.329574  [39936/50000]
loss: 0.341632  [44928/50000]
loss: 0.334829  [31200/50000]
Epoch average loss: 0.3633255660533905


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.339411  [    0/50000]
loss: 0.377477  [ 4992/50000]
loss: 0.333938  [ 9984/50000]
loss: 0.350429  [14976/50000]
loss: 0.367987  [19968/50000]
loss: 0.329853  [24960/50000]
loss: 0.338207  [29952/50000]
loss: 0.333629  [34944/50000]
loss: 0.447789  [39936/50000]
loss: 0.362146  [44928/50000]
loss: 0.446653  [31200/50000]
Epoch average loss: 0.3627510368824005
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 2 - Done
Seed 0 - Iteration 1 - Model 3 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.366838  [    0/50000]
loss: 0.346938  [ 4992/50000]
loss: 0.372451  [ 9984/50000]
loss: 0.320485  [14976/50000]
loss: 0.356794  [19968/50000]
loss: 0.369373  [24960/50000]
loss: 0.363575  [29952/50000]
loss: 0.308874  [34944/50000]
loss: 0.393721  [39936/50000]
loss: 0.382872  [44928/50000]
loss: 0.380604  [31200/50000]
Epoch average loss: 0.36189162731170654


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.301175  [    0/50000]
loss: 0.349950  [ 4992/50000]
loss: 0.352163  [ 9984/50000]
loss: 0.348009  [14976/50000]
loss: 0.409791  [19968/50000]
loss: 0.365269  [24960/50000]
loss: 0.377372  [29952/50000]
loss: 0.361114  [34944/50000]
loss: 0.318849  [39936/50000]
loss: 0.366322  [44928/50000]
loss: 0.283538  [31200/50000]
Epoch average loss: 0.3623788058757782


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.402654  [    0/50000]
loss: 0.303437  [ 4992/50000]
loss: 0.339138  [ 9984/50000]
loss: 0.330377  [14976/50000]
loss: 0.326630  [19968/50000]
loss: 0.367296  [24960/50000]
loss: 0.391380  [29952/50000]
loss: 0.333377  [34944/50000]
loss: 0.347316  [39936/50000]
loss: 0.327635  [44928/50000]
loss: 0.384607  [31200/50000]
Epoch average loss: 0.36186596751213074
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 3 - Done
Seed 0 - Iteration 1 - Model 4 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.450345  [    0/50000]
loss: 0.317346  [ 4992/50000]
loss: 0.383968  [ 9984/50000]
loss: 0.351726  [14976/50000]
loss: 0.438074  [19968/50000]
loss: 0.399947  [24960/50000]
loss: 0.372646  [29952/50000]
loss: 0.355177  [34944/50000]
loss: 0.349292  [39936/50000]
loss: 0.403655  [44928/50000]
loss: 0.445391  [31200/50000]
Epoch average loss: 0.36558693647384644


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.363079  [    0/50000]
loss: 0.376719  [ 4992/50000]
loss: 0.353817  [ 9984/50000]
loss: 0.318308  [14976/50000]
loss: 0.328502  [19968/50000]
loss: 0.455255  [24960/50000]
loss: 0.387360  [29952/50000]
loss: 0.333444  [34944/50000]
loss: 0.344278  [39936/50000]
loss: 0.293226  [44928/50000]
loss: 0.403219  [31200/50000]
Epoch average loss: 0.3643246293067932


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.395858  [    0/50000]
loss: 0.365659  [ 4992/50000]
loss: 0.390630  [ 9984/50000]
loss: 0.324348  [14976/50000]
loss: 0.278239  [19968/50000]
loss: 0.363707  [24960/50000]
loss: 0.341512  [29952/50000]
loss: 0.475533  [34944/50000]
loss: 0.366447  [39936/50000]
loss: 0.297043  [44928/50000]
loss: 0.362669  [31200/50000]
Epoch average loss: 0.3626748323440552
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 4 - Done
Seed 0 - Iteration 1 - Model 5 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.354864  [    0/50000]
loss: 0.423739  [ 4992/50000]
loss: 0.302279  [ 9984/50000]
loss: 0.379920  [14976/50000]
loss: 0.327253  [19968/50000]
loss: 0.316194  [24960/50000]
loss: 0.353090  [29952/50000]
loss: 0.371466  [34944/50000]
loss: 0.357100  [39936/50000]
loss: 0.318972  [44928/50000]
loss: 0.248706  [31200/50000]
Epoch average loss: 0.36417701840400696


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.337895  [    0/50000]
loss: 0.337823  [ 4992/50000]
loss: 0.417160  [ 9984/50000]
loss: 0.353334  [14976/50000]
loss: 0.247678  [19968/50000]
loss: 0.386853  [24960/50000]
loss: 0.353052  [29952/50000]
loss: 0.301107  [34944/50000]
loss: 0.368206  [39936/50000]
loss: 0.361346  [44928/50000]
loss: 0.377103  [31200/50000]
Epoch average loss: 0.363930344581604


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.437157  [    0/50000]
loss: 0.371344  [ 4992/50000]
loss: 0.338309  [ 9984/50000]
loss: 0.340573  [14976/50000]
loss: 0.400044  [19968/50000]
loss: 0.385437  [24960/50000]
loss: 0.421255  [29952/50000]
loss: 0.399701  [34944/50000]
loss: 0.279014  [39936/50000]
loss: 0.359794  [44928/50000]
loss: 0.394982  [31200/50000]
Epoch average loss: 0.3622462749481201
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 5 - Done
Seed 0 - Iteration 1 - Model 6 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.336022  [    0/50000]
loss: 0.351029  [ 4992/50000]
loss: 0.377041  [ 9984/50000]
loss: 0.396351  [14976/50000]
loss: 0.358240  [19968/50000]
loss: 0.378018  [24960/50000]
loss: 0.369751  [29952/50000]
loss: 0.375956  [34944/50000]
loss: 0.362680  [39936/50000]
loss: 0.457246  [44928/50000]
loss: 0.388104  [31200/50000]
Epoch average loss: 0.3646976947784424


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.355678  [    0/50000]
loss: 0.333950  [ 4992/50000]
loss: 0.245880  [ 9984/50000]
loss: 0.373475  [14976/50000]
loss: 0.365241  [19968/50000]
loss: 0.388567  [24960/50000]
loss: 0.323007  [29952/50000]
loss: 0.388598  [34944/50000]
loss: 0.437647  [39936/50000]
loss: 0.326079  [44928/50000]
loss: 0.358231  [31200/50000]
Epoch average loss: 0.3642202317714691


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.365064  [    0/50000]
loss: 0.356610  [ 4992/50000]
loss: 0.343495  [ 9984/50000]
loss: 0.374960  [14976/50000]
loss: 0.394344  [19968/50000]
loss: 0.397781  [24960/50000]
loss: 0.476880  [29952/50000]
loss: 0.401443  [34944/50000]
loss: 0.305604  [39936/50000]
loss: 0.367382  [44928/50000]
loss: 0.396358  [31200/50000]
Epoch average loss: 0.36598607897758484
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 6 - Done
Seed 0 - Iteration 1 - Model 7 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.390657  [    0/50000]
loss: 0.372994  [ 4992/50000]
loss: 0.349834  [ 9984/50000]
loss: 0.317244  [14976/50000]
loss: 0.322547  [19968/50000]
loss: 0.356212  [24960/50000]
loss: 0.377633  [29952/50000]
loss: 0.376222  [34944/50000]
loss: 0.397174  [39936/50000]
loss: 0.371627  [44928/50000]
loss: 0.357398  [31200/50000]
Epoch average loss: 0.3637089431285858


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.334312  [    0/50000]
loss: 0.379212  [ 4992/50000]
loss: 0.376179  [ 9984/50000]
loss: 0.375343  [14976/50000]
loss: 0.314856  [19968/50000]
loss: 0.366235  [24960/50000]
loss: 0.310485  [29952/50000]
loss: 0.376083  [34944/50000]
loss: 0.329782  [39936/50000]
loss: 0.394047  [44928/50000]
loss: 0.390445  [31200/50000]
Epoch average loss: 0.3633396029472351


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.374011  [    0/50000]
loss: 0.319427  [ 4992/50000]
loss: 0.351540  [ 9984/50000]
loss: 0.335823  [14976/50000]
loss: 0.342326  [19968/50000]
loss: 0.367685  [24960/50000]
loss: 0.378713  [29952/50000]
loss: 0.388330  [34944/50000]
loss: 0.427908  [39936/50000]
loss: 0.331632  [44928/50000]
loss: 0.352829  [31200/50000]
Epoch average loss: 0.3623497486114502
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 7 - Done
Seed 0 - Iteration 1 - Model 8 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.390779  [    0/50000]
loss: 0.325676  [ 4992/50000]
loss: 0.295188  [ 9984/50000]
loss: 0.383139  [14976/50000]
loss: 0.300894  [19968/50000]
loss: 0.366453  [24960/50000]
loss: 0.329902  [29952/50000]
loss: 0.337931  [34944/50000]
loss: 0.436437  [39936/50000]
loss: 0.362179  [44928/50000]
loss: 0.486388  [31200/50000]
Epoch average loss: 0.3620840609073639


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.363689  [    0/50000]
loss: 0.368611  [ 4992/50000]
loss: 0.398867  [ 9984/50000]
loss: 0.438197  [14976/50000]
loss: 0.435775  [19968/50000]
loss: 0.345470  [24960/50000]
loss: 0.389586  [29952/50000]
loss: 0.415257  [34944/50000]
loss: 0.415230  [39936/50000]
loss: 0.343064  [44928/50000]
loss: 0.374341  [31200/50000]
Epoch average loss: 0.36166685819625854


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.360981  [    0/50000]
loss: 0.351153  [ 4992/50000]
loss: 0.371974  [ 9984/50000]
loss: 0.427797  [14976/50000]
loss: 0.330811  [19968/50000]
loss: 0.318212  [24960/50000]
loss: 0.380872  [29952/50000]
loss: 0.344258  [34944/50000]
loss: 0.431164  [39936/50000]
loss: 0.289747  [44928/50000]
loss: 0.394471  [31200/50000]
Epoch average loss: 0.3609331250190735
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 8 - Done
Seed 0 - Iteration 1 - Model 9 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.361636  [    0/50000]
loss: 0.353745  [ 4992/50000]
loss: 0.342625  [ 9984/50000]
loss: 0.354838  [14976/50000]
loss: 0.373197  [19968/50000]
loss: 0.376030  [24960/50000]
loss: 0.340168  [29952/50000]
loss: 0.350261  [34944/50000]
loss: 0.362042  [39936/50000]
loss: 0.384833  [44928/50000]
loss: 0.353891  [31200/50000]
Epoch average loss: 0.3640929162502289


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.355854  [    0/50000]
loss: 0.358534  [ 4992/50000]
loss: 0.346791  [ 9984/50000]
loss: 0.372979  [14976/50000]
loss: 0.436486  [19968/50000]
loss: 0.378427  [24960/50000]
loss: 0.306944  [29952/50000]
loss: 0.426346  [34944/50000]
loss: 0.421459  [39936/50000]
loss: 0.330681  [44928/50000]
loss: 0.289165  [31200/50000]
Epoch average loss: 0.36303257942199707


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.361978  [    0/50000]
loss: 0.375472  [ 4992/50000]
loss: 0.322451  [ 9984/50000]
loss: 0.369862  [14976/50000]
loss: 0.410102  [19968/50000]
loss: 0.327101  [24960/50000]
loss: 0.332840  [29952/50000]
loss: 0.344820  [34944/50000]
loss: 0.389067  [39936/50000]
loss: 0.340831  [44928/50000]
loss: 0.367330  [31200/50000]
Epoch average loss: 0.3624172508716583
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 9 - Done
Seed 0 - Iteration 1 - Model 10 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.330019  [    0/50000]
loss: 0.375307  [ 4992/50000]
loss: 0.336338  [ 9984/50000]
loss: 0.327452  [14976/50000]
loss: 0.344552  [19968/50000]
loss: 0.317110  [24960/50000]
loss: 0.425709  [29952/50000]
loss: 0.359704  [34944/50000]
loss: 0.443153  [39936/50000]
loss: 0.329729  [44928/50000]
loss: 0.325446  [31200/50000]
Epoch average loss: 0.36376819014549255


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.352848  [    0/50000]
loss: 0.398039  [ 4992/50000]
loss: 0.359116  [ 9984/50000]
loss: 0.379244  [14976/50000]
loss: 0.311036  [19968/50000]
loss: 0.268580  [24960/50000]
loss: 0.346778  [29952/50000]
loss: 0.366569  [34944/50000]
loss: 0.385503  [39936/50000]
loss: 0.334107  [44928/50000]
loss: 0.337605  [31200/50000]
Epoch average loss: 0.3622015118598938


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.388137  [    0/50000]
loss: 0.336869  [ 4992/50000]
loss: 0.373253  [ 9984/50000]
loss: 0.349266  [14976/50000]
loss: 0.399636  [19968/50000]
loss: 0.326053  [24960/50000]
loss: 0.353333  [29952/50000]
loss: 0.325388  [34944/50000]
loss: 0.456942  [39936/50000]
loss: 0.306415  [44928/50000]
loss: 0.331770  [31200/50000]
Epoch average loss: 0.36316004395484924
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 10 - Done
Seed 0 - Iteration 1 - Model 11 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.476463  [    0/50000]
loss: 0.391122  [ 4992/50000]
loss: 0.396364  [ 9984/50000]
loss: 0.331154  [14976/50000]
loss: 0.345967  [19968/50000]
loss: 0.288185  [24960/50000]
loss: 0.341043  [29952/50000]
loss: 0.407893  [34944/50000]
loss: 0.344976  [39936/50000]
loss: 0.388872  [44928/50000]
loss: 0.458609  [31200/50000]
Epoch average loss: 0.36211076378822327


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.320051  [    0/50000]
loss: 0.412471  [ 4992/50000]
loss: 0.373504  [ 9984/50000]
loss: 0.410617  [14976/50000]
loss: 0.376297  [19968/50000]
loss: 0.346205  [24960/50000]
loss: 0.330169  [29952/50000]
loss: 0.321824  [34944/50000]
loss: 0.393182  [39936/50000]
loss: 0.355243  [44928/50000]
loss: 0.363106  [31200/50000]
Epoch average loss: 0.3602425158023834


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.347996  [    0/50000]
loss: 0.401305  [ 4992/50000]
loss: 0.390456  [ 9984/50000]
loss: 0.346511  [14976/50000]
loss: 0.327521  [19968/50000]
loss: 0.356045  [24960/50000]
loss: 0.440367  [29952/50000]
loss: 0.325684  [34944/50000]
loss: 0.342712  [39936/50000]
loss: 0.402023  [44928/50000]
loss: 0.338603  [31200/50000]
Epoch average loss: 0.3596479296684265
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 11 - Done
Seed 0 - Iteration 1 - Model 12 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.305196  [    0/50000]
loss: 0.425401  [ 4992/50000]
loss: 0.384915  [ 9984/50000]
loss: 0.356200  [14976/50000]
loss: 0.372947  [19968/50000]
loss: 0.336649  [24960/50000]
loss: 0.327356  [29952/50000]
loss: 0.356442  [34944/50000]
loss: 0.315653  [39936/50000]
loss: 0.338652  [44928/50000]
loss: 0.325400  [31200/50000]
Epoch average loss: 0.3645945191383362


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.370230  [    0/50000]
loss: 0.397319  [ 4992/50000]
loss: 0.378303  [ 9984/50000]
loss: 0.312160  [14976/50000]
loss: 0.325364  [19968/50000]
loss: 0.375253  [24960/50000]
loss: 0.365874  [29952/50000]
loss: 0.453730  [34944/50000]
loss: 0.384263  [39936/50000]
loss: 0.341593  [44928/50000]
loss: 0.375315  [31200/50000]
Epoch average loss: 0.3636467456817627


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.363376  [    0/50000]
loss: 0.347720  [ 4992/50000]
loss: 0.328529  [ 9984/50000]
loss: 0.387154  [14976/50000]
loss: 0.435768  [19968/50000]
loss: 0.360690  [24960/50000]
loss: 0.352456  [29952/50000]
loss: 0.388876  [34944/50000]
loss: 0.368963  [39936/50000]
loss: 0.304496  [44928/50000]
loss: 0.391787  [31200/50000]
Epoch average loss: 0.36266589164733887
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 12 - Done
Seed 0 - Iteration 1 - Model 13 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.374750  [    0/50000]
loss: 0.328012  [ 4992/50000]
loss: 0.351125  [ 9984/50000]
loss: 0.353089  [14976/50000]
loss: 0.358106  [19968/50000]
loss: 0.356697  [24960/50000]
loss: 0.369399  [29952/50000]
loss: 0.376099  [34944/50000]
loss: 0.328564  [39936/50000]
loss: 0.412162  [44928/50000]
loss: 0.363897  [31200/50000]
Epoch average loss: 0.36006292700767517


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.376692  [    0/50000]
loss: 0.355549  [ 4992/50000]
loss: 0.359856  [ 9984/50000]
loss: 0.409115  [14976/50000]
loss: 0.402620  [19968/50000]
loss: 0.426629  [24960/50000]
loss: 0.429962  [29952/50000]
loss: 0.301459  [34944/50000]
loss: 0.344090  [39936/50000]
loss: 0.437818  [44928/50000]
loss: 0.332970  [31200/50000]
Epoch average loss: 0.35935863852500916


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.383538  [    0/50000]
loss: 0.344823  [ 4992/50000]
loss: 0.367870  [ 9984/50000]
loss: 0.301781  [14976/50000]
loss: 0.335267  [19968/50000]
loss: 0.385644  [24960/50000]
loss: 0.296690  [29952/50000]
loss: 0.343280  [34944/50000]
loss: 0.349306  [39936/50000]
loss: 0.353354  [44928/50000]
loss: 0.408420  [31200/50000]
Epoch average loss: 0.36044007539749146
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 13 - Done
Seed 0 - Iteration 1 - Model 14 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.390458  [    0/50000]
loss: 0.421318  [ 4992/50000]
loss: 0.392669  [ 9984/50000]
loss: 0.363765  [14976/50000]
loss: 0.439529  [19968/50000]
loss: 0.372569  [24960/50000]
loss: 0.361718  [29952/50000]
loss: 0.387821  [34944/50000]
loss: 0.313374  [39936/50000]
loss: 0.296963  [44928/50000]
loss: 0.368381  [31200/50000]
Epoch average loss: 0.36190032958984375


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.404707  [    0/50000]
loss: 0.411868  [ 4992/50000]
loss: 0.435066  [ 9984/50000]
loss: 0.377780  [14976/50000]
loss: 0.301795  [19968/50000]
loss: 0.342448  [24960/50000]
loss: 0.338764  [29952/50000]
loss: 0.391774  [34944/50000]
loss: 0.354964  [39936/50000]
loss: 0.388312  [44928/50000]
loss: 0.353222  [31200/50000]
Epoch average loss: 0.361621618270874


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.376763  [    0/50000]
loss: 0.371289  [ 4992/50000]
loss: 0.415232  [ 9984/50000]
loss: 0.310534  [14976/50000]
loss: 0.402551  [19968/50000]
loss: 0.344138  [24960/50000]
loss: 0.327739  [29952/50000]
loss: 0.325396  [34944/50000]
loss: 0.305879  [39936/50000]
loss: 0.310961  [44928/50000]
loss: 0.419488  [31200/50000]
Epoch average loss: 0.3611414432525635
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 14 - Done
Seed 0 - Iteration 1 - Model 15 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.370098  [    0/50000]
loss: 0.334970  [ 4992/50000]
loss: 0.304342  [ 9984/50000]
loss: 0.389424  [14976/50000]
loss: 0.359603  [19968/50000]
loss: 0.372690  [24960/50000]
loss: 0.329862  [29952/50000]
loss: 0.295113  [34944/50000]
loss: 0.394753  [39936/50000]
loss: 0.357889  [44928/50000]
loss: 0.349648  [31200/50000]
Epoch average loss: 0.3636859655380249


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.403201  [    0/50000]
loss: 0.430501  [ 4992/50000]
loss: 0.388948  [ 9984/50000]
loss: 0.434114  [14976/50000]
loss: 0.378376  [19968/50000]
loss: 0.447573  [24960/50000]
loss: 0.282820  [29952/50000]
loss: 0.391420  [34944/50000]
loss: 0.360463  [39936/50000]
loss: 0.368648  [44928/50000]
loss: 0.366842  [31200/50000]
Epoch average loss: 0.3646688461303711


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.342642  [    0/50000]
loss: 0.333127  [ 4992/50000]
loss: 0.344185  [ 9984/50000]
loss: 0.387421  [14976/50000]
loss: 0.379242  [19968/50000]
loss: 0.299821  [24960/50000]
loss: 0.363608  [29952/50000]
loss: 0.346044  [34944/50000]
loss: 0.418337  [39936/50000]
loss: 0.359324  [44928/50000]
loss: 0.356870  [31200/50000]
Epoch average loss: 0.3634376525878906
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 15 - Done
Seed 0 - Iteration 1 - Model 16 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.339468  [    0/50000]
loss: 0.359726  [ 4992/50000]
loss: 0.383265  [ 9984/50000]
loss: 0.385232  [14976/50000]
loss: 0.385422  [19968/50000]
loss: 0.362604  [24960/50000]
loss: 0.346476  [29952/50000]
loss: 0.392749  [34944/50000]
loss: 0.384340  [39936/50000]
loss: 0.367833  [44928/50000]
loss: 0.376084  [31200/50000]
Epoch average loss: 0.36060643196105957


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.403030  [    0/50000]
loss: 0.378115  [ 4992/50000]
loss: 0.322186  [ 9984/50000]
loss: 0.376394  [14976/50000]
loss: 0.323855  [19968/50000]
loss: 0.307284  [24960/50000]
loss: 0.370953  [29952/50000]
loss: 0.346822  [34944/50000]
loss: 0.370176  [39936/50000]
loss: 0.357967  [44928/50000]
loss: 0.437473  [31200/50000]
Epoch average loss: 0.3596935570240021


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.377497  [    0/50000]
loss: 0.281500  [ 4992/50000]
loss: 0.331955  [ 9984/50000]
loss: 0.342512  [14976/50000]
loss: 0.356372  [19968/50000]
loss: 0.300949  [24960/50000]
loss: 0.345081  [29952/50000]
loss: 0.282737  [34944/50000]
loss: 0.362192  [39936/50000]
loss: 0.358499  [44928/50000]
loss: 0.385571  [31200/50000]
Epoch average loss: 0.3591921329498291
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 16 - Done
Seed 0 - Iteration 1 - Model 17 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.358868  [    0/50000]
loss: 0.306873  [ 4992/50000]
loss: 0.417009  [ 9984/50000]
loss: 0.398278  [14976/50000]
loss: 0.341036  [19968/50000]
loss: 0.361411  [24960/50000]
loss: 0.434123  [29952/50000]
loss: 0.309153  [34944/50000]
loss: 0.382650  [39936/50000]
loss: 0.376854  [44928/50000]
loss: 0.350059  [31200/50000]
Epoch average loss: 0.36135849356651306


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.346300  [    0/50000]
loss: 0.398281  [ 4992/50000]
loss: 0.374269  [ 9984/50000]
loss: 0.357463  [14976/50000]
loss: 0.291889  [19968/50000]
loss: 0.395196  [24960/50000]
loss: 0.289273  [29952/50000]
loss: 0.332256  [34944/50000]
loss: 0.390872  [39936/50000]
loss: 0.380385  [44928/50000]
loss: 0.353897  [31200/50000]
Epoch average loss: 0.3606092631816864


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.366479  [    0/50000]
loss: 0.319340  [ 4992/50000]
loss: 0.307002  [ 9984/50000]
loss: 0.392530  [14976/50000]
loss: 0.382558  [19968/50000]
loss: 0.323694  [24960/50000]
loss: 0.334525  [29952/50000]
loss: 0.367625  [34944/50000]
loss: 0.337564  [39936/50000]
loss: 0.421555  [44928/50000]
loss: 0.335695  [31200/50000]
Epoch average loss: 0.3615202009677887
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 17 - Done
Seed 0 - Iteration 1 - Model 18 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.418690  [    0/50000]
loss: 0.377462  [ 4992/50000]
loss: 0.336222  [ 9984/50000]
loss: 0.365763  [14976/50000]
loss: 0.348595  [19968/50000]
loss: 0.390724  [24960/50000]
loss: 0.372193  [29952/50000]
loss: 0.329988  [34944/50000]
loss: 0.393999  [39936/50000]
loss: 0.341273  [44928/50000]
loss: 0.284871  [31200/50000]
Epoch average loss: 0.3657374978065491


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.320023  [    0/50000]
loss: 0.385398  [ 4992/50000]
loss: 0.318665  [ 9984/50000]
loss: 0.450725  [14976/50000]
loss: 0.345664  [19968/50000]
loss: 0.311902  [24960/50000]
loss: 0.338817  [29952/50000]
loss: 0.382710  [34944/50000]
loss: 0.373794  [39936/50000]
loss: 0.439936  [44928/50000]
loss: 0.334433  [31200/50000]
Epoch average loss: 0.3659549057483673


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.398775  [    0/50000]
loss: 0.341598  [ 4992/50000]
loss: 0.367600  [ 9984/50000]
loss: 0.364051  [14976/50000]
loss: 0.298030  [19968/50000]
loss: 0.334858  [24960/50000]
loss: 0.368899  [29952/50000]
loss: 0.418087  [34944/50000]
loss: 0.370150  [39936/50000]
loss: 0.301672  [44928/50000]
loss: 0.324703  [31200/50000]
Epoch average loss: 0.3650110960006714
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 18 - Done
Seed 0 - Iteration 1 - Model 19 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.374899  [    0/50000]
loss: 0.388804  [ 4992/50000]
loss: 0.392945  [ 9984/50000]
loss: 0.398042  [14976/50000]
loss: 0.440538  [19968/50000]
loss: 0.293786  [24960/50000]
loss: 0.321113  [29952/50000]
loss: 0.375648  [34944/50000]
loss: 0.406474  [39936/50000]
loss: 0.397156  [44928/50000]
loss: 0.381035  [31200/50000]
Epoch average loss: 0.3621770143508911


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.304778  [    0/50000]
loss: 0.408918  [ 4992/50000]
loss: 0.264013  [ 9984/50000]
loss: 0.346140  [14976/50000]
loss: 0.309258  [19968/50000]
loss: 0.296121  [24960/50000]
loss: 0.386139  [29952/50000]
loss: 0.356022  [34944/50000]
loss: 0.370108  [39936/50000]
loss: 0.329175  [44928/50000]
loss: 0.280125  [31200/50000]
Epoch average loss: 0.3625127673149109


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.332310  [    0/50000]
loss: 0.283148  [ 4992/50000]
loss: 0.376709  [ 9984/50000]
loss: 0.363563  [14976/50000]
loss: 0.361886  [19968/50000]
loss: 0.368435  [24960/50000]
loss: 0.388276  [29952/50000]
loss: 0.384575  [34944/50000]
loss: 0.338917  [39936/50000]
loss: 0.437541  [44928/50000]
loss: 0.351064  [31200/50000]
Epoch average loss: 0.36002734303474426
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 19 - Done
Seed 0 - Iteration 1 - Model 20 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.390528  [    0/50000]
loss: 0.373576  [ 4992/50000]
loss: 0.387936  [ 9984/50000]
loss: 0.330340  [14976/50000]
loss: 0.399550  [19968/50000]
loss: 0.381654  [24960/50000]
loss: 0.359380  [29952/50000]
loss: 0.345219  [34944/50000]
loss: 0.358643  [39936/50000]
loss: 0.366963  [44928/50000]
loss: 0.362465  [31200/50000]
Epoch average loss: 0.36491233110427856


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.367850  [    0/50000]
loss: 0.313505  [ 4992/50000]
loss: 0.358208  [ 9984/50000]
loss: 0.338196  [14976/50000]
loss: 0.370171  [19968/50000]
loss: 0.290296  [24960/50000]
loss: 0.354375  [29952/50000]
loss: 0.356045  [34944/50000]
loss: 0.326587  [39936/50000]
loss: 0.348610  [44928/50000]
loss: 0.355721  [31200/50000]
Epoch average loss: 0.36214619874954224


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.406973  [    0/50000]
loss: 0.370173  [ 4992/50000]
loss: 0.369065  [ 9984/50000]
loss: 0.410013  [14976/50000]
loss: 0.383433  [19968/50000]
loss: 0.353235  [24960/50000]
loss: 0.283358  [29952/50000]
loss: 0.338477  [34944/50000]
loss: 0.390919  [39936/50000]
loss: 0.354199  [44928/50000]
loss: 0.224238  [31200/50000]
Epoch average loss: 0.36293262243270874
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 20 - Done
Seed 0 - Iteration 1 - Model 21 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.344620  [    0/50000]
loss: 0.354984  [ 4992/50000]
loss: 0.319327  [ 9984/50000]
loss: 0.313120  [14976/50000]
loss: 0.331734  [19968/50000]
loss: 0.395778  [24960/50000]
loss: 0.444374  [29952/50000]
loss: 0.389187  [34944/50000]
loss: 0.329107  [39936/50000]
loss: 0.422290  [44928/50000]
loss: 0.350464  [31200/50000]
Epoch average loss: 0.3639088571071625


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.350123  [    0/50000]
loss: 0.377296  [ 4992/50000]
loss: 0.378276  [ 9984/50000]
loss: 0.389440  [14976/50000]
loss: 0.333937  [19968/50000]
loss: 0.386857  [24960/50000]
loss: 0.330650  [29952/50000]
loss: 0.371082  [34944/50000]
loss: 0.366372  [39936/50000]
loss: 0.348345  [44928/50000]
loss: 0.394093  [31200/50000]
Epoch average loss: 0.3628416955471039


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.356332  [    0/50000]
loss: 0.332688  [ 4992/50000]
loss: 0.383956  [ 9984/50000]
loss: 0.338025  [14976/50000]
loss: 0.336740  [19968/50000]
loss: 0.357334  [24960/50000]
loss: 0.323870  [29952/50000]
loss: 0.331467  [34944/50000]
loss: 0.366556  [39936/50000]
loss: 0.352404  [44928/50000]
loss: 0.278491  [31200/50000]
Epoch average loss: 0.3637048304080963
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 21 - Done
Seed 0 - Iteration 1 - Model 22 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.346644  [    0/50000]
loss: 0.428531  [ 4992/50000]
loss: 0.388327  [ 9984/50000]
loss: 0.371248  [14976/50000]
loss: 0.355170  [19968/50000]
loss: 0.338780  [24960/50000]
loss: 0.381980  [29952/50000]
loss: 0.345267  [34944/50000]
loss: 0.344141  [39936/50000]
loss: 0.326215  [44928/50000]
loss: 0.400209  [31200/50000]
Epoch average loss: 0.3649917542934418


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.338597  [    0/50000]
loss: 0.453980  [ 4992/50000]
loss: 0.413363  [ 9984/50000]
loss: 0.341489  [14976/50000]
loss: 0.365409  [19968/50000]
loss: 0.388801  [24960/50000]
loss: 0.349256  [29952/50000]
loss: 0.388616  [34944/50000]
loss: 0.345283  [39936/50000]
loss: 0.396503  [44928/50000]
loss: 0.360414  [31200/50000]
Epoch average loss: 0.3636026978492737


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.365952  [    0/50000]
loss: 0.391421  [ 4992/50000]
loss: 0.322531  [ 9984/50000]
loss: 0.348279  [14976/50000]
loss: 0.354986  [19968/50000]
loss: 0.325951  [24960/50000]
loss: 0.429619  [29952/50000]
loss: 0.408751  [34944/50000]
loss: 0.398096  [39936/50000]
loss: 0.384907  [44928/50000]
loss: 0.389816  [31200/50000]
Epoch average loss: 0.3645685315132141
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 22 - Done
Seed 0 - Iteration 1 - Model 23 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.351953  [    0/50000]
loss: 0.402540  [ 4992/50000]
loss: 0.342083  [ 9984/50000]
loss: 0.347482  [14976/50000]
loss: 0.337659  [19968/50000]
loss: 0.389558  [24960/50000]
loss: 0.334745  [29952/50000]
loss: 0.384813  [34944/50000]
loss: 0.349100  [39936/50000]
loss: 0.390132  [44928/50000]
loss: 0.345509  [31200/50000]
Epoch average loss: 0.3610244691371918


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.320654  [    0/50000]
loss: 0.401340  [ 4992/50000]
loss: 0.348927  [ 9984/50000]
loss: 0.355229  [14976/50000]
loss: 0.402605  [19968/50000]
loss: 0.358957  [24960/50000]
loss: 0.305994  [29952/50000]
loss: 0.418309  [34944/50000]
loss: 0.392044  [39936/50000]
loss: 0.379076  [44928/50000]
loss: 0.380359  [31200/50000]
Epoch average loss: 0.36064931750297546


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.339567  [    0/50000]
loss: 0.388550  [ 4992/50000]
loss: 0.325630  [ 9984/50000]
loss: 0.329347  [14976/50000]
loss: 0.367544  [19968/50000]
loss: 0.339536  [24960/50000]
loss: 0.311443  [29952/50000]
loss: 0.413085  [34944/50000]
loss: 0.376573  [39936/50000]
loss: 0.393170  [44928/50000]
loss: 0.410186  [31200/50000]
Epoch average loss: 0.3600997030735016
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 23 - Done
Seed 0 - Iteration 1 - Model 24 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.375023  [    0/50000]
loss: 0.337850  [ 4992/50000]
loss: 0.424908  [ 9984/50000]
loss: 0.388779  [14976/50000]
loss: 0.345687  [19968/50000]
loss: 0.346043  [24960/50000]
loss: 0.403008  [29952/50000]
loss: 0.347183  [34944/50000]
loss: 0.405996  [39936/50000]
loss: 0.360808  [44928/50000]
loss: 0.310199  [31200/50000]
Epoch average loss: 0.3621577322483063


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.393964  [    0/50000]
loss: 0.355902  [ 4992/50000]
loss: 0.404612  [ 9984/50000]
loss: 0.392822  [14976/50000]
loss: 0.315474  [19968/50000]
loss: 0.385478  [24960/50000]
loss: 0.320867  [29952/50000]
loss: 0.300892  [34944/50000]
loss: 0.387427  [39936/50000]
loss: 0.323765  [44928/50000]
loss: 0.382959  [31200/50000]
Epoch average loss: 0.3622788190841675


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.399670  [    0/50000]
loss: 0.369500  [ 4992/50000]
loss: 0.366769  [ 9984/50000]
loss: 0.418320  [14976/50000]
loss: 0.388011  [19968/50000]
loss: 0.348076  [24960/50000]
loss: 0.325025  [29952/50000]
loss: 0.404832  [34944/50000]
loss: 0.449906  [39936/50000]
loss: 0.398660  [44928/50000]
loss: 0.286074  [31200/50000]
Epoch average loss: 0.3623354136943817
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 24 - Done
Seed 0 - Iteration 1 - Model 25 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.322241  [    0/50000]
loss: 0.356952  [ 4992/50000]
loss: 0.289560  [ 9984/50000]
loss: 0.381785  [14976/50000]
loss: 0.389962  [19968/50000]
loss: 0.344300  [24960/50000]
loss: 0.404704  [29952/50000]
loss: 0.400658  [34944/50000]
loss: 0.397474  [39936/50000]
loss: 0.403833  [44928/50000]
loss: 0.320530  [31200/50000]
Epoch average loss: 0.36479654908180237


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.366795  [    0/50000]
loss: 0.416388  [ 4992/50000]
loss: 0.425864  [ 9984/50000]
loss: 0.331994  [14976/50000]
loss: 0.325872  [19968/50000]
loss: 0.345786  [24960/50000]
loss: 0.385902  [29952/50000]
loss: 0.337761  [34944/50000]
loss: 0.409399  [39936/50000]
loss: 0.369960  [44928/50000]
loss: 0.357539  [31200/50000]
Epoch average loss: 0.3645392060279846


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.323439  [    0/50000]
loss: 0.385340  [ 4992/50000]
loss: 0.359060  [ 9984/50000]
loss: 0.340236  [14976/50000]
loss: 0.391097  [19968/50000]
loss: 0.395110  [24960/50000]
loss: 0.362624  [29952/50000]
loss: 0.353774  [34944/50000]
loss: 0.318166  [39936/50000]
loss: 0.365845  [44928/50000]
loss: 0.352316  [31200/50000]
Epoch average loss: 0.36224427819252014
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 25 - Done
Seed 0 - Iteration 1 - Model 26 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.336643  [    0/50000]
loss: 0.318669  [ 4992/50000]
loss: 0.335045  [ 9984/50000]
loss: 0.411398  [14976/50000]
loss: 0.413485  [19968/50000]
loss: 0.330947  [24960/50000]
loss: 0.354230  [29952/50000]
loss: 0.342914  [34944/50000]
loss: 0.415429  [39936/50000]
loss: 0.406255  [44928/50000]
loss: 0.313470  [31200/50000]
Epoch average loss: 0.36004361510276794


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.386883  [    0/50000]
loss: 0.352305  [ 4992/50000]
loss: 0.353350  [ 9984/50000]
loss: 0.425295  [14976/50000]
loss: 0.320889  [19968/50000]
loss: 0.378136  [24960/50000]
loss: 0.358442  [29952/50000]
loss: 0.337707  [34944/50000]
loss: 0.319039  [39936/50000]
loss: 0.391901  [44928/50000]
loss: 0.369401  [31200/50000]
Epoch average loss: 0.3603772819042206


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.366482  [    0/50000]
loss: 0.414986  [ 4992/50000]
loss: 0.369441  [ 9984/50000]
loss: 0.435468  [14976/50000]
loss: 0.374962  [19968/50000]
loss: 0.379233  [24960/50000]
loss: 0.308789  [29952/50000]
loss: 0.357505  [34944/50000]
loss: 0.318620  [39936/50000]
loss: 0.302206  [44928/50000]
loss: 0.314335  [31200/50000]
Epoch average loss: 0.3595105707645416
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 26 - Done
Seed 0 - Iteration 1 - Model 27 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.325485  [    0/50000]
loss: 0.304996  [ 4992/50000]
loss: 0.451509  [ 9984/50000]
loss: 0.329021  [14976/50000]
loss: 0.412109  [19968/50000]
loss: 0.388679  [24960/50000]
loss: 0.349663  [29952/50000]
loss: 0.388563  [34944/50000]
loss: 0.332583  [39936/50000]
loss: 0.388341  [44928/50000]
loss: 0.367059  [31200/50000]
Epoch average loss: 0.36254069209098816


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.405747  [    0/50000]
loss: 0.377731  [ 4992/50000]
loss: 0.303790  [ 9984/50000]
loss: 0.365424  [14976/50000]
loss: 0.365858  [19968/50000]
loss: 0.361084  [24960/50000]
loss: 0.416871  [29952/50000]
loss: 0.404686  [34944/50000]
loss: 0.411048  [39936/50000]
loss: 0.323373  [44928/50000]
loss: 0.392105  [31200/50000]
Epoch average loss: 0.3607514500617981


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.376333  [    0/50000]
loss: 0.363936  [ 4992/50000]
loss: 0.318429  [ 9984/50000]
loss: 0.405978  [14976/50000]
loss: 0.383650  [19968/50000]
loss: 0.367164  [24960/50000]
loss: 0.395686  [29952/50000]
loss: 0.409510  [34944/50000]
loss: 0.387117  [39936/50000]
loss: 0.347774  [44928/50000]
loss: 0.318932  [31200/50000]
Epoch average loss: 0.36168593168258667
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 27 - Done
Seed 0 - Iteration 1 - Model 28 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.347708  [    0/50000]
loss: 0.316711  [ 4992/50000]
loss: 0.291582  [ 9984/50000]
loss: 0.379094  [14976/50000]
loss: 0.414849  [19968/50000]
loss: 0.369684  [24960/50000]
loss: 0.374522  [29952/50000]
loss: 0.370054  [34944/50000]
loss: 0.318456  [39936/50000]
loss: 0.395565  [44928/50000]
loss: 0.361076  [31200/50000]
Epoch average loss: 0.36203742027282715


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.360095  [    0/50000]
loss: 0.363717  [ 4992/50000]
loss: 0.361006  [ 9984/50000]
loss: 0.355467  [14976/50000]
loss: 0.365552  [19968/50000]
loss: 0.320903  [24960/50000]
loss: 0.364480  [29952/50000]
loss: 0.371073  [34944/50000]
loss: 0.364466  [39936/50000]
loss: 0.378381  [44928/50000]
loss: 0.420839  [31200/50000]
Epoch average loss: 0.36289939284324646


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.357809  [    0/50000]
loss: 0.400564  [ 4992/50000]
loss: 0.317422  [ 9984/50000]
loss: 0.392423  [14976/50000]
loss: 0.381952  [19968/50000]
loss: 0.370782  [24960/50000]
loss: 0.371298  [29952/50000]
loss: 0.294877  [34944/50000]
loss: 0.338849  [39936/50000]
loss: 0.398045  [44928/50000]
loss: 0.379784  [31200/50000]
Epoch average loss: 0.36077722907066345
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 28 - Done
Seed 0 - Iteration 1 - Model 29 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.352018  [    0/50000]
loss: 0.423356  [ 4992/50000]
loss: 0.299930  [ 9984/50000]
loss: 0.400333  [14976/50000]
loss: 0.381279  [19968/50000]
loss: 0.335182  [24960/50000]
loss: 0.385997  [29952/50000]
loss: 0.369420  [34944/50000]
loss: 0.400315  [39936/50000]
loss: 0.296129  [44928/50000]
loss: 0.357260  [31200/50000]
Epoch average loss: 0.3598845601081848


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.394789  [    0/50000]
loss: 0.405474  [ 4992/50000]
loss: 0.324790  [ 9984/50000]
loss: 0.335003  [14976/50000]
loss: 0.452214  [19968/50000]
loss: 0.342710  [24960/50000]
loss: 0.371843  [29952/50000]
loss: 0.385508  [34944/50000]
loss: 0.351016  [39936/50000]
loss: 0.384247  [44928/50000]
loss: 0.305134  [31200/50000]
Epoch average loss: 0.36102479696273804


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.361217  [    0/50000]
loss: 0.328080  [ 4992/50000]
loss: 0.385016  [ 9984/50000]
loss: 0.353343  [14976/50000]
loss: 0.282215  [19968/50000]
loss: 0.416386  [24960/50000]
loss: 0.346544  [29952/50000]
loss: 0.399365  [34944/50000]
loss: 0.358041  [39936/50000]
loss: 0.324633  [44928/50000]
loss: 0.309209  [31200/50000]
Epoch average loss: 0.35942140221595764
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 29 - Done
Seed 0 - Iteration 1 - Model 30 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.379597  [    0/50000]
loss: 0.415265  [ 4992/50000]
loss: 0.321619  [ 9984/50000]
loss: 0.431284  [14976/50000]
loss: 0.391149  [19968/50000]
loss: 0.362182  [24960/50000]
loss: 0.373485  [29952/50000]
loss: 0.427738  [34944/50000]
loss: 0.353298  [39936/50000]
loss: 0.272263  [44928/50000]
loss: 0.365853  [31200/50000]
Epoch average loss: 0.3624434769153595


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.377114  [    0/50000]
loss: 0.395981  [ 4992/50000]
loss: 0.303844  [ 9984/50000]
loss: 0.316787  [14976/50000]
loss: 0.482280  [19968/50000]
loss: 0.417946  [24960/50000]
loss: 0.295092  [29952/50000]
loss: 0.380495  [34944/50000]
loss: 0.429588  [39936/50000]
loss: 0.334199  [44928/50000]
loss: 0.353747  [31200/50000]
Epoch average loss: 0.3618301451206207


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.307015  [    0/50000]
loss: 0.334931  [ 4992/50000]
loss: 0.365072  [ 9984/50000]
loss: 0.366882  [14976/50000]
loss: 0.380755  [19968/50000]
loss: 0.329359  [24960/50000]
loss: 0.341403  [29952/50000]
loss: 0.346261  [34944/50000]
loss: 0.371161  [39936/50000]
loss: 0.380058  [44928/50000]
loss: 0.383326  [31200/50000]
Epoch average loss: 0.36116859316825867
Done!


  0%|          | 0/3473 [00:00<?, ?it/s]

Seed 0 - Iteration 1 - Model 30 - Done
Seed 0 - Iteration 2 - Model 1 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.423399  [    0/50000]
loss: 0.302675  [ 4992/50000]
loss: 0.392542  [ 9984/50000]
loss: 0.383389  [14976/50000]
loss: 0.385263  [19968/50000]
loss: 0.366612  [24960/50000]
loss: 0.394200  [29952/50000]
loss: 0.343392  [34944/50000]
loss: 0.355751  [39936/50000]
loss: 0.435439  [44928/50000]
loss: 0.440298  [31200/50000]
Epoch average loss: 0.40322011709213257


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.344407  [    0/50000]
loss: 0.428113  [ 4992/50000]
loss: 0.393314  [ 9984/50000]
loss: 0.416195  [14976/50000]
loss: 0.407200  [19968/50000]
loss: 0.403148  [24960/50000]
loss: 0.417466  [29952/50000]
loss: 0.440147  [34944/50000]
loss: 0.433632  [39936/50000]
loss: 0.355775  [44928/50000]
loss: 0.385628  [31200/50000]
Epoch average loss: 0.4018418788909912


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.424071  [    0/50000]
loss: 0.357049  [ 4992/50000]
loss: 0.459892  [ 9984/50000]
loss: 0.407787  [14976/50000]
loss: 0.354743  [19968/50000]
loss: 0.408181  [24960/50000]
loss: 0.396640  [29952/50000]
loss: 0.361799  [34944/50000]
loss: 0.409775  [39936/50000]
loss: 0.430625  [44928/50000]
loss: 0.440888  [31200/50000]
Epoch average loss: 0.3996305465698242
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 1 - Done
Seed 0 - Iteration 2 - Model 2 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.433620  [    0/50000]
loss: 0.500869  [ 4992/50000]
loss: 0.328286  [ 9984/50000]
loss: 0.395211  [14976/50000]
loss: 0.429705  [19968/50000]
loss: 0.322753  [24960/50000]
loss: 0.386794  [29952/50000]
loss: 0.335783  [34944/50000]
loss: 0.412689  [39936/50000]
loss: 0.467904  [44928/50000]
loss: 0.362160  [31200/50000]
Epoch average loss: 0.4045669138431549


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.370098  [    0/50000]
loss: 0.398687  [ 4992/50000]
loss: 0.414536  [ 9984/50000]
loss: 0.364619  [14976/50000]
loss: 0.437779  [19968/50000]
loss: 0.355050  [24960/50000]
loss: 0.384351  [29952/50000]
loss: 0.345413  [34944/50000]
loss: 0.413164  [39936/50000]
loss: 0.498346  [44928/50000]
loss: 0.416997  [31200/50000]
Epoch average loss: 0.4029448330402374


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.428917  [    0/50000]
loss: 0.373645  [ 4992/50000]
loss: 0.415088  [ 9984/50000]
loss: 0.453657  [14976/50000]
loss: 0.376179  [19968/50000]
loss: 0.422153  [24960/50000]
loss: 0.388073  [29952/50000]
loss: 0.420809  [34944/50000]
loss: 0.444149  [39936/50000]
loss: 0.453484  [44928/50000]
loss: 0.410399  [31200/50000]
Epoch average loss: 0.4032665193080902
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 2 - Done
Seed 0 - Iteration 2 - Model 3 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.383015  [    0/50000]
loss: 0.389439  [ 4992/50000]
loss: 0.413101  [ 9984/50000]
loss: 0.402549  [14976/50000]
loss: 0.407833  [19968/50000]
loss: 0.387415  [24960/50000]
loss: 0.344182  [29952/50000]
loss: 0.436585  [34944/50000]
loss: 0.406019  [39936/50000]
loss: 0.364024  [44928/50000]
loss: 0.364165  [31200/50000]
Epoch average loss: 0.4039818346500397


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.358946  [    0/50000]
loss: 0.410348  [ 4992/50000]
loss: 0.413045  [ 9984/50000]
loss: 0.439927  [14976/50000]
loss: 0.452203  [19968/50000]
loss: 0.432634  [24960/50000]
loss: 0.394162  [29952/50000]
loss: 0.351032  [34944/50000]
loss: 0.375529  [39936/50000]
loss: 0.423860  [44928/50000]
loss: 0.413378  [31200/50000]
Epoch average loss: 0.4027215540409088


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.439281  [    0/50000]
loss: 0.430097  [ 4992/50000]
loss: 0.393143  [ 9984/50000]
loss: 0.447434  [14976/50000]
loss: 0.389304  [19968/50000]
loss: 0.368687  [24960/50000]
loss: 0.379284  [29952/50000]
loss: 0.345234  [34944/50000]
loss: 0.362452  [39936/50000]
loss: 0.366821  [44928/50000]
loss: 0.376924  [31200/50000]
Epoch average loss: 0.4015220105648041
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 3 - Done
Seed 0 - Iteration 2 - Model 4 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.418791  [    0/50000]
loss: 0.471192  [ 4992/50000]
loss: 0.443224  [ 9984/50000]
loss: 0.448499  [14976/50000]
loss: 0.459212  [19968/50000]
loss: 0.324615  [24960/50000]
loss: 0.449149  [29952/50000]
loss: 0.397107  [34944/50000]
loss: 0.352568  [39936/50000]
loss: 0.386257  [44928/50000]
loss: 0.336917  [31200/50000]
Epoch average loss: 0.4081045091152191


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.439760  [    0/50000]
loss: 0.407144  [ 4992/50000]
loss: 0.344865  [ 9984/50000]
loss: 0.361813  [14976/50000]
loss: 0.362808  [19968/50000]
loss: 0.452253  [24960/50000]
loss: 0.347060  [29952/50000]
loss: 0.434801  [34944/50000]
loss: 0.425731  [39936/50000]
loss: 0.428192  [44928/50000]
loss: 0.355006  [31200/50000]
Epoch average loss: 0.406343936920166


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.385841  [    0/50000]
loss: 0.404457  [ 4992/50000]
loss: 0.390952  [ 9984/50000]
loss: 0.403180  [14976/50000]
loss: 0.396373  [19968/50000]
loss: 0.370626  [24960/50000]
loss: 0.388191  [29952/50000]
loss: 0.472102  [34944/50000]
loss: 0.369269  [39936/50000]
loss: 0.399417  [44928/50000]
loss: 0.357511  [31200/50000]
Epoch average loss: 0.40530845522880554
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 4 - Done
Seed 0 - Iteration 2 - Model 5 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.419736  [    0/50000]
loss: 0.360138  [ 4992/50000]
loss: 0.493773  [ 9984/50000]
loss: 0.404335  [14976/50000]
loss: 0.496682  [19968/50000]
loss: 0.431751  [24960/50000]
loss: 0.351370  [29952/50000]
loss: 0.354239  [34944/50000]
loss: 0.383124  [39936/50000]
loss: 0.375799  [44928/50000]
loss: 0.377276  [31200/50000]
Epoch average loss: 0.40551912784576416


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.363934  [    0/50000]
loss: 0.399205  [ 4992/50000]
loss: 0.415937  [ 9984/50000]
loss: 0.386801  [14976/50000]
loss: 0.450846  [19968/50000]
loss: 0.374574  [24960/50000]
loss: 0.431683  [29952/50000]
loss: 0.366053  [34944/50000]
loss: 0.428131  [39936/50000]
loss: 0.451308  [44928/50000]
loss: 0.441053  [31200/50000]
Epoch average loss: 0.4038797914981842


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.351874  [    0/50000]
loss: 0.410197  [ 4992/50000]
loss: 0.426792  [ 9984/50000]
loss: 0.470587  [14976/50000]
loss: 0.361718  [19968/50000]
loss: 0.399265  [24960/50000]
loss: 0.396686  [29952/50000]
loss: 0.405999  [34944/50000]
loss: 0.449320  [39936/50000]
loss: 0.457719  [44928/50000]
loss: 0.355139  [31200/50000]
Epoch average loss: 0.4031127095222473
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 5 - Done
Seed 0 - Iteration 2 - Model 6 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.421538  [    0/50000]
loss: 0.400827  [ 4992/50000]
loss: 0.430678  [ 9984/50000]
loss: 0.376245  [14976/50000]
loss: 0.423263  [19968/50000]
loss: 0.416701  [24960/50000]
loss: 0.351908  [29952/50000]
loss: 0.403046  [34944/50000]
loss: 0.390089  [39936/50000]
loss: 0.427739  [44928/50000]
loss: 0.336523  [31200/50000]
Epoch average loss: 0.4054267108440399


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.360753  [    0/50000]
loss: 0.422741  [ 4992/50000]
loss: 0.402907  [ 9984/50000]
loss: 0.459040  [14976/50000]
loss: 0.424203  [19968/50000]
loss: 0.359729  [24960/50000]
loss: 0.387430  [29952/50000]
loss: 0.432000  [34944/50000]
loss: 0.432467  [39936/50000]
loss: 0.407376  [44928/50000]
loss: 0.391482  [31200/50000]
Epoch average loss: 0.4036831855773926


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.431322  [    0/50000]
loss: 0.484038  [ 4992/50000]
loss: 0.447567  [ 9984/50000]
loss: 0.373723  [14976/50000]
loss: 0.430251  [19968/50000]
loss: 0.373442  [24960/50000]
loss: 0.359392  [29952/50000]
loss: 0.390061  [34944/50000]
loss: 0.461378  [39936/50000]
loss: 0.361978  [44928/50000]
loss: 0.391352  [31200/50000]
Epoch average loss: 0.4036931097507477
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 6 - Done
Seed 0 - Iteration 2 - Model 7 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.352012  [    0/50000]
loss: 0.327675  [ 4992/50000]
loss: 0.348464  [ 9984/50000]
loss: 0.411342  [14976/50000]
loss: 0.412634  [19968/50000]
loss: 0.446359  [24960/50000]
loss: 0.388535  [29952/50000]
loss: 0.386911  [34944/50000]
loss: 0.417526  [39936/50000]
loss: 0.424262  [44928/50000]
loss: 0.383314  [31200/50000]
Epoch average loss: 0.4035056233406067


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.428088  [    0/50000]
loss: 0.374852  [ 4992/50000]
loss: 0.480849  [ 9984/50000]
loss: 0.341740  [14976/50000]
loss: 0.438371  [19968/50000]
loss: 0.365715  [24960/50000]
loss: 0.391534  [29952/50000]
loss: 0.425291  [34944/50000]
loss: 0.431229  [39936/50000]
loss: 0.413105  [44928/50000]
loss: 0.424604  [31200/50000]
Epoch average loss: 0.40428417921066284


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.394160  [    0/50000]
loss: 0.406508  [ 4992/50000]
loss: 0.339182  [ 9984/50000]
loss: 0.475965  [14976/50000]
loss: 0.395435  [19968/50000]
loss: 0.391911  [24960/50000]
loss: 0.394612  [29952/50000]
loss: 0.416955  [34944/50000]
loss: 0.447256  [39936/50000]
loss: 0.449754  [44928/50000]
loss: 0.329297  [31200/50000]
Epoch average loss: 0.40223100781440735
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 7 - Done
Seed 0 - Iteration 2 - Model 8 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.443066  [    0/50000]
loss: 0.382649  [ 4992/50000]
loss: 0.398004  [ 9984/50000]
loss: 0.456402  [14976/50000]
loss: 0.432535  [19968/50000]
loss: 0.451778  [24960/50000]
loss: 0.489343  [29952/50000]
loss: 0.439061  [34944/50000]
loss: 0.403285  [39936/50000]
loss: 0.403067  [44928/50000]
loss: 0.433797  [31200/50000]
Epoch average loss: 0.40635550022125244


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.350963  [    0/50000]
loss: 0.437150  [ 4992/50000]
loss: 0.402840  [ 9984/50000]
loss: 0.340451  [14976/50000]
loss: 0.354638  [19968/50000]
loss: 0.389803  [24960/50000]
loss: 0.384522  [29952/50000]
loss: 0.394367  [34944/50000]
loss: 0.406882  [39936/50000]
loss: 0.404035  [44928/50000]
loss: 0.359375  [31200/50000]
Epoch average loss: 0.4043329060077667


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.352987  [    0/50000]
loss: 0.422826  [ 4992/50000]
loss: 0.420037  [ 9984/50000]
loss: 0.382321  [14976/50000]
loss: 0.427092  [19968/50000]
loss: 0.469196  [24960/50000]
loss: 0.356555  [29952/50000]
loss: 0.394868  [34944/50000]
loss: 0.432841  [39936/50000]
loss: 0.473942  [44928/50000]
loss: 0.444551  [31200/50000]
Epoch average loss: 0.40378332138061523
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 8 - Done
Seed 0 - Iteration 2 - Model 9 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.416583  [    0/50000]
loss: 0.385990  [ 4992/50000]
loss: 0.368016  [ 9984/50000]
loss: 0.396698  [14976/50000]
loss: 0.339176  [19968/50000]
loss: 0.487769  [24960/50000]
loss: 0.406694  [29952/50000]
loss: 0.361258  [34944/50000]
loss: 0.430671  [39936/50000]
loss: 0.419532  [44928/50000]
loss: 0.436454  [31200/50000]
Epoch average loss: 0.4052635729312897


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.373339  [    0/50000]
loss: 0.375979  [ 4992/50000]
loss: 0.384173  [ 9984/50000]
loss: 0.379632  [14976/50000]
loss: 0.410065  [19968/50000]
loss: 0.369381  [24960/50000]
loss: 0.457454  [29952/50000]
loss: 0.431079  [34944/50000]
loss: 0.355044  [39936/50000]
loss: 0.393504  [44928/50000]
loss: 0.319270  [31200/50000]
Epoch average loss: 0.4036844074726105


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.368216  [    0/50000]
loss: 0.399830  [ 4992/50000]
loss: 0.413145  [ 9984/50000]
loss: 0.422153  [14976/50000]
loss: 0.335064  [19968/50000]
loss: 0.417916  [24960/50000]
loss: 0.432624  [29952/50000]
loss: 0.412463  [34944/50000]
loss: 0.392802  [39936/50000]
loss: 0.473787  [44928/50000]
loss: 0.400748  [31200/50000]
Epoch average loss: 0.40323829650878906
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 9 - Done
Seed 0 - Iteration 2 - Model 10 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.491985  [    0/50000]
loss: 0.400741  [ 4992/50000]
loss: 0.479795  [ 9984/50000]
loss: 0.398778  [14976/50000]
loss: 0.447296  [19968/50000]
loss: 0.489348  [24960/50000]
loss: 0.439979  [29952/50000]
loss: 0.391295  [34944/50000]
loss: 0.357241  [39936/50000]
loss: 0.374901  [44928/50000]
loss: 0.463129  [31200/50000]
Epoch average loss: 0.4047226905822754


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.397155  [    0/50000]
loss: 0.402755  [ 4992/50000]
loss: 0.433645  [ 9984/50000]
loss: 0.444531  [14976/50000]
loss: 0.384680  [19968/50000]
loss: 0.404025  [24960/50000]
loss: 0.374075  [29952/50000]
loss: 0.421726  [34944/50000]
loss: 0.399401  [39936/50000]
loss: 0.363305  [44928/50000]
loss: 0.338316  [31200/50000]
Epoch average loss: 0.4041478931903839


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.423077  [    0/50000]
loss: 0.425543  [ 4992/50000]
loss: 0.382443  [ 9984/50000]
loss: 0.440358  [14976/50000]
loss: 0.421700  [19968/50000]
loss: 0.368381  [24960/50000]
loss: 0.372002  [29952/50000]
loss: 0.387048  [34944/50000]
loss: 0.350004  [39936/50000]
loss: 0.423078  [44928/50000]
loss: 0.373922  [31200/50000]
Epoch average loss: 0.40421831607818604
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 10 - Done
Seed 0 - Iteration 2 - Model 11 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.419720  [    0/50000]
loss: 0.455153  [ 4992/50000]
loss: 0.395461  [ 9984/50000]
loss: 0.403939  [14976/50000]
loss: 0.373590  [19968/50000]
loss: 0.433290  [24960/50000]
loss: 0.405944  [29952/50000]
loss: 0.353619  [34944/50000]
loss: 0.428109  [39936/50000]
loss: 0.469534  [44928/50000]
loss: 0.393412  [31200/50000]
Epoch average loss: 0.4050902724266052


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.422221  [    0/50000]
loss: 0.344388  [ 4992/50000]
loss: 0.403546  [ 9984/50000]
loss: 0.383370  [14976/50000]
loss: 0.414964  [19968/50000]
loss: 0.404458  [24960/50000]
loss: 0.368241  [29952/50000]
loss: 0.460983  [34944/50000]
loss: 0.416824  [39936/50000]
loss: 0.402220  [44928/50000]
loss: 0.398739  [31200/50000]
Epoch average loss: 0.4022267460823059


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.406063  [    0/50000]
loss: 0.445852  [ 4992/50000]
loss: 0.429054  [ 9984/50000]
loss: 0.381680  [14976/50000]
loss: 0.384922  [19968/50000]
loss: 0.384767  [24960/50000]
loss: 0.343056  [29952/50000]
loss: 0.417059  [34944/50000]
loss: 0.350914  [39936/50000]
loss: 0.461193  [44928/50000]
loss: 0.540557  [31200/50000]
Epoch average loss: 0.4031677544116974
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 11 - Done
Seed 0 - Iteration 2 - Model 12 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.395138  [    0/50000]
loss: 0.436199  [ 4992/50000]
loss: 0.384448  [ 9984/50000]
loss: 0.379293  [14976/50000]
loss: 0.377020  [19968/50000]
loss: 0.367632  [24960/50000]
loss: 0.426968  [29952/50000]
loss: 0.459157  [34944/50000]
loss: 0.386941  [39936/50000]
loss: 0.380569  [44928/50000]
loss: 0.336422  [31200/50000]
Epoch average loss: 0.4020503759384155


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.389990  [    0/50000]
loss: 0.413687  [ 4992/50000]
loss: 0.313284  [ 9984/50000]
loss: 0.383332  [14976/50000]
loss: 0.408674  [19968/50000]
loss: 0.468209  [24960/50000]
loss: 0.413291  [29952/50000]
loss: 0.387315  [34944/50000]
loss: 0.456583  [39936/50000]
loss: 0.368068  [44928/50000]
loss: 0.391307  [31200/50000]
Epoch average loss: 0.40137752890586853


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.416752  [    0/50000]
loss: 0.390255  [ 4992/50000]
loss: 0.443792  [ 9984/50000]
loss: 0.320815  [14976/50000]
loss: 0.464529  [19968/50000]
loss: 0.374893  [24960/50000]
loss: 0.412321  [29952/50000]
loss: 0.378267  [34944/50000]
loss: 0.412938  [39936/50000]
loss: 0.386333  [44928/50000]
loss: 0.435528  [31200/50000]
Epoch average loss: 0.4024758040904999
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 12 - Done
Seed 0 - Iteration 2 - Model 13 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.390195  [    0/50000]
loss: 0.394577  [ 4992/50000]
loss: 0.460859  [ 9984/50000]
loss: 0.334285  [14976/50000]
loss: 0.380086  [19968/50000]
loss: 0.376626  [24960/50000]
loss: 0.395428  [29952/50000]
loss: 0.434817  [34944/50000]
loss: 0.417580  [39936/50000]
loss: 0.491605  [44928/50000]
loss: 0.348967  [31200/50000]
Epoch average loss: 0.40490612387657166


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.406971  [    0/50000]
loss: 0.405876  [ 4992/50000]
loss: 0.406321  [ 9984/50000]
loss: 0.405907  [14976/50000]
loss: 0.396004  [19968/50000]
loss: 0.414084  [24960/50000]
loss: 0.423585  [29952/50000]
loss: 0.407568  [34944/50000]
loss: 0.482437  [39936/50000]
loss: 0.415362  [44928/50000]
loss: 0.400651  [31200/50000]
Epoch average loss: 0.40426838397979736


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.433226  [    0/50000]
loss: 0.414562  [ 4992/50000]
loss: 0.436801  [ 9984/50000]
loss: 0.369105  [14976/50000]
loss: 0.416130  [19968/50000]
loss: 0.448255  [24960/50000]
loss: 0.341297  [29952/50000]
loss: 0.444699  [34944/50000]
loss: 0.433188  [39936/50000]
loss: 0.454023  [44928/50000]
loss: 0.342862  [31200/50000]
Epoch average loss: 0.4034767150878906
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 13 - Done
Seed 0 - Iteration 2 - Model 14 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.387768  [    0/50000]
loss: 0.405902  [ 4992/50000]
loss: 0.375575  [ 9984/50000]
loss: 0.352536  [14976/50000]
loss: 0.446264  [19968/50000]
loss: 0.451726  [24960/50000]
loss: 0.410479  [29952/50000]
loss: 0.448669  [34944/50000]
loss: 0.402336  [39936/50000]
loss: 0.423111  [44928/50000]
loss: 0.553411  [31200/50000]
Epoch average loss: 0.40386995673179626


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.340839  [    0/50000]
loss: 0.436730  [ 4992/50000]
loss: 0.390554  [ 9984/50000]
loss: 0.367286  [14976/50000]
loss: 0.449672  [19968/50000]
loss: 0.335207  [24960/50000]
loss: 0.412252  [29952/50000]
loss: 0.387901  [34944/50000]
loss: 0.373193  [39936/50000]
loss: 0.416752  [44928/50000]
loss: 0.335665  [31200/50000]
Epoch average loss: 0.4027892053127289


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.405188  [    0/50000]
loss: 0.404876  [ 4992/50000]
loss: 0.337005  [ 9984/50000]
loss: 0.401396  [14976/50000]
loss: 0.492069  [19968/50000]
loss: 0.367510  [24960/50000]
loss: 0.356871  [29952/50000]
loss: 0.382807  [34944/50000]
loss: 0.348491  [39936/50000]
loss: 0.413920  [44928/50000]
loss: 0.381128  [31200/50000]
Epoch average loss: 0.40230515599250793
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 14 - Done
Seed 0 - Iteration 2 - Model 15 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.412965  [    0/50000]
loss: 0.460096  [ 4992/50000]
loss: 0.426034  [ 9984/50000]
loss: 0.437353  [14976/50000]
loss: 0.448321  [19968/50000]
loss: 0.462273  [24960/50000]
loss: 0.473677  [29952/50000]
loss: 0.366596  [34944/50000]
loss: 0.432496  [39936/50000]
loss: 0.433041  [44928/50000]
loss: 0.410971  [31200/50000]
Epoch average loss: 0.4076092839241028


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.433451  [    0/50000]
loss: 0.377925  [ 4992/50000]
loss: 0.449704  [ 9984/50000]
loss: 0.447768  [14976/50000]
loss: 0.399312  [19968/50000]
loss: 0.399962  [24960/50000]
loss: 0.381912  [29952/50000]
loss: 0.441661  [34944/50000]
loss: 0.393626  [39936/50000]
loss: 0.418839  [44928/50000]
loss: 0.472293  [31200/50000]
Epoch average loss: 0.4069726765155792


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.419860  [    0/50000]
loss: 0.380705  [ 4992/50000]
loss: 0.410836  [ 9984/50000]
loss: 0.366034  [14976/50000]
loss: 0.446443  [19968/50000]
loss: 0.442811  [24960/50000]
loss: 0.420510  [29952/50000]
loss: 0.388687  [34944/50000]
loss: 0.416584  [39936/50000]
loss: 0.402254  [44928/50000]
loss: 0.390446  [31200/50000]
Epoch average loss: 0.40725913643836975
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 15 - Done
Seed 0 - Iteration 2 - Model 16 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.464679  [    0/50000]
loss: 0.403995  [ 4992/50000]
loss: 0.402352  [ 9984/50000]
loss: 0.376603  [14976/50000]
loss: 0.386065  [19968/50000]
loss: 0.367942  [24960/50000]
loss: 0.404427  [29952/50000]
loss: 0.392440  [34944/50000]
loss: 0.377312  [39936/50000]
loss: 0.430007  [44928/50000]
loss: 0.507703  [31200/50000]
Epoch average loss: 0.40537187457084656


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.384469  [    0/50000]
loss: 0.381264  [ 4992/50000]
loss: 0.397956  [ 9984/50000]
loss: 0.422595  [14976/50000]
loss: 0.410794  [19968/50000]
loss: 0.402590  [24960/50000]
loss: 0.395272  [29952/50000]
loss: 0.476175  [34944/50000]
loss: 0.384336  [39936/50000]
loss: 0.472523  [44928/50000]
loss: 0.458235  [31200/50000]
Epoch average loss: 0.40455058217048645


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.391535  [    0/50000]
loss: 0.411890  [ 4992/50000]
loss: 0.431069  [ 9984/50000]
loss: 0.414064  [14976/50000]
loss: 0.343838  [19968/50000]
loss: 0.426296  [24960/50000]
loss: 0.456756  [29952/50000]
loss: 0.406646  [34944/50000]
loss: 0.339297  [39936/50000]
loss: 0.380828  [44928/50000]
loss: 0.464699  [31200/50000]
Epoch average loss: 0.40308722853660583
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 16 - Done
Seed 0 - Iteration 2 - Model 17 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.438761  [    0/50000]
loss: 0.400246  [ 4992/50000]
loss: 0.423414  [ 9984/50000]
loss: 0.372455  [14976/50000]
loss: 0.425501  [19968/50000]
loss: 0.469159  [24960/50000]
loss: 0.416316  [29952/50000]
loss: 0.372584  [34944/50000]
loss: 0.329336  [39936/50000]
loss: 0.422975  [44928/50000]
loss: 0.344702  [31200/50000]
Epoch average loss: 0.40355220437049866


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.384906  [    0/50000]
loss: 0.349315  [ 4992/50000]
loss: 0.479319  [ 9984/50000]
loss: 0.398856  [14976/50000]
loss: 0.432477  [19968/50000]
loss: 0.431141  [24960/50000]
loss: 0.370067  [29952/50000]
loss: 0.424092  [34944/50000]
loss: 0.433629  [39936/50000]
loss: 0.385605  [44928/50000]
loss: 0.401566  [31200/50000]
Epoch average loss: 0.4022217392921448


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.391356  [    0/50000]
loss: 0.393511  [ 4992/50000]
loss: 0.402859  [ 9984/50000]
loss: 0.387313  [14976/50000]
loss: 0.400517  [19968/50000]
loss: 0.327724  [24960/50000]
loss: 0.441060  [29952/50000]
loss: 0.405929  [34944/50000]
loss: 0.399493  [39936/50000]
loss: 0.429088  [44928/50000]
loss: 0.388619  [31200/50000]
Epoch average loss: 0.40122827887535095
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

Seed 0 - Iteration 2 - Model 17 - Done
Seed 0 - Iteration 2 - Model 18 - Begin


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.370214  [    0/50000]
loss: 0.405603  [ 4992/50000]
loss: 0.338058  [ 9984/50000]
loss: 0.406814  [14976/50000]
loss: 0.395296  [19968/50000]
loss: 0.358272  [24960/50000]
loss: 0.401506  [29952/50000]
loss: 0.403229  [34944/50000]
loss: 0.434024  [39936/50000]
loss: 0.381814  [44928/50000]
loss: 0.322242  [31200/50000]
Epoch average loss: 0.40837278962135315


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.448676  [    0/50000]
loss: 0.425324  [ 4992/50000]
loss: 0.373182  [ 9984/50000]
loss: 0.411428  [14976/50000]
loss: 0.393311  [19968/50000]
loss: 0.329645  [24960/50000]
loss: 0.401911  [29952/50000]
loss: 0.348168  [34944/50000]
loss: 0.402510  [39936/50000]
loss: 0.368507  [44928/50000]
loss: 0.472226  [31200/50000]
Epoch average loss: 0.40450945496559143


  0%|          | 0/391 [00:00<?, ?it/s]

loss: 0.432413  [    0/50000]
loss: 0.378160  [ 4992/50000]
loss: 0.408542  [ 9984/50000]
loss: 0.430645  [14976/50000]
loss: 0.445198  [19968/50000]
loss: 0.431452  [24960/50000]
loss: 0.383269  [29952/50000]
loss: 0.431964  [34944/50000]
loss: 0.436762  [39936/50000]
loss: 0.467192  [44928/50000]
loss: 0.432288  [31200/50000]
Epoch average loss: 0.40595898032188416
Done!


  0%|          | 0/2691 [00:00<?, ?it/s]

In [None]:
# Persist the list of removed indices to disk (pickle format) so that it can
# be loaded again for further analysis.
with open('removed_idx.pkl', mode = 'wb') as pkl_out:
    pickle.dump(removed_idx, file = pkl_out)