In [93]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import random
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import shutil

sys.path.append('../')
from meta_fusion.benchmarks import *
from meta_fusion.methods import *
from meta_fusion.models import *
from meta_fusion.utils import *
from meta_fusion.third_party import *
from meta_fusion.synthetic_data import PrepareSyntheticData
from meta_fusion.config import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [113]:
repetition=1
seed=1

# Data model parameters
n = 2000
dim_modalities = [2000, 400]
noise_ratios = [0.3, 0.3]
trans_type = ["linear", "linear", "quadratic"]
dim_latent = [50, 30, 20]
mod_prop = [0, 0, 1, 0]
interactive_prop = 0

#mod_outs = [[0, 60, 80, 100, 120],[0, 60, 80, 100, 120]]
mod_outs = [[0, 50, 60, 80],[0, 60]]
num_modalities = 2
combined_hiddens = [300,200,100]
mod1_hiddens = mod2_hiddens = [[128],[128]]

# data parameters
data_name = 'regression'
exp_name = "negative_learning"
output_dim = 1  # specify the output dimension for regression

extractor_type = 'PCA'
if extractor_type == 'encoder':
    separate=False
    is_mod_static=[False]*num_modalities  
elif extractor_type == "separate":
    separate=True
    is_mod_static=[False]*num_modalities  
elif extractor_type == 'PCA':
    separate=False
    is_mod_static=[True]*num_modalities  
freeze_mod_extractors=[False]*num_modalities

# Load default model configurations 
config = load_config('../experiments_synthetic/config.json')

# Model files directory
config['ckpt_dir'] = f"./checkpoints/{exp_name}/{seed}/"

# Update other training parameters
config['divergence_weight_type'] = 'clustering'
config['rho_list'] = [0,5,10]
config['optimal_k'] = None
config['output_dim'] = output_dim
config["init_lr"] = 0.001
config["epochs"] = 30
#config["epochs"] = 2
config["ensemble_methods"] = [
        ]

In [114]:
data_preparer = PrepareSyntheticData(data_name = data_name, test_size = 0.2, val_size = 0.2)

In [115]:
def run_single_experiment(config, n, random_state):

    config['random_state'] = random_state
    res_list = []
    best_rho = {}
    cohort_pairs = {}
    cluster_idxs = {}


    #----------------#
    # Split dataset  #
    #----------------#
    train_loader, val_loader, test_loader, oracle_train_loader, oracle_val_loader, oracle_test_loader =\
    data_preparer.get_data_loaders(n, trans_type=trans_type, mod_prop=mod_prop, 
                                   interactive_prop = interactive_prop,
                                   dim_modalities=dim_modalities, dim_latent=dim_latent,
                                   noise_ratios=noise_ratios, random_state=random_state)
    # Get data info
    data_info = data_preparer.get_data_info()
    n = data_info[1]
    n_train = data_info[2]
    n_val = data_info[3]
    n_test = data_info[4]

    print(f"Finished splitting {data_name} dataset. Data information are summarized below:\n"
            f"Modality dimensions: {dim_modalities}\n"
            f"Data size: {n}\n"
            f"Train size: {n_train}\n"
            f"Val size: {n_val}\n"
            f"Test size: {n_test}")
    sys.stdout.flush() 


    #----------------------------#
    #  Adversarial Meta Fusion   #
    #----------------------------#
    meta_extractor = Extractors(mod_outs, dim_modalities, train_loader, val_loader)
    if (extractor_type == 'encoder') or (extractor_type == 'separate'):
        _ = meta_extractor.get_encoder_extractors(mod_hiddens, separate=separate, config=extractor_config)
    elif extractor_type == 'PCA':
        _ = meta_extractor.get_PCA_extractors()
    cohort = Cohorts(extractors=meta_extractor, combined_hidden_layers=combined_hiddens, output_dim=output_dim,
                     is_mod_static=is_mod_static, freeze_mod_extractors=freeze_mod_extractors)

    cohort_models = cohort.get_cohort_models()
    _, dim_pairs = cohort.get_cohort_info()
    trainer = AdversarialTrainer(config, cohort_models, [train_loader, val_loader])

    # Cohort with normal adaptive weights 
    trainer.train_adaptive() 
    res = trainer.test_adaptive(test_loader)
    res_list.append(res)
    
    best_rho['cohort'] = trainer.best_rho
    cohort_pairs['cohort'] = dim_pairs
    cluster_idxs['cohort'] = trainer.cluster_idxs
    print(f"Finished running adaptive meta fusion!")

    # Cohort with no mutual learning
    res = trainer.test_ablation(test_loader)
    res_list.append(res)
    
    cohort_pairs['indep_cohort'] = dim_pairs
    print(f"Finished testing indepedent cohort!")
    
    # Cohort with adversarial weights 
    trainer.train_adversarial() 
    res = trainer.test_adversarial(test_loader)
    res_list.append(res)
    
    best_rho['adversarial_cohort'] = trainer.fixed_rho
    cohort_pairs['adversarial_cohort'] = dim_pairs
    cluster_idxs['adversarial_cohort'] = trainer.cluster_idxs
    print(f"Finished running adversarial meta fusion!")

    results = []
    for res in res_list:
        for method, val in res.items():
            results.append({'Method': method, 'Test_metric': val, 
                            'best_rho':best_rho.get(method), 'cohort_pairs':cohort_pairs.get(method),
                            'cluster_idxs': cluster_idxs.get(method)})
    

    results = pd.DataFrame(results)

    results['random_state']=random_state
    results["dim_modalities"] = [dim_modalities] * len(results)
    results['n'] = n
    results['n_train'] = n_train
    results['n_val'] = n_val
    results['n_test'] = n_test 

    return results

In [116]:
results = []

for i in tqdm(range(1, repetition+1), desc="Repetitions", leave=True, position=0):
    print(f'Running with repetition {i}...')
    random_state = repetition * (seed-1) + i
    set_random_seed(random_state)
    
    # Run experiment
    tmp = run_single_experiment(config, n, random_state)
    
    results.append(tmp)

Repetitions:   0%|                                                                           | 0/1 [00:00<?, ?it/s]

Running with repetition 1...
Finished splitting regression dataset. Data information are summarized below:
Modality dimensions: [2000, 400]
Data size: 2000
Train size: 1280
Val size: 320
Test size: 400
Start training student cohort with normal adaptive weights...
Training with disagreement penalty = 0

Epoch: 1/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 280.28it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 330.47it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 350.99it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 358.25it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 362.57it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 370.17it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 371.35it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 184.970, train task loss: 184.970 - val loss: 125.662, val task loss: 125.662 [*] Best so far
model_2: train loss: 178.115, train task loss: 178.115 - val loss: 126.519, val task loss: 126.519 [*] Best so far
model_3: train loss: 176.925, train task loss: 176.925 - val loss: 117.450, val task loss: 117.450 [*] Best so far
model_4: train loss: 183.860, train task loss: 183.860 - val loss: 132.346, val task loss: 132.346 [*] Best so far
model_5: train loss: 180.163, train task loss: 180.163 - val loss: 124.063, val task loss: 124.063 [*] Best so far
model_6: train loss: 177.276, train task loss: 177.276 - val loss: 127.754, val task loss: 127.754 [*] Best so far
model_7: train loss: 193.489, train task loss: 193.489 - val loss: 137.464, val task loss: 137.464 [*] Best so far

Epoch: 2/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 300.23it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 340.87it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 357.35it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 366.14it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 370.01it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 372.44it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 378.31it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 135.014, train task loss: 135.014 - val loss: 99.065, val task loss: 99.065 [*] Best so far
model_2: train loss: 139.325, train task loss: 139.325 - val loss: 106.814, val task loss: 106.814 [*] Best so far
model_3: train loss: 129.766, train task loss: 129.766 - val loss: 96.893, val task loss: 96.893 [*] Best so far
model_4: train loss: 146.330, train task loss: 146.330 - val loss: 112.299, val task loss: 112.299 [*] Best so far
model_5: train loss: 136.006, train task loss: 136.006 - val loss: 102.784, val task loss: 102.784 [*] Best so far
model_6: train loss: 141.985, train task loss: 141.985 - val loss: 103.251, val task loss: 103.251 [*] Best so far
model_7: train loss: 152.344, train task loss: 152.344 - val loss: 109.797, val task loss: 109.797 [*] Best so far

Epoch: 3/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 297.33it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 349.56it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 359.53it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 365.27it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 370.35it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 369.76it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 368.70it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 105.268, train task loss: 105.268 - val loss: 91.264, val task loss: 91.264 [*] Best so far
model_2: train loss: 113.610, train task loss: 113.610 - val loss: 91.911, val task loss: 91.911 [*] Best so far
model_3: train loss: 105.661, train task loss: 105.661 - val loss: 95.829, val task loss: 95.829 [*] Best so far
model_4: train loss: 119.191, train task loss: 119.191 - val loss: 101.165, val task loss: 101.165 [*] Best so far
model_5: train loss: 110.774, train task loss: 110.774 - val loss: 93.858, val task loss: 93.858 [*] Best so far
model_6: train loss: 119.559, train task loss: 119.559 - val loss: 94.867, val task loss: 94.867 [*] Best so far
model_7: train loss: 123.576, train task loss: 123.576 - val loss: 92.934, val task loss: 92.934 [*] Best so far

Epoch: 4/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 287.33it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 330.71it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 348.23it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 361.24it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 367.92it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 369.35it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 372.73it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 89.269, train task loss: 89.269 - val loss: 89.273, val task loss: 89.273 [*] Best so far
model_2: train loss: 94.554, train task loss: 94.554 - val loss: 89.035, val task loss: 89.035 [*] Best so far
model_3: train loss: 90.098, train task loss: 90.098 - val loss: 90.328, val task loss: 90.328 [*] Best so far
model_4: train loss: 101.549, train task loss: 101.549 - val loss: 96.255, val task loss: 96.255 [*] Best so far
model_5: train loss: 94.422, train task loss: 94.422 - val loss: 89.390, val task loss: 89.390 [*] Best so far
model_6: train loss: 106.591, train task loss: 106.591 - val loss: 92.055, val task loss: 92.055 [*] Best so far
model_7: train loss: 107.205, train task loss: 107.205 - val loss: 86.737, val task loss: 86.737 [*] Best so far

Epoch: 5/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 303.38it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 342.56it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 356.62it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 362.53it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 368.77it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 368.37it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 370.73it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 74.721, train task loss: 74.721 - val loss: 88.643, val task loss: 88.643 [*] Best so far
model_2: train loss: 82.132, train task loss: 82.132 - val loss: 89.273, val task loss: 89.273
model_3: train loss: 74.297, train task loss: 74.297 - val loss: 91.384, val task loss: 91.384
model_4: train loss: 88.397, train task loss: 88.397 - val loss: 94.368, val task loss: 94.368 [*] Best so far
model_5: train loss: 79.454, train task loss: 79.454 - val loss: 86.589, val task loss: 86.589 [*] Best so far
model_6: train loss: 94.620, train task loss: 94.620 - val loss: 90.584, val task loss: 90.584 [*] Best so far
model_7: train loss: 95.391, train task loss: 95.391 - val loss: 85.143, val task loss: 85.143 [*] Best so far

Epoch: 6/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 306.82it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 343.35it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 356.90it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 358.06it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 362.20it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 362.12it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 364.27it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 53.416, train task loss: 53.416 - val loss: 82.595, val task loss: 82.595 [*] Best so far
model_2: train loss: 65.390, train task loss: 65.390 - val loss: 86.721, val task loss: 86.721 [*] Best so far
model_3: train loss: 51.542, train task loss: 51.542 - val loss: 84.908, val task loss: 84.908 [*] Best so far
model_4: train loss: 71.859, train task loss: 71.859 - val loss: 94.433, val task loss: 94.433
model_5: train loss: 56.030, train task loss: 56.030 - val loss: 86.513, val task loss: 86.513 [*] Best so far
model_6: train loss: 80.290, train task loss: 80.290 - val loss: 93.676, val task loss: 93.676
model_7: train loss: 85.022, train task loss: 85.022 - val loss: 83.903, val task loss: 83.903 [*] Best so far

Epoch: 7/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 309.69it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 343.67it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 356.61it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 363.14it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 368.22it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 370.38it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 372.10it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 27.934, train task loss: 27.934 - val loss: 69.818, val task loss: 69.818 [*] Best so far
model_2: train loss: 43.500, train task loss: 43.500 - val loss: 80.391, val task loss: 80.391 [*] Best so far
model_3: train loss: 28.576, train task loss: 28.576 - val loss: 77.220, val task loss: 77.220 [*] Best so far
model_4: train loss: 52.319, train task loss: 52.319 - val loss: 92.086, val task loss: 92.086 [*] Best so far
model_5: train loss: 32.230, train task loss: 32.230 - val loss: 86.167, val task loss: 86.167 [*] Best so far
model_6: train loss: 61.485, train task loss: 61.485 - val loss: 92.624, val task loss: 92.624
model_7: train loss: 70.014, train task loss: 70.014 - val loss: 79.844, val task loss: 79.844 [*] Best so far

Epoch: 8/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 305.24it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 346.24it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 354.00it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 362.12it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 370.34it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 369.23it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 373.12it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 15.929, train task loss: 15.929 - val loss: 61.256, val task loss: 61.256 [*] Best so far
model_2: train loss: 27.010, train task loss: 27.010 - val loss: 83.141, val task loss: 83.141
model_3: train loss: 16.845, train task loss: 16.845 - val loss: 67.531, val task loss: 67.531 [*] Best so far
model_4: train loss: 34.026, train task loss: 34.026 - val loss: 92.396, val task loss: 92.396
model_5: train loss: 19.124, train task loss: 19.124 - val loss: 80.414, val task loss: 80.414 [*] Best so far
model_6: train loss: 44.047, train task loss: 44.047 - val loss: 96.687, val task loss: 96.687
model_7: train loss: 47.109, train task loss: 47.109 - val loss: 67.237, val task loss: 67.237 [*] Best so far

Epoch: 9/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 302.48it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 348.01it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 362.23it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 369.18it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 372.30it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 378.59it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 378.77it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 10.044, train task loss: 10.044 - val loss: 58.551, val task loss: 58.551 [*] Best so far
model_2: train loss: 18.178, train task loss: 18.178 - val loss: 77.139, val task loss: 77.139 [*] Best so far
model_3: train loss: 10.970, train task loss: 10.970 - val loss: 67.641, val task loss: 67.641
model_4: train loss: 21.903, train task loss: 21.903 - val loss: 92.846, val task loss: 92.846
model_5: train loss: 12.775, train task loss: 12.775 - val loss: 73.567, val task loss: 73.567 [*] Best so far
model_6: train loss: 32.429, train task loss: 32.429 - val loss: 96.226, val task loss: 96.226
model_7: train loss: 27.869, train task loss: 27.869 - val loss: 63.640, val task loss: 63.640 [*] Best so far

Epoch: 10/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 307.61it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 348.63it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 359.48it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 364.09it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 368.06it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 372.13it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 374.63it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 5.219, train task loss: 5.219 - val loss: 54.250, val task loss: 54.250 [*] Best so far
model_2: train loss: 10.908, train task loss: 10.908 - val loss: 72.695, val task loss: 72.695 [*] Best so far
model_3: train loss: 6.747, train task loss: 6.747 - val loss: 63.602, val task loss: 63.602 [*] Best so far
model_4: train loss: 14.463, train task loss: 14.463 - val loss: 92.759, val task loss: 92.759
model_5: train loss: 8.885, train task loss: 8.885 - val loss: 75.462, val task loss: 75.462
model_6: train loss: 25.829, train task loss: 25.829 - val loss: 97.419, val task loss: 97.419
model_7: train loss: 19.496, train task loss: 19.496 - val loss: 64.310, val task loss: 64.310

Epoch: 11/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 307.84it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 345.68it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 356.50it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 363.28it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 368.06it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 369.06it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 368.65it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 2.909, train task loss: 2.909 - val loss: 55.410, val task loss: 55.410
model_2: train loss: 6.673, train task loss: 6.673 - val loss: 74.187, val task loss: 74.187
model_3: train loss: 3.868, train task loss: 3.868 - val loss: 63.241, val task loss: 63.241 [*] Best so far
model_4: train loss: 9.989, train task loss: 9.989 - val loss: 94.296, val task loss: 94.296
model_5: train loss: 5.429, train task loss: 5.429 - val loss: 73.420, val task loss: 73.420 [*] Best so far
model_6: train loss: 17.773, train task loss: 17.773 - val loss: 98.543, val task loss: 98.543
model_7: train loss: 11.845, train task loss: 11.845 - val loss: 62.267, val task loss: 62.267 [*] Best so far

Epoch: 12/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 295.26it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 341.81it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 355.21it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 362.52it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 356.37it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 358.53it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 365.12it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 1.793, train task loss: 1.793 - val loss: 53.968, val task loss: 53.968 [*] Best so far
model_2: train loss: 3.586, train task loss: 3.586 - val loss: 71.510, val task loss: 71.510 [*] Best so far
model_3: train loss: 2.411, train task loss: 2.411 - val loss: 64.075, val task loss: 64.075
model_4: train loss: 6.529, train task loss: 6.529 - val loss: 93.814, val task loss: 93.814
model_5: train loss: 3.282, train task loss: 3.282 - val loss: 72.598, val task loss: 72.598 [*] Best so far
model_6: train loss: 12.984, train task loss: 12.984 - val loss: 99.131, val task loss: 99.131
model_7: train loss: 8.620, train task loss: 8.620 - val loss: 57.261, val task loss: 57.261 [*] Best so far

Epoch: 13/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 300.45it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 344.46it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 358.64it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 367.19it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 369.82it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 372.85it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 376.22it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 1.202, train task loss: 1.202 - val loss: 53.980, val task loss: 53.980
model_2: train loss: 2.373, train task loss: 2.373 - val loss: 68.700, val task loss: 68.700 [*] Best so far
model_3: train loss: 1.361, train task loss: 1.361 - val loss: 63.476, val task loss: 63.476
model_4: train loss: 3.921, train task loss: 3.921 - val loss: 93.584, val task loss: 93.584
model_5: train loss: 1.770, train task loss: 1.770 - val loss: 71.297, val task loss: 71.297 [*] Best so far
model_6: train loss: 8.994, train task loss: 8.994 - val loss: 103.713, val task loss: 103.713
model_7: train loss: 5.865, train task loss: 5.865 - val loss: 56.314, val task loss: 56.314 [*] Best so far

Epoch: 14/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 311.12it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 350.27it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 359.38it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 365.12it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 367.56it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 375.15it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 375.04it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.862, train task loss: 0.862 - val loss: 53.357, val task loss: 53.357 [*] Best so far
model_2: train loss: 1.511, train task loss: 1.511 - val loss: 71.217, val task loss: 71.217
model_3: train loss: 0.770, train task loss: 0.770 - val loss: 62.968, val task loss: 62.968 [*] Best so far
model_4: train loss: 2.441, train task loss: 2.441 - val loss: 95.133, val task loss: 95.133
model_5: train loss: 1.159, train task loss: 1.159 - val loss: 71.516, val task loss: 71.516
model_6: train loss: 7.627, train task loss: 7.627 - val loss: 103.739, val task loss: 103.739
model_7: train loss: 3.863, train task loss: 3.863 - val loss: 57.209, val task loss: 57.209

Epoch: 15/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 304.86it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 345.99it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 358.67it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 368.15it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 372.57it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 375.18it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 378.70it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.659, train task loss: 0.659 - val loss: 53.294, val task loss: 53.294 [*] Best so far
model_2: train loss: 1.044, train task loss: 1.044 - val loss: 68.181, val task loss: 68.181 [*] Best so far
model_3: train loss: 0.506, train task loss: 0.506 - val loss: 63.606, val task loss: 63.606
model_4: train loss: 1.613, train task loss: 1.613 - val loss: 94.945, val task loss: 94.945
model_5: train loss: 0.810, train task loss: 0.810 - val loss: 70.370, val task loss: 70.370 [*] Best so far
model_6: train loss: 6.113, train task loss: 6.113 - val loss: 99.076, val task loss: 99.076
model_7: train loss: 3.373, train task loss: 3.373 - val loss: 53.010, val task loss: 53.010 [*] Best so far

Epoch: 16/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 302.21it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 346.80it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:02, 367.16it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 370.39it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 371.78it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 375.68it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 375.62it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.483, train task loss: 0.483 - val loss: 53.509, val task loss: 53.509
model_2: train loss: 0.644, train task loss: 0.644 - val loss: 69.082, val task loss: 69.082
model_3: train loss: 0.334, train task loss: 0.334 - val loss: 63.263, val task loss: 63.263
model_4: train loss: 1.239, train task loss: 1.239 - val loss: 96.485, val task loss: 96.485
model_5: train loss: 0.572, train task loss: 0.572 - val loss: 71.482, val task loss: 71.482
model_6: train loss: 3.794, train task loss: 3.794 - val loss: 101.662, val task loss: 101.662
model_7: train loss: 2.047, train task loss: 2.047 - val loss: 52.996, val task loss: 52.996 [*] Best so far

Epoch: 17/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 306.57it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 343.93it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 358.00it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 362.17it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 359.50it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 362.43it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 368.97it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.333, train task loss: 0.333 - val loss: 52.703, val task loss: 52.703 [*] Best so far
model_2: train loss: 0.380, train task loss: 0.380 - val loss: 68.178, val task loss: 68.178 [*] Best so far
model_3: train loss: 0.299, train task loss: 0.299 - val loss: 63.672, val task loss: 63.672
model_4: train loss: 0.773, train task loss: 0.773 - val loss: 94.366, val task loss: 94.366
model_5: train loss: 0.566, train task loss: 0.566 - val loss: 71.650, val task loss: 71.650
model_6: train loss: 2.739, train task loss: 2.739 - val loss: 103.816, val task loss: 103.816
model_7: train loss: 1.370, train task loss: 1.370 - val loss: 54.765, val task loss: 54.765

Epoch: 18/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 313.62it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 358.05it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:02, 366.09it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 368.84it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 369.52it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 375.63it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 373.33it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.272, train task loss: 0.272 - val loss: 52.826, val task loss: 52.826
model_2: train loss: 0.272, train task loss: 0.272 - val loss: 68.743, val task loss: 68.743
model_3: train loss: 0.237, train task loss: 0.237 - val loss: 63.429, val task loss: 63.429
model_4: train loss: 0.471, train task loss: 0.471 - val loss: 94.225, val task loss: 94.225
model_5: train loss: 0.378, train task loss: 0.378 - val loss: 71.203, val task loss: 71.203
model_6: train loss: 1.729, train task loss: 1.729 - val loss: 105.459, val task loss: 105.459
model_7: train loss: 0.966, train task loss: 0.966 - val loss: 53.573, val task loss: 53.573

Epoch: 19/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 318.52it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 360.29it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:02, 368.28it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 379.46it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 382.87it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 387.56it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 386.11it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.188, train task loss: 0.188 - val loss: 52.950, val task loss: 52.950
model_2: train loss: 0.207, train task loss: 0.207 - val loss: 68.067, val task loss: 68.067 [*] Best so far
model_3: train loss: 0.211, train task loss: 0.211 - val loss: 63.233, val task loss: 63.233
model_4: train loss: 0.280, train task loss: 0.280 - val loss: 94.931, val task loss: 94.931
model_5: train loss: 0.343, train task loss: 0.343 - val loss: 72.093, val task loss: 72.093
model_6: train loss: 1.322, train task loss: 1.322 - val loss: 103.527, val task loss: 103.527
model_7: train loss: 0.732, train task loss: 0.732 - val loss: 53.141, val task loss: 53.141

Epoch: 20/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 301.83it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 346.61it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 354.85it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 342.57it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 356.36it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 361.42it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 365.20it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.160, train task loss: 0.160 - val loss: 52.421, val task loss: 52.421 [*] Best so far
model_2: train loss: 0.157, train task loss: 0.157 - val loss: 68.081, val task loss: 68.081
model_3: train loss: 0.234, train task loss: 0.234 - val loss: 63.532, val task loss: 63.532
model_4: train loss: 0.245, train task loss: 0.245 - val loss: 94.097, val task loss: 94.097
model_5: train loss: 0.269, train task loss: 0.269 - val loss: 71.323, val task loss: 71.323
model_6: train loss: 0.775, train task loss: 0.775 - val loss: 103.462, val task loss: 103.462
model_7: train loss: 0.558, train task loss: 0.558 - val loss: 53.235, val task loss: 53.235

Epoch: 21/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 302.48it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 343.90it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:02, 362.69it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 368.46it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 372.54it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 375.28it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 376.56it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.144, train task loss: 0.144 - val loss: 52.926, val task loss: 52.926
model_2: train loss: 0.114, train task loss: 0.114 - val loss: 68.033, val task loss: 68.033 [*] Best so far
model_3: train loss: 0.172, train task loss: 0.172 - val loss: 63.201, val task loss: 63.201
model_4: train loss: 0.175, train task loss: 0.175 - val loss: 94.910, val task loss: 94.910
model_5: train loss: 0.220, train task loss: 0.220 - val loss: 70.843, val task loss: 70.843
model_6: train loss: 0.514, train task loss: 0.514 - val loss: 102.271, val task loss: 102.271
model_7: train loss: 0.418, train task loss: 0.418 - val loss: 53.003, val task loss: 53.003

Epoch: 22/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 306.55it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 344.86it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 358.78it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 366.76it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 371.87it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 377.68it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 377.75it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.108, train task loss: 0.108 - val loss: 52.762, val task loss: 52.762
model_2: train loss: 0.119, train task loss: 0.119 - val loss: 68.049, val task loss: 68.049
model_3: train loss: 0.142, train task loss: 0.142 - val loss: 62.458, val task loss: 62.458 [*] Best so far
model_4: train loss: 0.144, train task loss: 0.144 - val loss: 94.324, val task loss: 94.324
model_5: train loss: 0.207, train task loss: 0.207 - val loss: 70.607, val task loss: 70.607
model_6: train loss: 0.345, train task loss: 0.345 - val loss: 103.211, val task loss: 103.211
model_7: train loss: 0.258, train task loss: 0.258 - val loss: 53.324, val task loss: 53.324

Epoch: 23/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 315.18it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 352.66it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 359.92it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 364.87it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 371.45it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 375.74it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 377.02it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.110, train task loss: 0.110 - val loss: 52.860, val task loss: 52.860
model_2: train loss: 0.128, train task loss: 0.128 - val loss: 68.009, val task loss: 68.009 [*] Best so far
model_3: train loss: 0.138, train task loss: 0.138 - val loss: 63.492, val task loss: 63.492
model_4: train loss: 0.127, train task loss: 0.127 - val loss: 94.512, val task loss: 94.512
model_5: train loss: 0.179, train task loss: 0.179 - val loss: 70.831, val task loss: 70.831
model_6: train loss: 0.280, train task loss: 0.280 - val loss: 103.720, val task loss: 103.720
model_7: train loss: 0.215, train task loss: 0.215 - val loss: 53.634, val task loss: 53.634

Epoch: 24/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 313.13it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 344.64it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 357.86it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 369.07it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 372.49it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 376.20it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 369.04it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.103, train task loss: 0.103 - val loss: 52.863, val task loss: 52.863
model_2: train loss: 0.145, train task loss: 0.145 - val loss: 68.478, val task loss: 68.478
model_3: train loss: 0.136, train task loss: 0.136 - val loss: 63.547, val task loss: 63.547
model_4: train loss: 0.125, train task loss: 0.125 - val loss: 95.319, val task loss: 95.319
model_5: train loss: 0.163, train task loss: 0.163 - val loss: 71.222, val task loss: 71.222
model_6: train loss: 0.195, train task loss: 0.195 - val loss: 102.512, val task loss: 102.512
model_7: train loss: 0.208, train task loss: 0.208 - val loss: 53.621, val task loss: 53.621

Epoch: 25/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 313.46it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 350.69it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 360.88it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 368.22it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 374.81it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 371.92it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 376.78it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.113, train task loss: 0.113 - val loss: 53.125, val task loss: 53.125
model_2: train loss: 0.136, train task loss: 0.136 - val loss: 68.454, val task loss: 68.454
model_3: train loss: 0.152, train task loss: 0.152 - val loss: 63.336, val task loss: 63.336
model_4: train loss: 0.118, train task loss: 0.118 - val loss: 95.389, val task loss: 95.389
model_5: train loss: 0.131, train task loss: 0.131 - val loss: 71.262, val task loss: 71.262
model_6: train loss: 0.136, train task loss: 0.136 - val loss: 102.364, val task loss: 102.364
model_7: train loss: 0.158, train task loss: 0.158 - val loss: 53.284, val task loss: 53.284

Epoch: 26/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 303.69it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 344.48it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 359.73it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 364.38it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 368.66it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 371.83it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 373.87it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.102, train task loss: 0.102 - val loss: 52.730, val task loss: 52.730
model_2: train loss: 0.142, train task loss: 0.142 - val loss: 68.467, val task loss: 68.467
model_3: train loss: 0.168, train task loss: 0.168 - val loss: 63.400, val task loss: 63.400
model_4: train loss: 0.100, train task loss: 0.100 - val loss: 95.079, val task loss: 95.079
model_5: train loss: 0.093, train task loss: 0.093 - val loss: 70.448, val task loss: 70.448
model_6: train loss: 0.113, train task loss: 0.113 - val loss: 102.707, val task loss: 102.707
model_7: train loss: 0.112, train task loss: 0.112 - val loss: 52.901, val task loss: 52.901 [*] Best so far

Epoch: 27/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 302.09it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 341.36it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 356.70it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 366.09it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 368.63it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 374.31it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 381.12it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.098, train task loss: 0.098 - val loss: 52.572, val task loss: 52.572
model_2: train loss: 0.132, train task loss: 0.132 - val loss: 68.430, val task loss: 68.430
model_3: train loss: 0.194, train task loss: 0.194 - val loss: 63.023, val task loss: 63.023
model_4: train loss: 0.098, train task loss: 0.098 - val loss: 94.902, val task loss: 94.902
model_5: train loss: 0.086, train task loss: 0.086 - val loss: 70.860, val task loss: 70.860
model_6: train loss: 0.080, train task loss: 0.080 - val loss: 102.253, val task loss: 102.253
model_7: train loss: 0.097, train task loss: 0.097 - val loss: 52.909, val task loss: 52.909

Epoch: 28/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:03, 305.85it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 350.86it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:02, 367.68it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 376.68it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 377.34it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 377.32it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 381.35it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.088, train task loss: 0.088 - val loss: 52.644, val task loss: 52.644
model_2: train loss: 0.130, train task loss: 0.130 - val loss: 68.920, val task loss: 68.920
model_3: train loss: 0.238, train task loss: 0.238 - val loss: 62.561, val task loss: 62.561
model_4: train loss: 0.100, train task loss: 0.100 - val loss: 95.029, val task loss: 95.029
model_5: train loss: 0.080, train task loss: 0.080 - val loss: 71.161, val task loss: 71.161
model_6: train loss: 0.058, train task loss: 0.058 - val loss: 102.963, val task loss: 102.963
model_7: train loss: 0.085, train task loss: 0.085 - val loss: 53.282, val task loss: 53.282

Epoch: 29/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 286.99it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 322.43it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 343.31it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 355.95it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 366.08it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 371.23it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 377.86it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.096, train task loss: 0.096 - val loss: 52.574, val task loss: 52.574
model_2: train loss: 0.116, train task loss: 0.116 - val loss: 68.463, val task loss: 68.463
model_3: train loss: 0.271, train task loss: 0.271 - val loss: 63.089, val task loss: 63.089
model_4: train loss: 0.102, train task loss: 0.102 - val loss: 95.686, val task loss: 95.686
model_5: train loss: 0.078, train task loss: 0.078 - val loss: 70.931, val task loss: 70.931
model_6: train loss: 0.058, train task loss: 0.058 - val loss: 102.801, val task loss: 102.801
model_7: train loss: 0.056, train task loss: 0.056 - val loss: 53.353, val task loss: 53.353

Epoch: 30/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:04, 295.12it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:03, 336.93it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:03, 353.44it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:00<00:02, 363.20it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:00<00:02, 368.03it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:02, 374.75it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:02, 371.80it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 0.107, train task loss: 0.107 - val loss: 51.967, val task loss: 51.967 [*] Best so far
model_2: train loss: 0.096, train task loss: 0.096 - val loss: 68.114, val task loss: 68.114
model_3: train loss: 0.303, train task loss: 0.303 - val loss: 63.629, val task loss: 63.629
model_4: train loss: 0.113, train task loss: 0.113 - val loss: 94.433, val task loss: 94.433
model_5: train loss: 0.080, train task loss: 0.080 - val loss: 70.447, val task loss: 70.447
model_6: train loss: 0.069, train task loss: 0.069 - val loss: 103.283, val task loss: 103.283
model_7: train loss: 0.064, train task loss: 0.064 - val loss: 53.442, val task loss: 53.442
Training with disagreement penalty = 5
Computing divergence weights by clustering method...




Initialization complete
Iteration 0, inertia 96.54188727370638.
Iteration 1, inertia 34.56048188199202.
Converged at iteration 1: strict convergence.
Computed divergence weights by clustering method, weights are [0.5 0.  0.  0.  0.  0.  0.5]

Epoch: 1/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 198.28it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 214.41it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.25it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 228.97it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 230.72it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.86it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 233.24it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 192.653, train task loss: 188.215 - val loss: 132.418, val task loss: 123.392 [*] Best so far
model_2: train loss: 191.650, train task loss: 184.508 - val loss: 137.995, val task loss: 125.229 [*] Best so far
model_3: train loss: 190.801, train task loss: 184.161 - val loss: 131.215, val task loss: 118.221 [*] Best so far
model_4: train loss: 196.110, train task loss: 189.271 - val loss: 145.140, val task loss: 131.059 [*] Best so far
model_5: train loss: 192.249, train task loss: 185.941 - val loss: 134.268, val task loss: 122.856 [*] Best so far
model_6: train loss: 192.961, train task loss: 185.424 - val loss: 139.427, val task loss: 123.757 [*] Best so far
model_7: train loss: 200.774, train task loss: 196.336 - val loss: 142.578, val task loss: 133.552 [*] Best so far

Epoch: 2/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 203.86it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 218.64it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 227.00it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 230.70it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 235.40it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 237.42it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 238.53it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 140.061, train task loss: 133.555 - val loss: 108.117, val task loss: 99.750 [*] Best so far
model_2: train loss: 148.831, train task loss: 137.994 - val loss: 117.312, val task loss: 103.742 [*] Best so far
model_3: train loss: 144.116, train task loss: 134.731 - val loss: 112.608, val task loss: 99.604 [*] Best so far
model_4: train loss: 157.375, train task loss: 144.383 - val loss: 126.933, val task loss: 107.734 [*] Best so far
model_5: train loss: 147.855, train task loss: 137.266 - val loss: 114.434, val task loss: 101.830 [*] Best so far
model_6: train loss: 158.868, train task loss: 142.664 - val loss: 127.522, val task loss: 105.193 [*] Best so far
model_7: train loss: 154.541, train task loss: 148.035 - val loss: 116.870, val task loss: 108.503 [*] Best so far

Epoch: 3/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 195.06it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 214.02it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 223.07it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 227.45it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 230.13it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.66it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 232.79it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 116.655, train task loss: 109.376 - val loss: 95.764, val task loss: 88.972 [*] Best so far
model_2: train loss: 127.460, train task loss: 113.368 - val loss: 106.069, val task loss: 90.619 [*] Best so far
model_3: train loss: 121.906, train task loss: 110.705 - val loss: 103.485, val task loss: 88.587 [*] Best so far
model_4: train loss: 144.930, train task loss: 121.062 - val loss: 125.005, val task loss: 96.143 [*] Best so far
model_5: train loss: 127.206, train task loss: 113.563 - val loss: 105.457, val task loss: 90.548 [*] Best so far
model_6: train loss: 155.603, train task loss: 124.559 - val loss: 135.018, val task loss: 94.873 [*] Best so far
model_7: train loss: 130.600, train task loss: 123.321 - val loss: 99.596, val task loss: 92.804 [*] Best so far

Epoch: 4/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 194.91it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 217.12it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.31it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.98it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 234.94it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 235.16it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 235.87it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 97.843, train task loss: 91.087 - val loss: 91.726, val task loss: 83.617 [*] Best so far
model_2: train loss: 112.041, train task loss: 94.779 - val loss: 109.955, val task loss: 85.105 [*] Best so far
model_3: train loss: 103.186, train task loss: 92.276 - val loss: 104.711, val task loss: 85.314 [*] Best so far
model_4: train loss: 140.610, train task loss: 104.896 - val loss: 146.397, val task loss: 93.638 [*] Best so far
model_5: train loss: 108.782, train task loss: 95.337 - val loss: 111.816, val task loss: 85.665 [*] Best so far
model_6: train loss: 157.885, train task loss: 109.095 - val loss: 172.866, val task loss: 91.064 [*] Best so far
model_7: train loss: 112.667, train task loss: 105.911 - val loss: 96.507, val task loss: 88.398 [*] Best so far

Epoch: 5/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 189.25it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 210.52it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 221.25it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 227.91it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 232.66it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 236.05it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 236.66it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 87.087, train task loss: 79.011 - val loss: 94.541, val task loss: 82.078 [*] Best so far
model_2: train loss: 103.759, train task loss: 84.698 - val loss: 125.483, val task loss: 88.056
model_3: train loss: 92.686, train task loss: 80.646 - val loss: 123.549, val task loss: 88.686
model_4: train loss: 132.084, train task loss: 94.892 - val loss: 177.430, val task loss: 99.572
model_5: train loss: 96.861, train task loss: 83.216 - val loss: 125.460, val task loss: 91.099
model_6: train loss: 152.923, train task loss: 99.064 - val loss: 195.685, val task loss: 96.372
model_7: train loss: 105.661, train task loss: 97.585 - val loss: 99.130, val task loss: 86.668 [*] Best so far

Epoch: 6/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 207.05it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 224.98it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 230.58it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 233.76it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 235.22it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 235.94it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 239.22it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 76.791, train task loss: 65.334 - val loss: 92.516, val task loss: 76.806 [*] Best so far
model_2: train loss: 95.815, train task loss: 73.901 - val loss: 125.865, val task loss: 83.942 [*] Best so far
model_3: train loss: 84.418, train task loss: 70.676 - val loss: 107.669, val task loss: 79.988 [*] Best so far
model_4: train loss: 127.552, train task loss: 86.917 - val loss: 154.404, val task loss: 88.918 [*] Best so far
model_5: train loss: 90.324, train task loss: 73.566 - val loss: 106.812, val task loss: 81.827 [*] Best so far
model_6: train loss: 141.741, train task loss: 89.461 - val loss: 168.571, val task loss: 86.792 [*] Best so far
model_7: train loss: 100.871, train task loss: 89.414 - val loss: 98.722, val task loss: 83.012 [*] Best so far

Epoch: 7/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 205.97it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 222.93it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 229.00it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 234.69it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 237.70it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 238.95it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 242.27it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 65.453, train task loss: 52.646 - val loss: 94.366, val task loss: 72.018 [*] Best so far
model_2: train loss: 84.682, train task loss: 63.688 - val loss: 131.766, val task loss: 80.302 [*] Best so far
model_3: train loss: 73.047, train task loss: 58.651 - val loss: 112.776, val task loss: 74.159 [*] Best so far
model_4: train loss: 113.024, train task loss: 75.704 - val loss: 171.058, val task loss: 87.594 [*] Best so far
model_5: train loss: 80.594, train task loss: 62.235 - val loss: 117.737, val task loss: 79.224 [*] Best so far
model_6: train loss: 125.586, train task loss: 78.039 - val loss: 198.220, val task loss: 87.519
model_7: train loss: 91.142, train task loss: 78.334 - val loss: 101.627, val task loss: 79.279 [*] Best so far

Epoch: 8/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 202.68it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 219.97it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 228.82it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 235.89it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 238.23it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 237.51it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 237.94it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 55.522, train task loss: 43.038 - val loss: 108.952, val task loss: 70.945 [*] Best so far
model_2: train loss: 73.435, train task loss: 51.483 - val loss: 156.170, val task loss: 78.966 [*] Best so far
model_3: train loss: 62.836, train task loss: 46.780 - val loss: 137.064, val task loss: 75.335
model_4: train loss: 96.473, train task loss: 61.220 - val loss: 208.488, val task loss: 87.102 [*] Best so far
model_5: train loss: 70.065, train task loss: 51.699 - val loss: 131.342, val task loss: 79.873
model_6: train loss: 107.939, train task loss: 63.913 - val loss: 254.810, val task loss: 89.026
model_7: train loss: 76.727, train task loss: 64.243 - val loss: 115.628, val task loss: 77.622 [*] Best so far

Epoch: 9/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 199.37it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 219.76it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 231.74it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 230.53it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 233.85it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 234.91it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 234.01it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 47.279, train task loss: 30.711 - val loss: 123.979, val task loss: 66.738 [*] Best so far
model_2: train loss: 75.116, train task loss: 40.785 - val loss: 191.451, val task loss: 78.779 [*] Best so far
model_3: train loss: 62.035, train task loss: 36.161 - val loss: 159.809, val task loss: 69.158 [*] Best so far
model_4: train loss: 100.233, train task loss: 49.433 - val loss: 261.748, val task loss: 91.520
model_5: train loss: 66.613, train task loss: 39.919 - val loss: 167.410, val task loss: 76.297 [*] Best so far
model_6: train loss: 112.709, train task loss: 50.479 - val loss: 333.071, val task loss: 99.162
model_7: train loss: 60.403, train task loss: 43.835 - val loss: 126.855, val task loss: 69.613 [*] Best so far

Epoch: 10/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 204.30it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 210.02it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 213.96it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 212.49it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 220.02it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 219.50it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 219.67it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 46.307, train task loss: 23.946 - val loss: 132.177, val task loss: 64.503 [*] Best so far
model_2: train loss: 79.072, train task loss: 30.031 - val loss: 212.474, val task loss: 77.734 [*] Best so far
model_3: train loss: 66.341, train task loss: 27.949 - val loss: 190.332, val task loss: 71.396
model_4: train loss: 96.145, train task loss: 36.773 - val loss: 282.674, val task loss: 87.538
model_5: train loss: 75.719, train task loss: 30.407 - val loss: 173.734, val task loss: 75.005 [*] Best so far
model_6: train loss: 108.740, train task loss: 39.065 - val loss: 349.815, val task loss: 93.651
model_7: train loss: 50.194, train task loss: 27.833 - val loss: 134.485, val task loss: 66.810 [*] Best so far

Epoch: 11/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 174.70it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:06, 188.55it/s][A
 15%|███████████                                                               | 192/1280 [00:01<00:05, 194.01it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:05, 199.35it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 200.61it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 199.96it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:04, 201.11it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 45.292, train task loss: 20.044 - val loss: 130.407, val task loss: 64.432 [*] Best so far
model_2: train loss: 67.409, train task loss: 24.314 - val loss: 198.275, val task loss: 71.379 [*] Best so far
model_3: train loss: 61.108, train task loss: 22.426 - val loss: 171.730, val task loss: 68.696 [*] Best so far
model_4: train loss: 78.059, train task loss: 27.263 - val loss: 297.614, val task loss: 90.256
model_5: train loss: 67.225, train task loss: 25.488 - val loss: 175.550, val task loss: 71.040 [*] Best so far
model_6: train loss: 84.154, train task loss: 27.902 - val loss: 332.956, val task loss: 91.888
model_7: train loss: 48.101, train task loss: 22.852 - val loss: 129.490, val task loss: 63.515 [*] Best so far

Epoch: 12/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 189.22it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 205.88it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 211.16it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 216.13it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 220.32it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 223.04it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 221.22it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 37.078, train task loss: 16.419 - val loss: 125.587, val task loss: 57.534 [*] Best so far
model_2: train loss: 55.024, train task loss: 19.729 - val loss: 207.977, val task loss: 70.745 [*] Best so far
model_3: train loss: 54.625, train task loss: 20.321 - val loss: 165.941, val task loss: 61.966 [*] Best so far
model_4: train loss: 62.779, train task loss: 21.900 - val loss: 293.946, val task loss: 85.690 [*] Best so far
model_5: train loss: 53.902, train task loss: 20.355 - val loss: 184.262, val task loss: 71.869
model_6: train loss: 69.581, train task loss: 23.760 - val loss: 349.479, val task loss: 91.207
model_7: train loss: 38.405, train task loss: 17.745 - val loss: 129.581, val task loss: 61.528 [*] Best so far

Epoch: 13/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 184.29it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 199.69it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 206.91it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 219.89it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 221.97it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 222.90it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 222.79it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 32.509, train task loss: 12.525 - val loss: 138.732, val task loss: 61.707
model_2: train loss: 47.079, train task loss: 15.210 - val loss: 216.385, val task loss: 73.341
model_3: train loss: 45.329, train task loss: 15.307 - val loss: 196.764, val task loss: 68.450
model_4: train loss: 51.699, train task loss: 16.510 - val loss: 319.685, val task loss: 87.790
model_5: train loss: 46.481, train task loss: 16.015 - val loss: 191.967, val task loss: 71.634
model_6: train loss: 58.547, train task loss: 17.967 - val loss: 379.061, val task loss: 97.052
model_7: train loss: 33.864, train task loss: 13.880 - val loss: 137.164, val task loss: 60.139 [*] Best so far

Epoch: 14/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 181.28it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 194.80it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 196.92it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:05, 201.67it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 204.50it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 205.33it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:04, 205.34it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 36.326, train task loss: 11.341 - val loss: 130.404, val task loss: 58.821
model_2: train loss: 48.223, train task loss: 13.856 - val loss: 207.396, val task loss: 67.064 [*] Best so far
model_3: train loss: 42.656, train task loss: 12.565 - val loss: 176.599, val task loss: 61.295 [*] Best so far
model_4: train loss: 49.216, train task loss: 14.051 - val loss: 309.950, val task loss: 85.314 [*] Best so far
model_5: train loss: 45.316, train task loss: 13.163 - val loss: 184.542, val task loss: 68.297 [*] Best so far
model_6: train loss: 51.720, train task loss: 14.951 - val loss: 359.927, val task loss: 93.077
model_7: train loss: 37.996, train task loss: 13.011 - val loss: 130.771, val task loss: 59.188 [*] Best so far

Epoch: 15/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 194.45it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 215.34it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.11it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.42it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 231.49it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 232.49it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 233.19it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 26.788, train task loss: 9.567 - val loss: 141.269, val task loss: 61.572
model_2: train loss: 35.296, train task loss: 11.199 - val loss: 198.774, val task loss: 66.501 [*] Best so far
model_3: train loss: 31.168, train task loss: 10.302 - val loss: 190.714, val task loss: 65.430
model_4: train loss: 38.691, train task loss: 12.025 - val loss: 322.673, val task loss: 87.016
model_5: train loss: 38.020, train task loss: 11.565 - val loss: 193.953, val task loss: 70.823
model_6: train loss: 39.170, train task loss: 12.123 - val loss: 375.793, val task loss: 94.516
model_7: train loss: 27.894, train task loss: 10.672 - val loss: 136.679, val task loss: 56.983 [*] Best so far

Epoch: 16/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 203.54it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 216.57it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 225.58it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 228.20it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 232.35it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 233.07it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 235.08it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 24.613, train task loss: 7.831 - val loss: 140.660, val task loss: 56.956 [*] Best so far
model_2: train loss: 28.302, train task loss: 8.257 - val loss: 218.166, val task loss: 65.748 [*] Best so far
model_3: train loss: 29.789, train task loss: 8.645 - val loss: 198.315, val task loss: 62.365
model_4: train loss: 32.439, train task loss: 9.313 - val loss: 332.161, val task loss: 86.597
model_5: train loss: 32.220, train task loss: 8.843 - val loss: 195.426, val task loss: 66.868 [*] Best so far
model_6: train loss: 34.252, train task loss: 9.291 - val loss: 376.045, val task loss: 92.068
model_7: train loss: 25.305, train task loss: 8.523 - val loss: 144.551, val task loss: 60.848

Epoch: 17/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 202.84it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 217.86it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 225.39it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.96it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 232.59it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 234.40it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 235.06it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 22.159, train task loss: 6.661 - val loss: 134.969, val task loss: 57.980
model_2: train loss: 24.910, train task loss: 7.241 - val loss: 198.499, val task loss: 64.369 [*] Best so far
model_3: train loss: 26.284, train task loss: 7.159 - val loss: 184.162, val task loss: 60.817 [*] Best so far
model_4: train loss: 28.156, train task loss: 8.032 - val loss: 329.278, val task loss: 87.916
model_5: train loss: 24.825, train task loss: 7.235 - val loss: 183.625, val task loss: 67.349
model_6: train loss: 28.447, train task loss: 8.074 - val loss: 385.298, val task loss: 95.194
model_7: train loss: 23.444, train task loss: 7.946 - val loss: 134.626, val task loss: 57.636

Epoch: 18/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 212.41it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 223.64it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 230.28it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 232.30it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 233.14it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 232.59it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 235.55it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 15.586, train task loss: 5.210 - val loss: 139.236, val task loss: 57.329
model_2: train loss: 18.075, train task loss: 5.540 - val loss: 204.916, val task loss: 62.742 [*] Best so far
model_3: train loss: 19.623, train task loss: 5.977 - val loss: 191.718, val task loss: 61.030
model_4: train loss: 19.386, train task loss: 6.055 - val loss: 338.161, val task loss: 86.473
model_5: train loss: 18.321, train task loss: 5.715 - val loss: 194.215, val task loss: 66.070 [*] Best so far
model_6: train loss: 20.489, train task loss: 6.208 - val loss: 376.129, val task loss: 93.570
model_7: train loss: 16.228, train task loss: 5.852 - val loss: 136.337, val task loss: 54.430 [*] Best so far

Epoch: 19/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 202.31it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 218.62it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 226.21it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 231.31it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 232.82it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 233.12it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 236.36it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 12.515, train task loss: 4.295 - val loss: 128.178, val task loss: 54.202 [*] Best so far
model_2: train loss: 15.064, train task loss: 4.676 - val loss: 207.763, val task loss: 65.130
model_3: train loss: 16.236, train task loss: 4.928 - val loss: 189.868, val task loss: 59.861 [*] Best so far
model_4: train loss: 16.798, train task loss: 5.017 - val loss: 335.089, val task loss: 88.399
model_5: train loss: 15.748, train task loss: 4.883 - val loss: 190.569, val task loss: 66.842
model_6: train loss: 18.734, train task loss: 5.491 - val loss: 380.293, val task loss: 93.802
model_7: train loss: 13.215, train task loss: 4.995 - val loss: 130.343, val task loss: 56.367

Epoch: 20/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 205.32it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 221.44it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 226.85it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 228.46it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 232.55it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 236.13it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 236.48it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 10.384, train task loss: 3.484 - val loss: 131.531, val task loss: 54.894
model_2: train loss: 12.522, train task loss: 3.732 - val loss: 206.606, val task loss: 63.024
model_3: train loss: 13.249, train task loss: 3.785 - val loss: 192.348, val task loss: 59.688 [*] Best so far
model_4: train loss: 15.235, train task loss: 4.196 - val loss: 335.025, val task loss: 87.021
model_5: train loss: 12.879, train task loss: 3.751 - val loss: 190.688, val task loss: 65.080 [*] Best so far
model_6: train loss: 17.612, train task loss: 4.702 - val loss: 388.885, val task loss: 93.967
model_7: train loss: 10.662, train task loss: 3.762 - val loss: 130.896, val task loss: 54.259 [*] Best so far

Epoch: 21/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 207.92it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 222.53it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 229.46it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 232.47it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 234.07it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 233.78it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 234.73it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 8.856, train task loss: 2.872 - val loss: 137.046, val task loss: 54.623
model_2: train loss: 10.830, train task loss: 3.306 - val loss: 211.436, val task loss: 63.250
model_3: train loss: 11.631, train task loss: 3.493 - val loss: 193.264, val task loss: 59.577 [*] Best so far
model_4: train loss: 14.957, train task loss: 4.029 - val loss: 360.233, val task loss: 89.950
model_5: train loss: 11.992, train task loss: 3.535 - val loss: 212.186, val task loss: 67.346
model_6: train loss: 15.679, train task loss: 4.054 - val loss: 405.556, val task loss: 94.037
model_7: train loss: 9.337, train task loss: 3.353 - val loss: 136.034, val task loss: 53.611 [*] Best so far

Epoch: 22/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 208.95it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 224.48it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 228.16it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 230.80it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 232.66it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 228.60it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 232.89it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 8.830, train task loss: 2.541 - val loss: 121.817, val task loss: 53.151 [*] Best so far
model_2: train loss: 10.383, train task loss: 2.748 - val loss: 202.048, val task loss: 61.343 [*] Best so far
model_3: train loss: 11.492, train task loss: 2.767 - val loss: 191.705, val task loss: 60.948
model_4: train loss: 13.006, train task loss: 3.235 - val loss: 343.901, val task loss: 87.595
model_5: train loss: 12.419, train task loss: 3.018 - val loss: 191.938, val task loss: 65.866
model_6: train loss: 13.984, train task loss: 3.390 - val loss: 382.473, val task loss: 93.730
model_7: train loss: 9.136, train task loss: 2.847 - val loss: 121.924, val task loss: 53.258 [*] Best so far

Epoch: 23/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 203.84it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 220.54it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 226.97it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 231.30it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 233.64it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 233.93it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 234.60it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 8.890, train task loss: 2.368 - val loss: 129.889, val task loss: 52.786 [*] Best so far
model_2: train loss: 10.631, train task loss: 2.539 - val loss: 214.505, val task loss: 63.874
model_3: train loss: 12.449, train task loss: 2.864 - val loss: 194.805, val task loss: 58.596 [*] Best so far
model_4: train loss: 12.088, train task loss: 2.776 - val loss: 350.553, val task loss: 87.738
model_5: train loss: 13.406, train task loss: 2.984 - val loss: 192.930, val task loss: 64.664 [*] Best so far
model_6: train loss: 13.982, train task loss: 3.204 - val loss: 415.792, val task loss: 97.197
model_7: train loss: 9.193, train task loss: 2.671 - val loss: 131.105, val task loss: 54.002

Epoch: 24/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 214.54it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 228.08it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 229.34it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 232.23it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 231.57it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 232.11it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 231.53it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 8.107, train task loss: 2.118 - val loss: 132.612, val task loss: 53.188
model_2: train loss: 10.189, train task loss: 2.265 - val loss: 200.139, val task loss: 60.126 [*] Best so far
model_3: train loss: 11.373, train task loss: 2.606 - val loss: 197.647, val task loss: 59.592
model_4: train loss: 11.705, train task loss: 2.646 - val loss: 340.840, val task loss: 87.312
model_5: train loss: 11.166, train task loss: 2.519 - val loss: 203.344, val task loss: 65.750
model_6: train loss: 11.875, train task loss: 2.696 - val loss: 399.325, val task loss: 95.716
model_7: train loss: 8.267, train task loss: 2.277 - val loss: 132.468, val task loss: 53.044 [*] Best so far

Epoch: 25/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 206.53it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 223.36it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.29it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 230.65it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 233.96it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 238.36it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 235.15it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 6.680, train task loss: 1.797 - val loss: 131.871, val task loss: 53.555
model_2: train loss: 8.584, train task loss: 2.078 - val loss: 208.583, val task loss: 62.295
model_3: train loss: 10.197, train task loss: 2.272 - val loss: 195.482, val task loss: 58.654
model_4: train loss: 10.234, train task loss: 2.294 - val loss: 349.324, val task loss: 88.482
model_5: train loss: 9.303, train task loss: 2.065 - val loss: 194.312, val task loss: 64.267 [*] Best so far
model_6: train loss: 9.292, train task loss: 2.063 - val loss: 380.782, val task loss: 91.980
model_7: train loss: 6.805, train task loss: 1.922 - val loss: 131.133, val task loss: 52.817 [*] Best so far

Epoch: 26/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 206.26it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 221.01it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 226.30it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.53it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 231.83it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 230.04it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 231.41it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 7.869, train task loss: 1.778 - val loss: 138.374, val task loss: 53.166
model_2: train loss: 8.535, train task loss: 1.800 - val loss: 211.007, val task loss: 62.243
model_3: train loss: 9.964, train task loss: 2.040 - val loss: 205.698, val task loss: 61.036
model_4: train loss: 10.860, train task loss: 2.165 - val loss: 359.077, val task loss: 88.960
model_5: train loss: 10.161, train task loss: 1.988 - val loss: 204.858, val task loss: 65.812
model_6: train loss: 10.532, train task loss: 2.067 - val loss: 411.342, val task loss: 96.064
model_7: train loss: 8.054, train task loss: 1.963 - val loss: 138.903, val task loss: 53.695

Epoch: 27/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 204.08it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 219.55it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 225.76it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 230.27it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 233.48it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 233.82it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 234.70it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 7.999, train task loss: 1.681 - val loss: 131.097, val task loss: 52.193 [*] Best so far
model_2: train loss: 8.416, train task loss: 1.516 - val loss: 205.978, val task loss: 61.672
model_3: train loss: 9.729, train task loss: 1.728 - val loss: 193.286, val task loss: 58.422 [*] Best so far
model_4: train loss: 10.646, train task loss: 1.929 - val loss: 343.746, val task loss: 87.518
model_5: train loss: 10.646, train task loss: 1.863 - val loss: 197.663, val task loss: 64.009 [*] Best so far
model_6: train loss: 10.926, train task loss: 1.911 - val loss: 397.252, val task loss: 94.346
model_7: train loss: 8.090, train task loss: 1.772 - val loss: 132.430, val task loss: 53.526

Epoch: 28/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 204.42it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 219.08it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 223.99it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.76it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 230.87it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 233.12it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 233.14it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 7.471, train task loss: 1.542 - val loss: 134.811, val task loss: 53.252
model_2: train loss: 8.141, train task loss: 1.578 - val loss: 205.120, val task loss: 62.171
model_3: train loss: 10.061, train task loss: 1.832 - val loss: 204.506, val task loss: 60.286
model_4: train loss: 9.218, train task loss: 1.635 - val loss: 355.230, val task loss: 88.140
model_5: train loss: 11.067, train task loss: 2.043 - val loss: 207.109, val task loss: 67.114
model_6: train loss: 9.663, train task loss: 1.831 - val loss: 406.534, val task loss: 95.556
model_7: train loss: 7.496, train task loss: 1.567 - val loss: 133.915, val task loss: 52.356 [*] Best so far

Epoch: 29/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 201.23it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 215.62it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.23it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.66it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.09it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 232.18it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 232.15it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 5.974, train task loss: 1.304 - val loss: 129.933, val task loss: 52.080 [*] Best so far
model_2: train loss: 7.729, train task loss: 1.491 - val loss: 203.984, val task loss: 61.629
model_3: train loss: 10.709, train task loss: 1.958 - val loss: 199.058, val task loss: 60.744
model_4: train loss: 8.403, train task loss: 1.573 - val loss: 334.980, val task loss: 85.433
model_5: train loss: 10.417, train task loss: 1.897 - val loss: 192.882, val task loss: 63.285 [*] Best so far
model_6: train loss: 9.958, train task loss: 1.848 - val loss: 394.597, val task loss: 93.657
model_7: train loss: 6.050, train task loss: 1.379 - val loss: 129.665, val task loss: 51.812 [*] Best so far

Epoch: 30/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 202.81it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 218.48it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 226.69it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.12it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 230.42it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 230.65it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 230.56it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 4.963, train task loss: 1.102 - val loss: 129.175, val task loss: 51.638 [*] Best so far
model_2: train loss: 7.443, train task loss: 1.465 - val loss: 207.778, val task loss: 62.789
model_3: train loss: 10.651, train task loss: 2.028 - val loss: 203.383, val task loss: 61.498
model_4: train loss: 7.235, train task loss: 1.429 - val loss: 344.285, val task loss: 87.459
model_5: train loss: 10.258, train task loss: 1.936 - val loss: 202.937, val task loss: 65.621
model_6: train loss: 9.356, train task loss: 1.719 - val loss: 396.424, val task loss: 94.449
model_7: train loss: 4.985, train task loss: 1.124 - val loss: 130.079, val task loss: 52.542
Training with disagreement penalty = 10

Epoch: 1/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 192.98it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 205.74it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 211.34it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 211.73it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 213.96it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 214.54it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 213.57it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 193.430, train task loss: 187.926 - val loss: 140.494, val task loss: 124.620 [*] Best so far
Created directory: ./checkpoints/negative_learning/1/adaptive\10
model_2: train loss: 195.108, train task loss: 186.893 - val loss: 149.608, val task loss: 128.718 [*] Best so far
model_3: train loss: 193.705, train task loss: 186.113 - val loss: 143.085, val task loss: 122.719 [*] Best so far
model_4: train loss: 199.602, train task loss: 190.443 - val loss: 157.628, val task loss: 133.796 [*] Best so far
model_5: train loss: 195.194, train task loss: 187.938 - val loss: 146.414, val task loss: 126.741 [*] Best so far
model_6: train loss: 197.376, train task loss: 188.055 - val loss: 154.378, val task loss: 128.981 [*] Best so far
model_7: train loss: 198.710, train task loss: 193.206 - val loss: 150.844, val task loss: 134.970 [*] Best so far

Epoch: 2/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 193.75it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 212.66it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 222.39it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.98it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 224.88it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 223.50it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 222.86it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 148.261, train task loss: 138.229 - val loss: 114.626, val task loss: 107.629 [*] Best so far
model_2: train loss: 159.525, train task loss: 142.872 - val loss: 124.530, val task loss: 110.077 [*] Best so far
model_3: train loss: 154.148, train task loss: 139.387 - val loss: 120.656, val task loss: 107.529 [*] Best so far
model_4: train loss: 173.617, train task loss: 149.012 - val loss: 139.726, val task loss: 113.711 [*] Best so far
model_5: train loss: 156.464, train task loss: 141.026 - val loss: 122.251, val task loss: 108.977 [*] Best so far
model_6: train loss: 174.559, train task loss: 148.098 - val loss: 141.661, val task loss: 112.605 [*] Best so far
model_7: train loss: 157.228, train task loss: 147.196 - val loss: 118.611, val task loss: 111.615 [*] Best so far

Epoch: 3/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 189.19it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 207.79it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 211.81it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 214.64it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 214.61it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 214.56it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 216.28it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 123.092, train task loss: 115.332 - val loss: 103.246, val task loss: 92.067 [*] Best so far
model_2: train loss: 141.081, train task loss: 120.222 - val loss: 124.389, val task loss: 95.287 [*] Best so far
model_3: train loss: 130.878, train task loss: 117.395 - val loss: 112.784, val task loss: 92.614 [*] Best so far
model_4: train loss: 168.765, train task loss: 129.352 - val loss: 148.184, val task loss: 100.074 [*] Best so far
model_5: train loss: 135.390, train task loss: 118.601 - val loss: 120.979, val task loss: 94.480 [*] Best so far
model_6: train loss: 183.597, train task loss: 131.518 - val loss: 166.165, val task loss: 99.094 [*] Best so far
model_7: train loss: 131.399, train task loss: 123.638 - val loss: 104.104, val task loss: 92.924 [*] Best so far

Epoch: 4/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 193.24it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 211.98it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 221.54it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 224.32it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 227.33it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.12it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 226.80it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 105.935, train task loss: 96.510 - val loss: 100.353, val task loss: 84.190 [*] Best so far
model_2: train loss: 129.753, train task loss: 101.262 - val loss: 132.039, val task loss: 87.245 [*] Best so far
model_3: train loss: 114.916, train task loss: 98.404 - val loss: 123.797, val task loss: 86.202 [*] Best so far
model_4: train loss: 167.870, train task loss: 110.198 - val loss: 192.027, val task loss: 94.725 [*] Best so far
model_5: train loss: 120.764, train task loss: 100.144 - val loss: 123.640, val task loss: 87.072 [*] Best so far
model_6: train loss: 203.146, train task loss: 114.974 - val loss: 241.742, val task loss: 92.802 [*] Best so far
model_7: train loss: 115.994, train task loss: 106.570 - val loss: 103.041, val task loss: 86.878 [*] Best so far

Epoch: 5/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 195.94it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 213.58it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 220.82it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.16it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.97it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 230.76it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 231.96it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 95.916, train task loss: 85.313 - val loss: 98.711, val task loss: 82.564 [*] Best so far
model_2: train loss: 122.870, train task loss: 91.224 - val loss: 136.095, val task loss: 86.349 [*] Best so far
model_3: train loss: 103.071, train task loss: 87.420 - val loss: 119.395, val task loss: 83.767 [*] Best so far
model_4: train loss: 165.693, train task loss: 100.371 - val loss: 202.457, val task loss: 92.278 [*] Best so far
model_5: train loss: 110.557, train task loss: 90.056 - val loss: 121.034, val task loss: 84.350 [*] Best so far
model_6: train loss: 207.088, train task loss: 106.552 - val loss: 236.032, val task loss: 89.612 [*] Best so far
model_7: train loss: 107.552, train task loss: 96.949 - val loss: 100.609, val task loss: 84.462 [*] Best so far

Epoch: 6/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 197.56it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 217.01it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.55it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.31it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 227.65it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 228.57it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 230.01it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 87.569, train task loss: 77.303 - val loss: 104.500, val task loss: 79.133 [*] Best so far
model_2: train loss: 111.682, train task loss: 84.764 - val loss: 141.805, val task loss: 82.122 [*] Best so far
model_3: train loss: 92.791, train task loss: 79.847 - val loss: 125.030, val task loss: 79.886 [*] Best so far
model_4: train loss: 157.012, train task loss: 94.286 - val loss: 230.267, val task loss: 91.507 [*] Best so far
model_5: train loss: 98.056, train task loss: 82.456 - val loss: 124.667, val task loss: 81.029 [*] Best so far
model_6: train loss: 194.454, train task loss: 99.456 - val loss: 259.133, val task loss: 88.519 [*] Best so far
model_7: train loss: 99.521, train task loss: 89.255 - val loss: 109.735, val task loss: 84.369 [*] Best so far

Epoch: 7/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 201.38it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 219.28it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.70it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.85it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 232.09it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 234.66it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 234.61it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 78.524, train task loss: 68.684 - val loss: 109.491, val task loss: 77.953 [*] Best so far
model_2: train loss: 101.171, train task loss: 74.715 - val loss: 162.424, val task loss: 82.711
model_3: train loss: 83.921, train task loss: 70.980 - val loss: 139.542, val task loss: 80.298
model_4: train loss: 141.134, train task loss: 84.114 - val loss: 250.601, val task loss: 90.840 [*] Best so far
model_5: train loss: 88.022, train task loss: 72.916 - val loss: 132.847, val task loss: 81.376
model_6: train loss: 174.074, train task loss: 89.117 - val loss: 309.923, val task loss: 90.347
model_7: train loss: 88.818, train task loss: 78.979 - val loss: 112.551, val task loss: 81.013 [*] Best so far

Epoch: 8/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 196.71it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 214.75it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.67it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.81it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.73it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.80it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 231.49it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 68.268, train task loss: 56.030 - val loss: 128.140, val task loss: 74.321 [*] Best so far
model_2: train loss: 93.222, train task loss: 62.829 - val loss: 216.052, val task loss: 81.581 [*] Best so far
model_3: train loss: 75.217, train task loss: 58.328 - val loss: 165.893, val task loss: 77.264 [*] Best so far
model_4: train loss: 130.294, train task loss: 72.260 - val loss: 323.710, val task loss: 93.277
model_5: train loss: 81.111, train task loss: 61.285 - val loss: 162.146, val task loss: 79.063 [*] Best so far
model_6: train loss: 165.518, train task loss: 76.553 - val loss: 383.444, val task loss: 91.136
model_7: train loss: 76.888, train task loss: 64.650 - val loss: 130.659, val task loss: 76.840 [*] Best so far

Epoch: 9/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 202.87it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 217.44it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 225.72it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 227.73it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.65it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.88it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 230.98it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 63.891, train task loss: 44.623 - val loss: 152.195, val task loss: 74.644
model_2: train loss: 96.549, train task loss: 52.902 - val loss: 233.774, val task loss: 78.693 [*] Best so far
model_3: train loss: 74.749, train task loss: 47.568 - val loss: 201.279, val task loss: 76.194 [*] Best so far
model_4: train loss: 141.675, train task loss: 60.479 - val loss: 371.213, val task loss: 92.952
model_5: train loss: 83.190, train task loss: 49.816 - val loss: 207.243, val task loss: 81.079
model_6: train loss: 165.442, train task loss: 64.883 - val loss: 461.892, val task loss: 95.619
model_7: train loss: 67.447, train task loss: 48.178 - val loss: 146.912, val task loss: 69.362 [*] Best so far

Epoch: 10/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 204.42it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 221.05it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 226.35it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 228.90it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 230.17it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.34it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 233.25it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 64.899, train task loss: 35.023 - val loss: 182.333, val task loss: 65.735 [*] Best so far
model_2: train loss: 98.463, train task loss: 40.954 - val loss: 280.023, val task loss: 74.915 [*] Best so far
model_3: train loss: 82.827, train task loss: 38.046 - val loss: 234.388, val task loss: 74.022 [*] Best so far
model_4: train loss: 163.832, train task loss: 52.746 - val loss: 370.405, val task loss: 86.125 [*] Best so far
model_5: train loss: 92.123, train task loss: 40.009 - val loss: 226.830, val task loss: 76.183 [*] Best so far
model_6: train loss: 162.127, train task loss: 51.037 - val loss: 486.148, val task loss: 89.733
model_7: train loss: 66.217, train task loss: 36.341 - val loss: 187.300, val task loss: 70.702

Epoch: 11/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 203.09it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 215.64it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 223.38it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 227.41it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 231.37it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.56it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 233.78it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 81.726, train task loss: 34.187 - val loss: 143.189, val task loss: 69.393
model_2: train loss: 97.812, train task loss: 36.827 - val loss: 222.627, val task loss: 72.691 [*] Best so far
model_3: train loss: 89.114, train task loss: 34.623 - val loss: 193.222, val task loss: 71.441 [*] Best so far
model_4: train loss: 129.181, train task loss: 43.754 - val loss: 359.684, val task loss: 89.159
model_5: train loss: 99.926, train task loss: 37.399 - val loss: 190.488, val task loss: 73.934 [*] Best so far
model_6: train loss: 140.528, train task loss: 44.466 - val loss: 393.057, val task loss: 91.966
model_7: train loss: 82.384, train task loss: 34.844 - val loss: 138.617, val task loss: 64.821 [*] Best so far

Epoch: 12/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 196.79it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 216.17it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 220.67it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.97it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 228.78it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 230.34it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 231.57it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 63.050, train task loss: 31.214 - val loss: 151.659, val task loss: 64.978 [*] Best so far
model_2: train loss: 74.083, train task loss: 32.858 - val loss: 236.032, val task loss: 71.726 [*] Best so far
model_3: train loss: 79.845, train task loss: 33.610 - val loss: 219.563, val task loss: 69.492 [*] Best so far
model_4: train loss: 85.097, train task loss: 34.975 - val loss: 396.726, val task loss: 90.231
model_5: train loss: 80.124, train task loss: 33.678 - val loss: 210.971, val task loss: 72.905 [*] Best so far
model_6: train loss: 96.279, train task loss: 37.432 - val loss: 473.495, val task loss: 90.666
model_7: train loss: 63.838, train task loss: 32.002 - val loss: 151.188, val task loss: 64.507 [*] Best so far

Epoch: 13/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 206.17it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 223.01it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 229.55it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 231.19it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 230.23it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.17it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 232.46it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 46.732, train task loss: 24.111 - val loss: 167.048, val task loss: 63.015 [*] Best so far
model_2: train loss: 58.343, train task loss: 26.004 - val loss: 256.651, val task loss: 66.355 [*] Best so far
model_3: train loss: 65.790, train task loss: 26.618 - val loss: 241.363, val task loss: 71.052
model_4: train loss: 68.081, train task loss: 28.675 - val loss: 466.690, val task loss: 89.976
model_5: train loss: 67.911, train task loss: 28.217 - val loss: 254.173, val task loss: 72.456 [*] Best so far
model_6: train loss: 89.286, train task loss: 30.806 - val loss: 552.209, val task loss: 92.989
model_7: train loss: 48.664, train task loss: 26.043 - val loss: 167.667, val task loss: 63.634 [*] Best so far

Epoch: 14/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 197.95it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 216.09it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 223.46it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 228.32it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 228.76it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.92it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 231.05it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 39.355, train task loss: 19.189 - val loss: 169.534, val task loss: 61.434 [*] Best so far
model_2: train loss: 54.706, train task loss: 21.336 - val loss: 283.638, val task loss: 68.576
model_3: train loss: 59.963, train task loss: 22.022 - val loss: 249.410, val task loss: 65.003 [*] Best so far
model_4: train loss: 60.151, train task loss: 22.795 - val loss: 476.311, val task loss: 90.082
model_5: train loss: 59.195, train task loss: 22.323 - val loss: 254.910, val task loss: 71.346 [*] Best so far
model_6: train loss: 77.648, train task loss: 25.302 - val loss: 595.418, val task loss: 97.340
model_7: train loss: 39.742, train task loss: 19.576 - val loss: 169.012, val task loss: 60.912 [*] Best so far

Epoch: 15/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 205.22it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 220.81it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 228.27it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 232.67it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 235.27it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 234.55it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 233.91it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 36.309, train task loss: 15.009 - val loss: 183.063, val task loss: 61.007 [*] Best so far
model_2: train loss: 53.547, train task loss: 17.538 - val loss: 287.254, val task loss: 67.989
model_3: train loss: 52.923, train task loss: 17.305 - val loss: 248.241, val task loss: 64.781 [*] Best so far
model_4: train loss: 58.832, train task loss: 18.671 - val loss: 504.258, val task loss: 90.646
model_5: train loss: 53.292, train task loss: 17.611 - val loss: 265.146, val task loss: 70.593 [*] Best so far
model_6: train loss: 71.378, train task loss: 19.890 - val loss: 581.578, val task loss: 92.101
model_7: train loss: 37.296, train task loss: 15.997 - val loss: 179.446, val task loss: 57.390 [*] Best so far

Epoch: 16/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 202.45it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 218.91it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.91it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 228.52it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 231.13it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.91it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 234.08it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 35.785, train task loss: 13.640 - val loss: 184.493, val task loss: 59.146 [*] Best so far
model_2: train loss: 52.467, train task loss: 15.365 - val loss: 328.560, val task loss: 66.681
model_3: train loss: 51.959, train task loss: 15.248 - val loss: 271.861, val task loss: 66.649
model_4: train loss: 50.316, train task loss: 15.691 - val loss: 523.930, val task loss: 89.717
model_5: train loss: 57.079, train task loss: 15.668 - val loss: 290.034, val task loss: 73.759
model_6: train loss: 60.786, train task loss: 17.398 - val loss: 609.124, val task loss: 94.912
model_7: train loss: 35.950, train task loss: 13.805 - val loss: 185.843, val task loss: 60.496

Epoch: 17/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 203.47it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 219.74it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 225.26it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.54it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 232.35it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 228.78it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 229.65it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 32.298, train task loss: 11.581 - val loss: 200.897, val task loss: 58.953 [*] Best so far
model_2: train loss: 51.385, train task loss: 14.353 - val loss: 314.354, val task loss: 65.480 [*] Best so far
model_3: train loss: 50.639, train task loss: 14.152 - val loss: 294.090, val task loss: 65.799
model_4: train loss: 45.148, train task loss: 14.013 - val loss: 547.302, val task loss: 92.087
model_5: train loss: 53.280, train task loss: 14.495 - val loss: 296.718, val task loss: 69.336 [*] Best so far
model_6: train loss: 47.520, train task loss: 14.167 - val loss: 617.123, val task loss: 94.558
model_7: train loss: 33.133, train task loss: 12.416 - val loss: 201.245, val task loss: 59.301

Epoch: 18/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 204.15it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 220.48it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 227.46it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.68it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 231.29it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 233.43it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 234.50it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 39.094, train task loss: 10.715 - val loss: 178.262, val task loss: 57.562 [*] Best so far
model_2: train loss: 47.515, train task loss: 11.642 - val loss: 286.757, val task loss: 64.366 [*] Best so far
model_3: train loss: 53.136, train task loss: 12.669 - val loss: 278.463, val task loss: 63.851 [*] Best so far
model_4: train loss: 51.172, train task loss: 12.152 - val loss: 500.800, val task loss: 88.635
model_5: train loss: 50.229, train task loss: 12.062 - val loss: 283.237, val task loss: 69.815
model_6: train loss: 48.879, train task loss: 11.972 - val loss: 600.255, val task loss: 92.911
model_7: train loss: 39.684, train task loss: 11.305 - val loss: 180.049, val task loss: 59.349

Epoch: 19/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 202.86it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 218.57it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 221.25it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.82it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 231.26it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.14it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 232.83it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 40.876, train task loss: 11.640 - val loss: 181.612, val task loss: 59.202
model_2: train loss: 43.549, train task loss: 11.946 - val loss: 300.094, val task loss: 65.004
model_3: train loss: 50.163, train task loss: 12.402 - val loss: 274.746, val task loss: 64.433
model_4: train loss: 44.466, train task loss: 11.741 - val loss: 476.741, val task loss: 86.829
model_5: train loss: 48.502, train task loss: 12.085 - val loss: 264.062, val task loss: 67.136 [*] Best so far
model_6: train loss: 47.931, train task loss: 12.010 - val loss: 583.161, val task loss: 91.917
model_7: train loss: 41.074, train task loss: 11.838 - val loss: 180.613, val task loss: 58.203

Epoch: 20/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 211.05it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 223.00it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 229.37it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.10it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 230.23it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 232.31it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 232.94it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 35.325, train task loss: 10.220 - val loss: 169.410, val task loss: 57.235 [*] Best so far
model_2: train loss: 42.465, train task loss: 10.574 - val loss: 276.993, val task loss: 63.964 [*] Best so far
model_3: train loss: 45.560, train task loss: 11.367 - val loss: 256.048, val task loss: 65.014
model_4: train loss: 43.794, train task loss: 11.336 - val loss: 481.212, val task loss: 87.586
model_5: train loss: 44.646, train task loss: 10.980 - val loss: 268.986, val task loss: 69.777
model_6: train loss: 42.557, train task loss: 11.062 - val loss: 586.724, val task loss: 91.922
model_7: train loss: 35.981, train task loss: 10.876 - val loss: 169.282, val task loss: 57.107 [*] Best so far

Epoch: 21/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 205.59it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 221.12it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 226.87it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 227.37it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.66it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 230.01it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 233.04it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 30.566, train task loss: 8.718 - val loss: 168.977, val task loss: 56.512 [*] Best so far
model_2: train loss: 37.330, train task loss: 9.280 - val loss: 283.984, val task loss: 63.010 [*] Best so far
model_3: train loss: 35.659, train task loss: 9.233 - val loss: 257.449, val task loss: 61.047 [*] Best so far
model_4: train loss: 39.249, train task loss: 9.358 - val loss: 515.578, val task loss: 89.332
model_5: train loss: 36.050, train task loss: 9.363 - val loss: 263.992, val task loss: 66.142 [*] Best so far
model_6: train loss: 36.533, train task loss: 9.295 - val loss: 620.820, val task loss: 95.818
model_7: train loss: 31.199, train task loss: 9.350 - val loss: 167.811, val task loss: 55.346 [*] Best so far

Epoch: 22/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 202.31it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 216.14it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 223.14it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 227.06it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 228.38it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.51it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 233.52it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 28.062, train task loss: 8.101 - val loss: 184.238, val task loss: 56.270 [*] Best so far
model_2: train loss: 33.887, train task loss: 8.606 - val loss: 295.861, val task loss: 64.422
model_3: train loss: 30.774, train task loss: 8.070 - val loss: 268.897, val task loss: 61.718
model_4: train loss: 33.127, train task loss: 8.580 - val loss: 508.858, val task loss: 85.833 [*] Best so far
model_5: train loss: 32.104, train task loss: 8.679 - val loss: 277.660, val task loss: 67.157
model_6: train loss: 36.535, train task loss: 8.756 - val loss: 595.629, val task loss: 90.227
model_7: train loss: 28.120, train task loss: 8.158 - val loss: 182.674, val task loss: 54.706 [*] Best so far

Epoch: 23/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 205.46it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 223.00it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 227.31it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 230.69it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 230.80it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.90it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 234.09it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 25.417, train task loss: 6.813 - val loss: 184.235, val task loss: 56.462
model_2: train loss: 32.180, train task loss: 7.436 - val loss: 307.059, val task loss: 62.280 [*] Best so far
model_3: train loss: 26.170, train task loss: 6.790 - val loss: 296.336, val task loss: 63.184
model_4: train loss: 28.289, train task loss: 6.996 - val loss: 546.338, val task loss: 88.325
model_5: train loss: 29.535, train task loss: 7.088 - val loss: 301.822, val task loss: 67.710
model_6: train loss: 34.417, train task loss: 7.622 - val loss: 663.589, val task loss: 94.963
model_7: train loss: 25.534, train task loss: 6.930 - val loss: 181.713, val task loss: 53.940 [*] Best so far

Epoch: 24/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 203.11it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 219.86it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 226.81it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 230.06it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 232.10it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.93it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 234.35it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 18.813, train task loss: 5.741 - val loss: 188.408, val task loss: 55.464 [*] Best so far
model_2: train loss: 26.502, train task loss: 6.230 - val loss: 312.688, val task loss: 62.852
model_3: train loss: 21.280, train task loss: 5.769 - val loss: 274.996, val task loss: 60.605 [*] Best so far
model_4: train loss: 24.587, train task loss: 6.120 - val loss: 541.709, val task loss: 86.955
model_5: train loss: 24.586, train task loss: 6.082 - val loss: 290.624, val task loss: 66.361
model_6: train loss: 30.687, train task loss: 6.793 - val loss: 648.298, val task loss: 93.980
model_7: train loss: 19.083, train task loss: 6.011 - val loss: 187.462, val task loss: 54.518

Epoch: 25/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 203.06it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 218.66it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 225.00it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.13it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 228.53it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.14it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 229.39it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 16.341, train task loss: 5.044 - val loss: 194.348, val task loss: 54.350 [*] Best so far
model_2: train loss: 23.178, train task loss: 5.853 - val loss: 318.692, val task loss: 61.663 [*] Best so far
model_3: train loss: 20.648, train task loss: 5.363 - val loss: 320.087, val task loss: 63.148
model_4: train loss: 23.687, train task loss: 5.820 - val loss: 566.223, val task loss: 88.179
model_5: train loss: 24.898, train task loss: 5.925 - val loss: 320.089, val task loss: 67.409
model_6: train loss: 26.932, train task loss: 6.042 - val loss: 682.732, val task loss: 95.093
model_7: train loss: 16.432, train task loss: 5.135 - val loss: 194.387, val task loss: 54.389

Epoch: 26/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 187.97it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 204.34it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 210.24it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 214.64it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 214.90it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 214.04it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 215.29it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 13.456, train task loss: 4.107 - val loss: 197.208, val task loss: 54.639
model_2: train loss: 19.458, train task loss: 4.759 - val loss: 324.320, val task loss: 62.841
model_3: train loss: 18.889, train task loss: 4.799 - val loss: 302.196, val task loss: 61.412
model_4: train loss: 24.659, train task loss: 5.130 - val loss: 594.536, val task loss: 89.525
model_5: train loss: 23.578, train task loss: 5.180 - val loss: 302.771, val task loss: 65.208 [*] Best so far
model_6: train loss: 23.994, train task loss: 5.301 - val loss: 702.822, val task loss: 97.502
model_7: train loss: 13.892, train task loss: 4.543 - val loss: 195.993, val task loss: 53.424 [*] Best so far

Epoch: 27/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 186.03it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 204.87it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 203.77it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:05, 203.87it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 199.66it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 200.24it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:04, 199.05it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 13.980, train task loss: 3.788 - val loss: 193.512, val task loss: 54.188 [*] Best so far
model_2: train loss: 18.492, train task loss: 4.170 - val loss: 314.017, val task loss: 62.089
model_3: train loss: 18.937, train task loss: 4.091 - val loss: 295.778, val task loss: 60.928
model_4: train loss: 28.864, train task loss: 5.176 - val loss: 530.023, val task loss: 84.792 [*] Best so far
model_5: train loss: 21.951, train task loss: 4.290 - val loss: 303.026, val task loss: 65.397
model_6: train loss: 24.132, train task loss: 4.707 - val loss: 657.156, val task loss: 93.359
model_7: train loss: 14.254, train task loss: 4.062 - val loss: 193.368, val task loss: 54.044

Epoch: 28/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 189.10it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 206.12it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 212.57it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 217.27it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 219.59it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 223.17it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 223.03it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 15.186, train task loss: 3.735 - val loss: 202.142, val task loss: 54.935
model_2: train loss: 19.856, train task loss: 4.036 - val loss: 318.731, val task loss: 61.572 [*] Best so far
model_3: train loss: 19.720, train task loss: 4.164 - val loss: 306.801, val task loss: 60.953
model_4: train loss: 30.049, train task loss: 5.278 - val loss: 597.541, val task loss: 91.317
model_5: train loss: 22.007, train task loss: 4.389 - val loss: 318.666, val task loss: 66.847
model_6: train loss: 28.933, train task loss: 4.891 - val loss: 679.297, val task loss: 96.303
model_7: train loss: 15.172, train task loss: 3.721 - val loss: 200.264, val task loss: 53.057 [*] Best so far

Epoch: 29/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 201.57it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 217.94it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 223.02it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.81it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 228.28it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 228.55it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 231.00it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 15.683, train task loss: 3.355 - val loss: 195.138, val task loss: 53.956 [*] Best so far
model_2: train loss: 19.955, train task loss: 3.758 - val loss: 322.864, val task loss: 62.647
model_3: train loss: 21.496, train task loss: 3.931 - val loss: 297.777, val task loss: 60.416 [*] Best so far
model_4: train loss: 29.070, train task loss: 4.710 - val loss: 570.976, val task loss: 85.876
model_5: train loss: 20.731, train task loss: 3.895 - val loss: 311.911, val task loss: 64.914 [*] Best so far
model_6: train loss: 30.386, train task loss: 4.718 - val loss: 675.329, val task loss: 92.764
model_7: train loss: 15.991, train task loss: 3.662 - val loss: 193.984, val task loss: 52.801 [*] Best so far

Epoch: 30/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 198.23it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 216.76it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.29it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 230.05it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 233.00it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 235.09it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 236.54it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 16.136, train task loss: 3.299 - val loss: 199.143, val task loss: 55.198
model_2: train loss: 22.202, train task loss: 3.957 - val loss: 305.151, val task loss: 61.186 [*] Best so far
model_3: train loss: 21.433, train task loss: 3.778 - val loss: 291.588, val task loss: 60.434
model_4: train loss: 26.234, train task loss: 4.335 - val loss: 546.770, val task loss: 87.333
model_5: train loss: 21.310, train task loss: 3.692 - val loss: 293.673, val task loss: 64.550 [*] Best so far
model_6: train loss: 29.809, train task loss: 4.633 - val loss: 642.176, val task loss: 92.953
model_7: train loss: 16.477, train task loss: 3.640 - val loss: 195.739, val task loss: 51.794 [*] Best so far
Finished training student cohort!
Selecting the optimal disgreement penalty via cross-validation...
Best rho: 5 with average task loss: 51.7250
Done!
Method: (best_single), Test_MSE: 53.32421875
Method: (cohort), Test_MSE: [53.32421875, 66.0692138671875, 65.23515319824219, 95.4141006469



Method: (best_single), Test_MSE: 54.86467742919922
Method: (cohort), Test_MSE: [54.86467742919922, 73.9986343383789, 68.25230407714844, 110.51880645751953, 73.34672546386719, 123.37451934814453, 50.90529251098633]
Finished testing indepedent cohort!
Computing adversarial weights by clustering method...
Initialization complete
Iteration 0, inertia 96.54188727370638.
Iteration 1, inertia 34.56048188199202.
Converged at iteration 1: strict convergence.
Computed adversarial weights by clustering method, weights are [0.  0.  0.  0.5 0.  0.5 0. ]

Epoch: 1/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 196.73it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 217.24it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 225.56it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 227.59it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 226.62it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 228.31it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 229.02it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 245.231, train task loss: 199.478 - val loss: 206.848, val task loss: 136.022 [*] Best so far
Created directory: ./checkpoints/negative_learning/1/adversarial\10
model_2: train loss: 190.269, train task loss: 182.701 - val loss: 139.732, val task loss: 134.113 [*] Best so far
model_3: train loss: 191.751, train task loss: 183.903 - val loss: 138.795, val task loss: 132.480 [*] Best so far
model_4: train loss: 192.881, train task loss: 188.744 - val loss: 141.162, val task loss: 136.941 [*] Best so far
model_5: train loss: 192.273, train task loss: 185.058 - val loss: 138.497, val task loss: 132.937 [*] Best so far
model_6: train loss: 187.804, train task loss: 183.667 - val loss: 136.424, val task loss: 132.202 [*] Best so far
model_7: train loss: 353.406, train task loss: 206.493 - val loss: 575.588, val task loss: 165.649 [*] Best so far

Epoch: 2/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 195.84it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 216.08it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 222.20it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.35it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 228.75it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 230.36it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 231.35it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 270.783, train task loss: 170.213 - val loss: 191.012, val task loss: 122.294 [*] Best so far
model_2: train loss: 160.161, train task loss: 153.724 - val loss: 127.462, val task loss: 114.389 [*] Best so far
model_3: train loss: 157.380, train task loss: 150.895 - val loss: 126.924, val task loss: 111.141 [*] Best so far
model_4: train loss: 159.726, train task loss: 156.009 - val loss: 123.834, val task loss: 116.331 [*] Best so far
model_5: train loss: 157.284, train task loss: 150.919 - val loss: 122.676, val task loss: 110.934 [*] Best so far
model_6: train loss: 156.634, train task loss: 152.918 - val loss: 119.537, val task loss: 112.034 [*] Best so far
model_7: train loss: 406.204, train task loss: 178.535 - val loss: 604.952, val task loss: 156.947 [*] Best so far

Epoch: 3/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 208.68it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 221.39it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 227.00it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 230.91it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 230.91it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.45it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 232.83it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 231.061, train task loss: 139.055 - val loss: 185.662, val task loss: 112.043 [*] Best so far
model_2: train loss: 136.530, train task loss: 126.603 - val loss: 112.057, val task loss: 97.643 [*] Best so far
model_3: train loss: 134.735, train task loss: 124.633 - val loss: 109.782, val task loss: 95.045 [*] Best so far
model_4: train loss: 134.473, train task loss: 129.007 - val loss: 106.717, val task loss: 99.156 [*] Best so far
model_5: train loss: 135.319, train task loss: 124.968 - val loss: 110.029, val task loss: 96.242 [*] Best so far
model_6: train loss: 133.025, train task loss: 127.559 - val loss: 105.201, val task loss: 97.639 [*] Best so far
model_7: train loss: 329.212, train task loss: 158.177 - val loss: 302.180, val task loss: 119.673 [*] Best so far

Epoch: 4/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 196.47it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 216.35it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 223.49it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 227.50it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.11it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 232.35it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 232.27it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 179.022, train task loss: 121.915 - val loss: 151.217, val task loss: 96.374 [*] Best so far
model_2: train loss: 121.815, train task loss: 110.679 - val loss: 116.382, val task loss: 93.848 [*] Best so far
model_3: train loss: 121.295, train task loss: 108.798 - val loss: 112.665, val task loss: 91.423 [*] Best so far
model_4: train loss: 117.939, train task loss: 111.655 - val loss: 106.233, val task loss: 95.045 [*] Best so far
model_5: train loss: 122.321, train task loss: 108.380 - val loss: 114.005, val task loss: 91.479 [*] Best so far
model_6: train loss: 116.762, train task loss: 110.478 - val loss: 104.806, val task loss: 93.617 [*] Best so far
model_7: train loss: 329.735, train task loss: 137.050 - val loss: 335.414, val task loss: 102.242 [*] Best so far

Epoch: 5/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 198.16it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 215.43it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 221.60it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 227.06it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 228.53it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.37it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 232.87it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 149.508, train task loss: 111.093 - val loss: 147.620, val task loss: 96.372 [*] Best so far
model_2: train loss: 113.857, train task loss: 100.024 - val loss: 116.619, val task loss: 91.154 [*] Best so far
model_3: train loss: 113.520, train task loss: 100.090 - val loss: 118.244, val task loss: 89.597 [*] Best so far
model_4: train loss: 107.818, train task loss: 101.356 - val loss: 107.178, val task loss: 93.278 [*] Best so far
model_5: train loss: 115.864, train task loss: 100.888 - val loss: 117.501, val task loss: 90.511 [*] Best so far
model_6: train loss: 106.702, train task loss: 100.240 - val loss: 106.333, val task loss: 92.432 [*] Best so far
model_7: train loss: 313.617, train task loss: 122.110 - val loss: 284.815, val task loss: 91.980 [*] Best so far

Epoch: 6/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 201.40it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 219.12it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 222.74it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 225.79it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.40it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.91it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 232.75it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 145.578, train task loss: 106.989 - val loss: 148.207, val task loss: 93.405 [*] Best so far
model_2: train loss: 105.759, train task loss: 93.205 - val loss: 124.456, val task loss: 90.740 [*] Best so far
model_3: train loss: 105.337, train task loss: 92.217 - val loss: 129.118, val task loss: 89.507 [*] Best so far
model_4: train loss: 98.366, train task loss: 92.131 - val loss: 109.672, val task loss: 92.964 [*] Best so far
model_5: train loss: 106.286, train task loss: 92.895 - val loss: 126.429, val task loss: 89.235 [*] Best so far
model_6: train loss: 96.637, train task loss: 90.402 - val loss: 110.095, val task loss: 93.387
model_7: train loss: 277.904, train task loss: 113.324 - val loss: 278.585, val task loss: 90.515 [*] Best so far

Epoch: 7/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 208.76it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 226.06it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 228.23it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 231.03it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 232.14it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 232.48it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 234.39it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 196.591, train task loss: 108.345 - val loss: 214.352, val task loss: 100.369
model_2: train loss: 104.388, train task loss: 84.633 - val loss: 148.524, val task loss: 89.587 [*] Best so far
model_3: train loss: 105.055, train task loss: 84.565 - val loss: 144.474, val task loss: 87.400 [*] Best so far
model_4: train loss: 91.516, train task loss: 81.794 - val loss: 122.739, val task loss: 88.294 [*] Best so far
model_5: train loss: 104.477, train task loss: 84.948 - val loss: 139.535, val task loss: 87.763 [*] Best so far
model_6: train loss: 90.273, train task loss: 80.551 - val loss: 129.683, val task loss: 95.239
model_7: train loss: 316.986, train task loss: 111.558 - val loss: 310.105, val task loss: 92.192

Epoch: 8/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 202.21it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 214.18it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 216.57it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 221.28it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 225.27it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 227.16it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 227.82it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 300.631, train task loss: 111.916 - val loss: 307.303, val task loss: 103.234
model_2: train loss: 107.373, train task loss: 78.109 - val loss: 161.296, val task loss: 85.267 [*] Best so far
model_3: train loss: 106.833, train task loss: 77.226 - val loss: 161.162, val task loss: 82.943 [*] Best so far
model_4: train loss: 85.144, train task loss: 72.016 - val loss: 120.165, val task loss: 86.321 [*] Best so far
model_5: train loss: 101.044, train task loss: 77.452 - val loss: 156.743, val task loss: 83.333 [*] Best so far
model_6: train loss: 84.851, train task loss: 71.723 - val loss: 122.039, val task loss: 88.195 [*] Best so far
model_7: train loss: 425.165, train task loss: 113.922 - val loss: 359.528, val task loss: 94.287

Epoch: 9/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 202.42it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 216.51it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 222.90it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 228.26it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.82it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.89it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 233.43it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 328.548, train task loss: 98.545 - val loss: 249.889, val task loss: 90.245 [*] Best so far
model_2: train loss: 97.906, train task loss: 64.557 - val loss: 180.817, val task loss: 81.529 [*] Best so far
model_3: train loss: 97.937, train task loss: 63.628 - val loss: 190.716, val task loss: 80.656 [*] Best so far
model_4: train loss: 76.911, train task loss: 58.603 - val loss: 138.351, val task loss: 86.991
model_5: train loss: 93.703, train task loss: 63.875 - val loss: 171.945, val task loss: 82.107 [*] Best so far
model_6: train loss: 77.399, train task loss: 59.091 - val loss: 142.486, val task loss: 91.127
model_7: train loss: 454.188, train task loss: 108.851 - val loss: 389.767, val task loss: 87.850 [*] Best so far

Epoch: 10/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 201.83it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 216.57it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 223.73it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.04it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 227.03it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.35it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 229.87it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 279.169, train task loss: 87.090 - val loss: 257.278, val task loss: 87.082 [*] Best so far
model_2: train loss: 81.329, train task loss: 49.885 - val loss: 190.982, val task loss: 81.414 [*] Best so far
model_3: train loss: 86.129, train task loss: 50.518 - val loss: 192.663, val task loss: 76.997 [*] Best so far
model_4: train loss: 69.854, train task loss: 47.866 - val loss: 150.125, val task loss: 87.984
model_5: train loss: 81.842, train task loss: 49.648 - val loss: 199.038, val task loss: 79.932 [*] Best so far
model_6: train loss: 69.913, train task loss: 47.925 - val loss: 156.048, val task loss: 93.907
model_7: train loss: 423.914, train task loss: 101.987 - val loss: 438.841, val task loss: 85.969 [*] Best so far

Epoch: 11/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 201.89it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 217.22it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 222.74it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 225.96it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 226.17it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 225.50it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 221.96it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 260.737, train task loss: 75.860 - val loss: 312.786, val task loss: 85.536 [*] Best so far
model_2: train loss: 73.508, train task loss: 40.103 - val loss: 202.793, val task loss: 76.642 [*] Best so far
model_3: train loss: 80.726, train task loss: 41.475 - val loss: 210.042, val task loss: 74.636 [*] Best so far
model_4: train loss: 67.288, train task loss: 39.051 - val loss: 165.227, val task loss: 86.794
model_5: train loss: 81.219, train task loss: 42.066 - val loss: 211.910, val task loss: 76.693 [*] Best so far
model_6: train loss: 68.913, train task loss: 40.676 - val loss: 167.494, val task loss: 89.061
model_7: train loss: 463.567, train task loss: 97.008 - val loss: 456.541, val task loss: 85.682 [*] Best so far

Epoch: 12/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 197.94it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 214.83it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 220.62it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 223.77it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 226.55it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 227.26it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 227.79it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 223.486, train task loss: 67.227 - val loss: 260.699, val task loss: 78.850 [*] Best so far
model_2: train loss: 73.984, train task loss: 36.821 - val loss: 206.116, val task loss: 74.390 [*] Best so far
model_3: train loss: 82.878, train task loss: 38.110 - val loss: 209.492, val task loss: 70.994 [*] Best so far
model_4: train loss: 74.644, train task loss: 36.519 - val loss: 156.217, val task loss: 82.953 [*] Best so far
model_5: train loss: 80.008, train task loss: 38.641 - val loss: 209.665, val task loss: 71.844 [*] Best so far
model_6: train loss: 76.572, train task loss: 38.447 - val loss: 163.730, val task loss: 90.465
model_7: train loss: 394.853, train task loss: 88.748 - val loss: 395.313, val task loss: 82.033 [*] Best so far

Epoch: 13/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 174.87it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 199.52it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 199.99it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 206.99it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 210.20it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 209.31it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 208.59it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 163.261, train task loss: 54.438 - val loss: 258.266, val task loss: 77.139 [*] Best so far
model_2: train loss: 68.953, train task loss: 32.301 - val loss: 204.570, val task loss: 72.319 [*] Best so far
model_3: train loss: 74.792, train task loss: 33.745 - val loss: 208.226, val task loss: 70.286 [*] Best so far
model_4: train loss: 75.226, train task loss: 35.434 - val loss: 162.664, val task loss: 83.723
model_5: train loss: 70.383, train task loss: 32.695 - val loss: 207.873, val task loss: 73.085
model_6: train loss: 73.863, train task loss: 34.071 - val loss: 166.159, val task loss: 87.219 [*] Best so far
model_7: train loss: 330.944, train task loss: 76.049 - val loss: 448.562, val task loss: 75.958 [*] Best so far

Epoch: 14/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 190.77it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 210.20it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 217.81it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 222.66it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 225.14it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 225.12it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 224.22it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 110.562, train task loss: 42.707 - val loss: 239.479, val task loss: 69.473 [*] Best so far
model_2: train loss: 63.910, train task loss: 30.385 - val loss: 213.890, val task loss: 73.171
model_3: train loss: 62.530, train task loss: 29.655 - val loss: 200.745, val task loss: 69.144 [*] Best so far
model_4: train loss: 66.918, train task loss: 31.329 - val loss: 167.135, val task loss: 83.528
model_5: train loss: 63.148, train task loss: 30.350 - val loss: 204.674, val task loss: 69.638 [*] Best so far
model_6: train loss: 69.243, train task loss: 33.654 - val loss: 171.906, val task loss: 88.299
model_7: train loss: 252.045, train task loss: 60.946 - val loss: 437.916, val task loss: 69.471 [*] Best so far

Epoch: 15/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 191.27it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 210.13it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 216.40it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 219.47it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 216.81it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:04, 216.55it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 218.70it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 82.703, train task loss: 32.397 - val loss: 264.092, val task loss: 68.935 [*] Best so far
model_2: train loss: 59.028, train task loss: 26.370 - val loss: 240.254, val task loss: 72.189 [*] Best so far
model_3: train loss: 57.366, train task loss: 27.027 - val loss: 222.365, val task loss: 67.089 [*] Best so far
model_4: train loss: 60.032, train task loss: 27.924 - val loss: 172.136, val task loss: 84.224
model_5: train loss: 59.310, train task loss: 26.704 - val loss: 228.737, val task loss: 69.974
model_6: train loss: 60.624, train task loss: 28.516 - val loss: 177.183, val task loss: 89.271
model_7: train loss: 188.647, train task loss: 44.628 - val loss: 546.085, val task loss: 67.960 [*] Best so far

Epoch: 16/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 194.78it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 209.58it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:05, 216.90it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 221.63it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 223.64it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 225.19it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 226.87it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 59.571, train task loss: 26.270 - val loss: 285.945, val task loss: 66.224 [*] Best so far
model_2: train loss: 47.477, train task loss: 23.165 - val loss: 232.013, val task loss: 69.759 [*] Best so far
model_3: train loss: 43.948, train task loss: 22.130 - val loss: 226.021, val task loss: 67.539
model_4: train loss: 45.040, train task loss: 23.478 - val loss: 170.290, val task loss: 82.539 [*] Best so far
model_5: train loss: 48.054, train task loss: 22.954 - val loss: 241.449, val task loss: 70.192
model_6: train loss: 45.749, train task loss: 24.186 - val loss: 179.878, val task loss: 92.127
model_7: train loss: 156.455, train task loss: 35.489 - val loss: 576.701, val task loss: 63.744 [*] Best so far

Epoch: 17/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 199.64it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 214.48it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 217.80it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 222.33it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 224.52it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 226.63it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:02<00:03, 228.37it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 50.823, train task loss: 21.343 - val loss: 312.509, val task loss: 61.815 [*] Best so far
model_2: train loss: 42.729, train task loss: 19.575 - val loss: 268.451, val task loss: 68.350 [*] Best so far
model_3: train loss: 40.742, train task loss: 19.586 - val loss: 266.338, val task loss: 65.042 [*] Best so far
model_4: train loss: 37.836, train task loss: 19.601 - val loss: 192.614, val task loss: 84.238
model_5: train loss: 45.720, train task loss: 20.666 - val loss: 257.588, val task loss: 65.911 [*] Best so far
model_6: train loss: 38.045, train task loss: 19.810 - val loss: 199.887, val task loss: 91.511
model_7: train loss: 141.807, train task loss: 29.659 - val loss: 609.540, val task loss: 59.359 [*] Best so far

Epoch: 18/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 203.18it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 217.59it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.05it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.40it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 227.66it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.07it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 225.58it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 45.348, train task loss: 17.523 - val loss: 301.851, val task loss: 62.237
model_2: train loss: 42.499, train task loss: 17.038 - val loss: 273.285, val task loss: 69.223
model_3: train loss: 40.649, train task loss: 16.533 - val loss: 251.393, val task loss: 66.013
model_4: train loss: 37.319, train task loss: 16.873 - val loss: 181.925, val task loss: 85.387
model_5: train loss: 44.431, train task loss: 17.176 - val loss: 260.164, val task loss: 68.242
model_6: train loss: 37.995, train task loss: 17.549 - val loss: 186.706, val task loss: 90.168
model_7: train loss: 98.198, train task loss: 21.792 - val loss: 630.702, val task loss: 57.658 [*] Best so far

Epoch: 19/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 200.22it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 219.28it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 224.32it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.28it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 227.51it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.80it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 230.21it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 42.837, train task loss: 15.416 - val loss: 301.924, val task loss: 61.953
model_2: train loss: 41.288, train task loss: 15.118 - val loss: 287.834, val task loss: 71.538
model_3: train loss: 46.694, train task loss: 15.319 - val loss: 265.336, val task loss: 66.999
model_4: train loss: 38.499, train task loss: 15.498 - val loss: 191.425, val task loss: 85.569
model_5: train loss: 43.872, train task loss: 15.241 - val loss: 273.865, val task loss: 68.279
model_6: train loss: 38.617, train task loss: 15.616 - val loss: 196.040, val task loss: 90.184
model_7: train loss: 85.489, train task loss: 18.574 - val loss: 607.871, val task loss: 56.755 [*] Best so far

Epoch: 20/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 205.13it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 216.79it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 222.14it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 225.32it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 228.12it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.66it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 230.55it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 38.611, train task loss: 14.298 - val loss: 326.268, val task loss: 59.738 [*] Best so far
model_2: train loss: 43.058, train task loss: 14.564 - val loss: 283.776, val task loss: 66.702 [*] Best so far
model_3: train loss: 50.456, train task loss: 15.930 - val loss: 264.856, val task loss: 64.682 [*] Best so far
model_4: train loss: 38.180, train task loss: 14.517 - val loss: 180.539, val task loss: 81.402 [*] Best so far
model_5: train loss: 44.846, train task loss: 15.147 - val loss: 261.774, val task loss: 67.992
model_6: train loss: 38.099, train task loss: 14.437 - val loss: 193.637, val task loss: 94.500
model_7: train loss: 69.790, train task loss: 16.742 - val loss: 611.047, val task loss: 56.605 [*] Best so far

Epoch: 21/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 206.98it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 220.38it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 221.81it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 224.43it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 227.25it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 228.72it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 229.82it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 38.699, train task loss: 12.847 - val loss: 308.930, val task loss: 60.620
model_2: train loss: 44.477, train task loss: 13.791 - val loss: 276.455, val task loss: 67.667
model_3: train loss: 47.533, train task loss: 14.028 - val loss: 263.041, val task loss: 64.261 [*] Best so far
model_4: train loss: 43.425, train task loss: 13.906 - val loss: 202.124, val task loss: 83.711
model_5: train loss: 52.709, train task loss: 14.602 - val loss: 277.887, val task loss: 67.833
model_6: train loss: 43.793, train task loss: 14.274 - val loss: 210.629, val task loss: 92.216
model_7: train loss: 63.672, train task loss: 14.881 - val loss: 628.192, val task loss: 57.385

Epoch: 22/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 199.59it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 217.61it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 222.94it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 226.27it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 228.80it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.49it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 230.64it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 32.803, train task loss: 11.956 - val loss: 332.806, val task loss: 58.999 [*] Best so far
model_2: train loss: 40.013, train task loss: 12.956 - val loss: 281.439, val task loss: 67.158
model_3: train loss: 40.233, train task loss: 12.843 - val loss: 288.140, val task loss: 64.226 [*] Best so far
model_4: train loss: 38.308, train task loss: 12.895 - val loss: 189.398, val task loss: 82.661
model_5: train loss: 48.122, train task loss: 13.923 - val loss: 290.240, val task loss: 67.792
model_6: train loss: 39.000, train task loss: 13.587 - val loss: 197.885, val task loss: 91.148
model_7: train loss: 48.876, train task loss: 13.143 - val loss: 641.655, val task loss: 54.475 [*] Best so far

Epoch: 23/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 201.85it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 215.33it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 222.44it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.33it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.69it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 230.29it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 233.34it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 31.803, train task loss: 10.643 - val loss: 310.894, val task loss: 58.619 [*] Best so far
model_2: train loss: 35.442, train task loss: 11.322 - val loss: 259.039, val task loss: 66.310 [*] Best so far
model_3: train loss: 39.246, train task loss: 11.668 - val loss: 260.788, val task loss: 63.707 [*] Best so far
model_4: train loss: 37.599, train task loss: 11.716 - val loss: 187.325, val task loss: 83.677
model_5: train loss: 43.866, train task loss: 12.517 - val loss: 262.263, val task loss: 67.208
model_6: train loss: 38.523, train task loss: 12.640 - val loss: 193.068, val task loss: 89.419
model_7: train loss: 43.860, train task loss: 11.794 - val loss: 612.804, val task loss: 54.552

Epoch: 24/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 209.85it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 219.82it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 221.64it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 222.98it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 224.79it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 225.97it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 227.86it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 27.990, train task loss: 10.114 - val loss: 335.247, val task loss: 58.206 [*] Best so far
model_2: train loss: 30.680, train task loss: 10.348 - val loss: 291.370, val task loss: 65.841 [*] Best so far
model_3: train loss: 35.271, train task loss: 11.298 - val loss: 279.192, val task loss: 62.736 [*] Best so far
model_4: train loss: 31.250, train task loss: 10.877 - val loss: 186.288, val task loss: 83.982
model_5: train loss: 37.982, train task loss: 11.685 - val loss: 308.720, val task loss: 66.874
model_6: train loss: 31.339, train task loss: 10.966 - val loss: 192.990, val task loss: 90.684
model_7: train loss: 36.692, train task loss: 11.361 - val loss: 666.667, val task loss: 53.865 [*] Best so far

Epoch: 25/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 208.97it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 217.17it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 223.83it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 224.70it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.15it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.52it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 231.39it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 26.229, train task loss: 8.664 - val loss: 359.066, val task loss: 59.457
model_2: train loss: 26.537, train task loss: 8.842 - val loss: 295.790, val task loss: 64.855 [*] Best so far
model_3: train loss: 29.542, train task loss: 8.909 - val loss: 280.906, val task loss: 66.300
model_4: train loss: 27.309, train task loss: 9.323 - val loss: 215.085, val task loss: 85.638
model_5: train loss: 37.679, train task loss: 10.014 - val loss: 297.861, val task loss: 67.982
model_6: train loss: 27.157, train task loss: 9.172 - val loss: 220.616, val task loss: 91.168
model_7: train loss: 31.193, train task loss: 8.967 - val loss: 666.759, val task loss: 53.844 [*] Best so far

Epoch: 26/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 198.23it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 214.05it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 221.64it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 224.74it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 228.24it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 230.00it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 231.35it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 28.652, train task loss: 8.226 - val loss: 349.707, val task loss: 56.326 [*] Best so far
model_2: train loss: 27.853, train task loss: 8.143 - val loss: 302.773, val task loss: 64.979
model_3: train loss: 30.977, train task loss: 8.665 - val loss: 277.816, val task loss: 62.407 [*] Best so far
model_4: train loss: 31.419, train task loss: 8.954 - val loss: 199.760, val task loss: 82.976
model_5: train loss: 35.228, train task loss: 8.999 - val loss: 309.163, val task loss: 66.246
model_6: train loss: 31.779, train task loss: 9.313 - val loss: 210.443, val task loss: 93.659
model_7: train loss: 29.460, train task loss: 8.495 - val loss: 673.165, val task loss: 52.309 [*] Best so far

Epoch: 27/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 203.64it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 220.22it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 225.16it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.56it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 231.27it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 232.75it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 235.48it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 29.417, train task loss: 8.026 - val loss: 339.324, val task loss: 56.420
model_2: train loss: 30.243, train task loss: 8.229 - val loss: 304.173, val task loss: 64.172 [*] Best so far
model_3: train loss: 29.643, train task loss: 8.169 - val loss: 292.656, val task loss: 63.073
model_4: train loss: 35.194, train task loss: 9.178 - val loss: 202.356, val task loss: 83.887
model_5: train loss: 32.821, train task loss: 8.412 - val loss: 285.065, val task loss: 65.270 [*] Best so far
model_6: train loss: 35.025, train task loss: 9.009 - val loss: 209.346, val task loss: 90.877
model_7: train loss: 31.781, train task loss: 8.008 - val loss: 642.335, val task loss: 53.416

Epoch: 28/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:06, 202.03it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 216.64it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 223.08it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 229.67it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.59it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 229.44it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 230.45it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 29.152, train task loss: 7.359 - val loss: 359.235, val task loss: 56.375
model_2: train loss: 29.047, train task loss: 7.357 - val loss: 306.558, val task loss: 65.793
model_3: train loss: 28.686, train task loss: 7.314 - val loss: 296.120, val task loss: 62.330 [*] Best so far
model_4: train loss: 33.454, train task loss: 7.895 - val loss: 213.710, val task loss: 84.884
model_5: train loss: 30.822, train task loss: 7.710 - val loss: 312.054, val task loss: 65.421
model_6: train loss: 34.411, train task loss: 8.852 - val loss: 220.698, val task loss: 91.872
model_7: train loss: 31.183, train task loss: 7.495 - val loss: 673.865, val task loss: 52.514

Epoch: 29/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 205.98it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 218.72it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 222.91it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 232.64it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 231.17it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 231.13it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 231.79it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 27.767, train task loss: 7.263 - val loss: 361.034, val task loss: 56.871
model_2: train loss: 27.316, train task loss: 7.179 - val loss: 308.942, val task loss: 64.426
model_3: train loss: 27.461, train task loss: 7.393 - val loss: 287.428, val task loss: 62.215 [*] Best so far
model_4: train loss: 30.559, train task loss: 7.984 - val loss: 213.245, val task loss: 83.194
model_5: train loss: 26.603, train task loss: 7.289 - val loss: 297.924, val task loss: 65.687
model_6: train loss: 30.982, train task loss: 8.408 - val loss: 222.788, val task loss: 92.738
model_7: train loss: 27.829, train task loss: 7.272 - val loss: 644.398, val task loss: 50.882 [*] Best so far

Epoch: 30/30 - LR: 0.001000



  0%|                                                                                     | 0/1280 [00:00<?, ?it/s][A
  5%|███▊                                                                       | 64/1280 [00:00<00:05, 206.36it/s][A
 10%|███████▍                                                                  | 128/1280 [00:00<00:05, 221.17it/s][A
 15%|███████████                                                               | 192/1280 [00:00<00:04, 227.78it/s][A
 20%|██████████████▊                                                           | 256/1280 [00:01<00:04, 227.21it/s][A
 25%|██████████████████▌                                                       | 320/1280 [00:01<00:04, 229.00it/s][A
 30%|██████████████████████▏                                                   | 384/1280 [00:01<00:03, 230.05it/s][A
 35%|█████████████████████████▉                                                | 448/1280 [00:01<00:03, 230.49it/s][A
 40%|█████████████████████████████▌            

model_1: train loss: 28.731, train task loss: 7.428 - val loss: 349.464, val task loss: 56.030 [*] Best so far
model_2: train loss: 30.762, train task loss: 7.871 - val loss: 314.735, val task loss: 65.094
model_3: train loss: 29.038, train task loss: 7.354 - val loss: 290.420, val task loss: 61.784 [*] Best so far
model_4: train loss: 32.973, train task loss: 8.238 - val loss: 202.768, val task loss: 83.264
model_5: train loss: 28.395, train task loss: 7.463 - val loss: 295.154, val task loss: 64.339 [*] Best so far
model_6: train loss: 33.280, train task loss: 8.545 - val loss: 209.326, val task loss: 89.822
model_7: train loss: 29.463, train task loss: 7.400 - val loss: 639.971, val task loss: 52.279


Repetitions: 100%|██████████████████████████████████████████████████████████████████| 1/1 [10:24<00:00, 624.66s/it]

Method: (best_single), Test_MSE: 54.58601379394531
Method: (cohort), Test_MSE: [57.060401916503906, 72.99630737304688, 71.11065673828125, 93.23255920410156, 71.61067199707031, 110.03755187988281, 54.58601379394531]
Finished running adversarial meta fusion!





In [125]:
results = pd.concat(results, ignore_index=True)

TypeError: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"

In [126]:
results

Unnamed: 0,Method,Test_metric,best_rho,cohort_pairs,cluster_idxs,random_state,dim_modalities,n,n_train,n_val,n_test
0,best_single,53.324219,,,,1,"[2000, 400]",2000,1280,320,400
1,cohort,"[53.32421875, 66.0692138671875, 65.23515319824...",5.0,"[(80, 60), (80, 0), (60, 60), (60, 0), (50, 60...","[0, 6]",1,"[2000, 400]",2000,1280,320,400
2,indep_best_single,54.864677,,,,1,"[2000, 400]",2000,1280,320,400
3,indep_cohort,"[54.86467742919922, 73.9986343383789, 68.25230...",,"[(80, 60), (80, 0), (60, 60), (60, 0), (50, 60...",,1,"[2000, 400]",2000,1280,320,400
4,adversarial_best_single,54.586014,,,,1,"[2000, 400]",2000,1280,320,400
5,adversarial_cohort,"[57.060401916503906, 72.99630737304688, 71.110...",10.0,"[(80, 60), (80, 0), (60, 60), (60, 0), (50, 60...","[3, 5]",1,"[2000, 400]",2000,1280,320,400


In [127]:
results["Test_metric"][1]

[53.32421875,
 66.0692138671875,
 65.23515319824219,
 95.41410064697266,
 69.11542510986328,
 121.18408203125,
 50.805694580078125]

In [128]:
results["Test_metric"][3]

[54.86467742919922,
 73.9986343383789,
 68.25230407714844,
 110.51880645751953,
 73.34672546386719,
 123.37451934814453,
 50.90529251098633]

In [129]:
results["Test_metric"][5]

[57.060401916503906,
 72.99630737304688,
 71.11065673828125,
 93.23255920410156,
 71.61067199707031,
 110.03755187988281,
 54.58601379394531]