In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import os
import random
import shutil
import sys

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset, TensorDataset
from tqdm import tqdm

sys.path.append('../')
from meta_fusion.benchmarks import *
from meta_fusion.methods import *
from meta_fusion.models import *
from meta_fusion.utils import *
from meta_fusion.third_party import *
from meta_fusion.synthetic_data import PrepareSyntheticData
from meta_fusion.config import *
from meta_fusion.methodsextra import *


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/opt/anaconda3/envs/fusion_stable310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/anaconda3/envs/fusion_stable310/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/opt/anaconda3/envs/fusion_stable310/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/envs/fusion_stable310/lib/python3.10/site-packages/traitlets/config/application.py",

In [2]:
#########################
# Experiment parameters #
#########################
# Report how this process was launched (kept for parity with the script version
# of this experiment).
print('Number of arguments:', len(sys.argv), 'arguments.')
print('Argument List:', str(sys.argv))
if len(sys.argv) != 2:
    # The seed below is hard-coded and never read from sys.argv, so an
    # unexpected argument count is not fatal. Calling quit() here would shut
    # down the Jupyter kernel, so only warn and carry on.
    print("Warning: unexpected number of parameters; continuing with the hard-coded seed.")

seed = 1234
print(seed)

# Fixed data parameters
repetition = 1

# Data model parameters
n = 2000
dim_modalities = [500, 400]
dim_latent = [20, 30, 0]
noise_ratios = [0.4, 0.4]
trans_type = ["linear", "linear", "linear"]
mod_prop = [1, 1, 0, 0]
interactive_prop = 0

# Alternative (finer) grid of extractor output sizes:
# mod_outs = [[0, 200, 300, 400, 500], [0, 100, 200, 300, 400]]
mod_outs = [[0, 500], [0, 400]]
# Derived from dim_modalities so the per-modality lists below cannot drift out of sync.
num_modalities = len(dim_modalities)
combined_hiddens = [128, 64]
mod_hiddens = [[256], [256]]

# data parameters
data_name = 'regression'
exp_name = data_name + "_" + "linear_early"
output_dim = 1  # specify the output dimension for regression

extractor_type = 'separate'
separate = True
is_mod_static = [False] * num_modalities
freeze_mod_extractors = [False] * num_modalities

# Load default model configurations
config = load_config('../experiments_synthetic/config.json')
extractor_config = load_config('../experiments_synthetic/config_extractor.json')

# Model files directory
ckpt_dir = f"./checkpoints/{exp_name}/seed{seed}/"
config['ckpt_dir'] = extractor_config['ckpt_dir'] = ckpt_dir

# Update other training parameters
config['output_dim'] = extractor_config['output_dim'] = output_dim
config["init_lr"] = 0.001
config["ensemble_methods"] = [
    "simple_average",
    "weighted_average",
    "meta_learner",
    "greedy_ensemble",
]
extractor_config["init_lr"] = [0.001] * num_modalities
extractor_config["weight_decay"] = [0] * num_modalities

#####################
#    Load Dataset   #
#####################
data_preparer = PrepareSyntheticData(data_name=data_name, test_size=0.2, val_size=0.2)
print(f"Finished generating {exp_name} dataset.")
sys.stdout.flush()


###############
# Output file #
###############
outdir = f"./results/{exp_name}/"
os.makedirs(outdir, exist_ok=True)
outfile_name = f"seed{seed}"
outfile = outdir + outfile_name + ".txt"
print("Output file: {:s}".format(outfile), end="\n")
sys.stdout.flush()


# Header for results file
def add_header(results):
    """Tag a results table with the experiment-level settings.

    Writes two columns into ``results`` in place and returns the same object:

    * ``'extractor'``   - the module-level ``extractor_type``
    * ``'weight_type'`` - ``config['divergence_weight_type']`` from the
      module-level ``config``
    """
    results['extractor'] = extractor_type
    weight_type = config['divergence_weight_type']
    results['weight_type'] = weight_type
    return results




Number of arguments: 2 arguments.
Argument List: ['/opt/anaconda3/envs/fusion_stable310/lib/python3.10/site-packages/ipykernel_launcher.py', '--f=/Users/parnian/Library/Jupyter/runtime/kernel-v3c8604cc7a5aa2caed39cf4980d1fddda8e882702.json']
1234
Finished generating regression_linear_early dataset.
Output file: ./results/regression_linear_early/seed1234.txt


In [5]:
#####################
# Define Experiment #
#####################
def run_single_experiment(config, extractor_config, n, random_state, 
                          mod_outs, combined_hiddens, mod_hiddens,
                          separate, is_mod_static, freeze_mod_extractors,
                          run_oracle=False, run_coop=True, run_all_at_once=False):
    """Run one repetition of the synthetic fusion experiment and tabulate test metrics.

    The stages run in this order: data split, benchmark models, optional oracle
    benchmarks, optional Coop, Meta Fusion (plus its ablation), joint training
    ('marginal'), negative correlation learning, and joint training ('shapley').

    Besides its arguments, the function reads these module-level names:
    ``data_preparer``, ``data_name``, ``trans_type``, ``mod_prop``,
    ``interactive_prop``, ``dim_modalities``, ``dim_latent``, ``noise_ratios``,
    ``output_dim`` and ``extractor_type``.

    Parameters
    ----------
    config : dict
        Training configuration passed to every trainer. ``config['random_state']``
        is overwritten in place.
    extractor_config : dict
        Configuration passed to the encoder extractors.
        ``extractor_config['random_state']`` is overwritten in place.
    n : int
        Requested sample size handed to ``data_preparer.get_data_loaders``; the
        value reported in the output is the one returned by ``get_data_info``.
    random_state : int
        Seed stored in both configs and used for the data split.
    mod_outs, combined_hiddens, mod_hiddens
        Architecture settings forwarded to ``Extractors`` / ``Cohorts``.
    separate, is_mod_static, freeze_mod_extractors
        Extractor / cohort options forwarded unchanged.
    run_oracle : bool
        Also train and test benchmarks on the oracle loaders.
    run_coop : bool
        Also train and test the ``Coop`` baseline.
    run_all_at_once : bool
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    pandas.DataFrame
        One row per (method, metric) with columns ``Method``, ``Test_metric``,
        ``best_rho``, ``cohort_pairs``, ``ensemble_idxs``, ``cluster_idxs``,
        ``random_state``, ``dim_modalities``, ``n``, ``n_train``, ``n_val``
        and ``n_test``.
    """
    config['random_state'] = random_state
    extractor_config['random_state'] = random_state
    res_list = []
    # The next three dicts are keyed by the method prefix (the text before the
    # first '_' in the 'Method' column); ens_idxs is keyed by the full method name.
    best_rho = {}
    cohort_pairs = {}
    ens_idxs = {}
    cluster_idxs = {}

    #----------------#
    # Split dataset  #
    #----------------#
    train_loader, val_loader, test_loader, oracle_train_loader, oracle_val_loader, oracle_test_loader =\
    data_preparer.get_data_loaders(n, trans_type=trans_type, mod_prop=mod_prop, 
                                    interactive_prop = interactive_prop,
                                    dim_modalities=dim_modalities, dim_latent=dim_latent,
                                    noise_ratios=noise_ratios, random_state=random_state)
    # Get data info
    data_info = data_preparer.get_data_info()
    n = data_info[1]
    n_train = data_info[2]
    n_val = data_info[3]
    n_test = data_info[4]

    print(f"Finished splitting {data_name} dataset. Data information are summarized below:\n"
            f"Modality dimensions: {dim_modalities}\n"
            f"Data size: {n}\n"
            f"Train size: {n_train}\n"
            f"Val size: {n_val}\n"
            f"Test size: {n_test}")
    sys.stdout.flush() 

    def _build_student_cohort():
        """Build a fresh set of extractors and the student cohort on top of them."""
        extractor = Extractors(mod_outs, dim_modalities, train_loader, val_loader)
        if extractor_type in ('encoder', 'separate'):
            extractor.get_encoder_extractors(mod_hiddens, separate=separate, config=extractor_config)
        elif extractor_type == 'PCA':
            extractor.get_PCA_extractors()
        return Cohorts(extractors=extractor, combined_hidden_layers=combined_hiddens, output_dim=output_dim,
                       is_mod_static=is_mod_static, freeze_mod_extractors=freeze_mod_extractors)

    def _record(prefix, res):
        """Store a test-result dict with every key prefixed by '<prefix>_'."""
        res_list.append({f"{prefix}_{k}": v for k, v in res.items()})

    #------------------#
    # Benchmark models #
    #------------------#
    bm_extractor = Extractors([[d,0] for d in dim_modalities], dim_modalities, train_loader, val_loader)
    _ = bm_extractor.get_dummy_extractors()
    bm_cohort = Cohorts(extractors=bm_extractor, combined_hidden_layers=combined_hiddens, output_dim=output_dim)

    if run_oracle:
        oracle_dims = [dim_latent[0], dim_latent[1]+dim_latent[2]]
        oracle_extractor = Extractors([[d,0] for d in oracle_dims], oracle_dims, oracle_train_loader, oracle_val_loader)
        _ = oracle_extractor.get_dummy_extractors()
        oracle_cohort = Cohorts(extractors=oracle_extractor, combined_hidden_layers=combined_hiddens, output_dim=output_dim)

    #----------------------------#
    # Proposed model: Meta Fuse  #
    #----------------------------#
    # Built before any training starts, as in the original ordering.
    meta_cohort = _build_student_cohort()

    #------------------------------#
    #  Train and test benchmarks   #
    #------------------------------#
    bm_models = bm_cohort.get_cohort_models()
    _, bm_dims = bm_cohort.get_cohort_info()
    bm = Benchmarks(config, bm_models, bm_dims, [train_loader, val_loader])
    bm.train()
    res_list.append(bm.test(test_loader))
    print("Finished running basic benchmarks!")

    if run_oracle:
        # Shallow copy so the oracle-specific learning rate does not leak into
        # the shared config used by every later trainer.
        oracle_config = dict(config)
        oracle_config["init_lr"] = 0.001
        oracle_models = oracle_cohort.get_cohort_models()
        _, oracle_dims = oracle_cohort.get_cohort_info()
        oracle = Benchmarks(oracle_config, oracle_models, oracle_dims, [oracle_train_loader, oracle_val_loader])
        oracle.train()
        _record("oracle", oracle.test(oracle_test_loader))
        print("Finished running oracle benchmarks!")

    if run_coop:
        bm_models = bm_cohort.get_cohort_models()
        _, bm_dims = bm_cohort.get_cohort_info()
        coop = Coop(config, bm_models, bm_dims, [train_loader, val_loader])
        coop.train()
        res_list.append(coop.test(test_loader))
        best_rho['coop'] = coop.best_rho
        print("Finished running coop!")

    #------------------------------#
    #  Train and test Meta Fuse    #
    #------------------------------#
    cohort_models = meta_cohort.get_cohort_models()
    _, dim_pairs = meta_cohort.get_cohort_info()
    metafuse = Trainer(config, cohort_models, [train_loader, val_loader])
    metafuse.train()
    _record("metafusion", metafuse.test(test_loader))
    # Ablation: the same student cohort evaluated as independent (late-fusion style) models.
    metafuse.train_ablation()
    _record("indep", metafuse.test_ablation(test_loader))

    best_rho['metafusion'] = metafuse.best_rho
    cohort_pairs['metafusion'] = dim_pairs
    cohort_pairs['indep'] = dim_pairs

    if "greedy_ensemble" in config["ensemble_methods"]:
        ens_idxs['metafusion_greedy_ensemble'] = metafuse.ens_idxs

    if config['divergence_weight_type'] == "clustering":
        cluster_idxs['metafusion'] = metafuse.cluster_idxs

    print("Finished running meta fusion!")

    #--------------------------------------#
    #  Train and test joint training       #
    #--------------------------------------#
    joint_cohort = _build_student_cohort()
    cohort_models = joint_cohort.get_cohort_models()
    _, dim_pairs = joint_cohort.get_cohort_info()
    jointmodel = Trainer_Joint(config, cohort_models, [train_loader, val_loader])
    jointmodel.train('marginal')
    _record("jointlearning", jointmodel.test(test_loader))
    # Keyed by the method prefix so the lookup in the result table finds it.
    cohort_pairs['jointlearning'] = dim_pairs

    if "greedy_ensemble" in config["ensemble_methods"]:
        ens_idxs['jointlearning_greedy_ensemble'] = jointmodel.ens_idxs

    print("Finished running joint fusion!")

    #------------------------------------------------#
    #  Train and test Negative Correlation Learning  #
    #------------------------------------------------#
    NCL_cohort = _build_student_cohort()
    NCL_models = NCL_cohort.get_cohort_models()
    _, dim_pairs = NCL_cohort.get_cohort_info()
    ncl_model = Trainer_NCL(config, NCL_models, [train_loader, val_loader])
    ncl_model.train()
    _record("ncl", ncl_model.test(test_loader))
    cohort_pairs['ncl'] = dim_pairs
    best_rho['ncl'] = ncl_model.best_rho

    if "greedy_ensemble" in config["ensemble_methods"]:
        ens_idxs['ncl_greedy_ensemble'] = ncl_model.ens_idxs

    print("Finished running NCL fusion!")

    #--------------------------------------#
    #  Train and test Shapley training     #
    #--------------------------------------#
    joint_cohort = _build_student_cohort()
    cohort_models = joint_cohort.get_cohort_models()
    _, dim_pairs = joint_cohort.get_cohort_info()
    jointmodel = Trainer_Joint(config, cohort_models, [train_loader, val_loader])
    jointmodel.train('shapley')
    _record("shapley", jointmodel.test(test_loader))
    # Keyed by the method prefix so the lookup in the result table finds it.
    cohort_pairs['shapley'] = dim_pairs

    if "greedy_ensemble" in config["ensemble_methods"]:
        ens_idxs['shapley_greedy_ensemble'] = jointmodel.ens_idxs

    print("Finished running shapley fusion!")

    #------------------------------#
    #  Assemble the result table   #
    #------------------------------#
    rows = []
    for res in res_list:
        for method, val in res.items():
            prefix = method.split('_')[0]
            rows.append({'Method': method, 'Test_metric': val,
                         'best_rho': best_rho.get(prefix), 'cohort_pairs': cohort_pairs.get(prefix),
                         'ensemble_idxs': ens_idxs.get(method), 'cluster_idxs': cluster_idxs.get(prefix)})

    results = pd.DataFrame(rows)
    results['random_state'] = random_state
    # One list object per row; a bare list would be interpreted as column values.
    results["dim_modalities"] = [dim_modalities] * len(results)
    results['n'] = n
    results['n_train'] = n_train
    results['n_val'] = n_val
    results['n_test'] = n_test

    return results




In [6]:
#####################
#  Run Experiments  #
#####################
# Collect one result frame per repetition, then stack them into a single table.
repetition_frames = []

for rep in tqdm(range(1, repetition+1), desc="Repetitions", leave=True, position=0):
    print(f'Running with repetition {rep}...')
    # Distinct seed for every (seed, repetition) combination.
    random_state = repetition * (seed-1) + rep
    set_random_seed(random_state)

    # Run experiment
    repetition_frames.append(
        run_single_experiment(config, extractor_config, n, random_state,
                              mod_outs, combined_hiddens, mod_hiddens,
                              separate, is_mod_static, freeze_mod_extractors,
                              run_oracle=False, run_coop=True, run_all_at_once=False)
    )

results = pd.concat(repetition_frames, ignore_index=True)

# Last expression of the cell: tags the table in place and displays it.
add_header(results)

Repetitions:   0%|          | 0/1 [00:00<?, ?it/s]

Running with repetition 1...
Finished splitting regression dataset. Data information are summarized below:
Modality dimensions: [500, 400]
Data size: 2000
Train size: 1280
Val size: 320
Test size: 400
Start training benchmark models...
Training with disagreement penalty = 0

Epoch: 1/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3226.35it/s]


model_1: train loss: 130.209, train task loss: 130.209 - val loss: 104.447, val task loss: 104.447 [*] Best so far
model_2: train loss: 130.523, train task loss: 130.523 - val loss: 107.967, val task loss: 107.967 [*] Best so far
model_3: train loss: 121.284, train task loss: 121.284 - val loss: 79.854, val task loss: 79.854 [*] Best so far

Epoch: 2/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2915.82it/s]


model_1: train loss: 100.687, train task loss: 100.687 - val loss: 73.974, val task loss: 73.974 [*] Best so far
model_2: train loss: 96.264, train task loss: 96.264 - val loss: 65.101, val task loss: 65.101 [*] Best so far
model_3: train loss: 48.412, train task loss: 48.412 - val loss: 14.612, val task loss: 14.612 [*] Best so far

Epoch: 3/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2759.70it/s]


model_1: train loss: 80.245, train task loss: 80.245 - val loss: 75.729, val task loss: 75.729
model_2: train loss: 53.974, train task loss: 53.974 - val loss: 59.545, val task loss: 59.545 [*] Best so far
model_3: train loss: 8.021, train task loss: 8.021 - val loss: 5.031, val task loss: 5.031 [*] Best so far

Epoch: 4/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3042.71it/s]


model_1: train loss: 73.417, train task loss: 73.417 - val loss: 76.285, val task loss: 76.285
model_2: train loss: 47.215, train task loss: 47.215 - val loss: 56.859, val task loss: 56.859 [*] Best so far
model_3: train loss: 3.826, train task loss: 3.826 - val loss: 4.695, val task loss: 4.695 [*] Best so far

Epoch: 5/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3195.21it/s]


model_1: train loss: 68.697, train task loss: 68.697 - val loss: 78.510, val task loss: 78.510
model_2: train loss: 43.462, train task loss: 43.462 - val loss: 58.751, val task loss: 58.751
model_3: train loss: 2.727, train task loss: 2.727 - val loss: 4.509, val task loss: 4.509 [*] Best so far

Epoch: 6/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3404.48it/s]


model_1: train loss: 62.428, train task loss: 62.428 - val loss: 79.413, val task loss: 79.413
model_2: train loss: 40.300, train task loss: 40.300 - val loss: 58.397, val task loss: 58.397
model_3: train loss: 2.082, train task loss: 2.082 - val loss: 4.672, val task loss: 4.672

Epoch: 7/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3580.62it/s]


model_1: train loss: 55.038, train task loss: 55.038 - val loss: 86.012, val task loss: 86.012
model_2: train loss: 37.064, train task loss: 37.064 - val loss: 59.407, val task loss: 59.407
model_3: train loss: 1.614, train task loss: 1.614 - val loss: 4.625, val task loss: 4.625

Epoch: 8/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3013.25it/s]


model_1: train loss: 46.841, train task loss: 46.841 - val loss: 90.627, val task loss: 90.627
model_2: train loss: 33.478, train task loss: 33.478 - val loss: 59.747, val task loss: 59.747
model_3: train loss: 1.333, train task loss: 1.333 - val loss: 4.672, val task loss: 4.672

Epoch: 9/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3341.76it/s]


model_1: train loss: 39.713, train task loss: 39.713 - val loss: 96.566, val task loss: 96.566
model_2: train loss: 30.245, train task loss: 30.245 - val loss: 61.688, val task loss: 61.688
model_3: train loss: 1.107, train task loss: 1.107 - val loss: 4.820, val task loss: 4.820

Epoch: 10/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3044.52it/s]


model_1: train loss: 32.161, train task loss: 32.161 - val loss: 105.311, val task loss: 105.311
model_2: train loss: 26.863, train task loss: 26.863 - val loss: 63.606, val task loss: 63.606
model_3: train loss: 0.881, train task loss: 0.881 - val loss: 4.732, val task loss: 4.732
Finished training benchmark models!
Method: (modality_1), Test_MSE: 73.76573944091797
Method: (modality_2), Test_MSE: 58.63313293457031
Method: (early_fusion), Test_MSE: 3.9357616901397705
Method: (late_fusion), Test_MSE: 37.30242156982422
Finished running basic benchmarks!
Start training benchmark models...
Training with disagreement penalty = 0

Epoch: 1/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2924.24it/s]


model_1: train loss: 110.665, train task loss: 110.665 - val loss: 57.537, val task loss: 57.537 [*] Best so far
model_2: train loss: 121.475, train task loss: 121.475 - val loss: 57.537, val task loss: 57.537 [*] Best so far
model_3: train loss: 121.475, train task loss: 121.475 - val loss: 57.537, val task loss: 57.537 [*] Best so far

Epoch: 2/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3113.28it/s]


model_1: train loss: 28.917, train task loss: 28.917 - val loss: 25.663, val task loss: 25.663 [*] Best so far
model_2: train loss: 51.288, train task loss: 51.288 - val loss: 25.663, val task loss: 25.663 [*] Best so far
model_3: train loss: 51.288, train task loss: 51.288 - val loss: 25.663, val task loss: 25.663 [*] Best so far

Epoch: 3/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3214.54it/s]


model_1: train loss: 24.400, train task loss: 24.400 - val loss: 6.524, val task loss: 6.524 [*] Best so far
model_2: train loss: 8.268, train task loss: 8.268 - val loss: 6.524, val task loss: 6.524 [*] Best so far
model_3: train loss: 8.268, train task loss: 8.268 - val loss: 6.524, val task loss: 6.524 [*] Best so far

Epoch: 4/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3264.48it/s]


model_1: train loss: 4.355, train task loss: 4.355 - val loss: 4.807, val task loss: 4.807 [*] Best so far
model_2: train loss: 3.886, train task loss: 3.886 - val loss: 4.807, val task loss: 4.807 [*] Best so far
model_3: train loss: 3.886, train task loss: 3.886 - val loss: 4.807, val task loss: 4.807 [*] Best so far

Epoch: 5/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3159.11it/s]


model_1: train loss: 2.801, train task loss: 2.801 - val loss: 4.745, val task loss: 4.745 [*] Best so far
model_2: train loss: 2.811, train task loss: 2.811 - val loss: 4.745, val task loss: 4.745 [*] Best so far
model_3: train loss: 2.811, train task loss: 2.811 - val loss: 4.745, val task loss: 4.745 [*] Best so far

Epoch: 6/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3229.36it/s]


model_1: train loss: 2.156, train task loss: 2.156 - val loss: 4.724, val task loss: 4.724 [*] Best so far
model_2: train loss: 2.215, train task loss: 2.215 - val loss: 4.724, val task loss: 4.724 [*] Best so far
model_3: train loss: 2.215, train task loss: 2.215 - val loss: 4.724, val task loss: 4.724 [*] Best so far

Epoch: 7/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3234.97it/s]


model_1: train loss: 1.658, train task loss: 1.658 - val loss: 4.726, val task loss: 4.726
model_2: train loss: 1.767, train task loss: 1.767 - val loss: 4.726, val task loss: 4.726
model_3: train loss: 1.767, train task loss: 1.767 - val loss: 4.726, val task loss: 4.726

Epoch: 8/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3242.06it/s]


model_1: train loss: 1.322, train task loss: 1.322 - val loss: 4.998, val task loss: 4.998
model_2: train loss: 1.447, train task loss: 1.447 - val loss: 4.998, val task loss: 4.998
model_3: train loss: 1.447, train task loss: 1.447 - val loss: 4.998, val task loss: 4.998

Epoch: 9/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3253.77it/s]


model_1: train loss: 1.040, train task loss: 1.040 - val loss: 4.951, val task loss: 4.951
model_2: train loss: 1.178, train task loss: 1.178 - val loss: 4.951, val task loss: 4.951
model_3: train loss: 1.178, train task loss: 1.178 - val loss: 4.951, val task loss: 4.951

Epoch: 10/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3255.21it/s]


model_1: train loss: 0.812, train task loss: 0.812 - val loss: 5.052, val task loss: 5.052
model_2: train loss: 0.949, train task loss: 0.949 - val loss: 5.052, val task loss: 5.052
model_3: train loss: 0.949, train task loss: 0.949 - val loss: 5.052, val task loss: 5.052
Training with disagreement penalty = 0.99

Epoch: 1/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3227.94it/s]


model_1: train loss: 121.937, train task loss: 120.670 - val loss: 87.280, val task loss: 86.333 [*] Best so far
model_2: train loss: 123.260, train task loss: 122.566 - val loss: 90.358, val task loss: 86.333 [*] Best so far
model_3: train loss: 123.260, train task loss: 122.566 - val loss: 90.358, val task loss: 86.333 [*] Best so far

Epoch: 2/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3098.35it/s]


model_1: train loss: 58.726, train task loss: 58.004 - val loss: 32.697, val task loss: 32.416 [*] Best so far
model_2: train loss: 78.708, train task loss: 59.117 - val loss: 68.011, val task loss: 32.416 [*] Best so far
model_3: train loss: 78.708, train task loss: 59.117 - val loss: 68.011, val task loss: 32.416 [*] Best so far

Epoch: 3/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3244.59it/s]


model_1: train loss: 29.832, train task loss: 29.542 - val loss: 33.488, val task loss: 33.212
model_2: train loss: 63.104, train task loss: 30.067 - val loss: 66.099, val task loss: 33.212
model_3: train loss: 63.104, train task loss: 30.067 - val loss: 66.099, val task loss: 33.212

Epoch: 4/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3266.46it/s]


model_1: train loss: 27.219, train task loss: 26.921 - val loss: 32.947, val task loss: 32.768
model_2: train loss: 56.528, train task loss: 27.464 - val loss: 67.669, val task loss: 32.768
model_3: train loss: 56.528, train task loss: 27.464 - val loss: 67.669, val task loss: 32.768

Epoch: 5/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3238.90it/s]


model_1: train loss: 23.693, train task loss: 23.460 - val loss: 32.555, val task loss: 32.337 [*] Best so far
model_2: train loss: 50.726, train task loss: 23.914 - val loss: 69.169, val task loss: 32.337 [*] Best so far
model_3: train loss: 50.726, train task loss: 23.914 - val loss: 69.169, val task loss: 32.337 [*] Best so far

Epoch: 6/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3336.49it/s]


model_1: train loss: 20.655, train task loss: 20.438 - val loss: 34.007, val task loss: 33.841
model_2: train loss: 43.855, train task loss: 20.846 - val loss: 73.173, val task loss: 33.841
model_3: train loss: 43.855, train task loss: 20.846 - val loss: 73.173, val task loss: 33.841

Epoch: 7/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3327.05it/s]


model_1: train loss: 15.822, train task loss: 15.666 - val loss: 35.898, val task loss: 35.771
model_2: train loss: 36.870, train task loss: 15.962 - val loss: 78.018, val task loss: 35.771
model_3: train loss: 36.870, train task loss: 15.962 - val loss: 78.018, val task loss: 35.771

Epoch: 8/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3168.28it/s]


model_1: train loss: 13.948, train task loss: 13.808 - val loss: 42.006, val task loss: 41.890
model_2: train loss: 29.914, train task loss: 14.060 - val loss: 85.899, val task loss: 41.890
model_3: train loss: 29.914, train task loss: 14.060 - val loss: 85.899, val task loss: 41.890

Epoch: 9/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3209.62it/s]


model_1: train loss: 10.877, train task loss: 10.766 - val loss: 42.858, val task loss: 42.768
model_2: train loss: 23.867, train task loss: 10.950 - val loss: 91.352, val task loss: 42.768
model_3: train loss: 23.867, train task loss: 10.950 - val loss: 91.352, val task loss: 42.768

Epoch: 10/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3305.97it/s]


model_1: train loss: 8.324, train task loss: 8.235 - val loss: 46.491, val task loss: 46.426
model_2: train loss: 18.867, train task loss: 8.371 - val loss: 93.965, val task loss: 46.426
model_3: train loss: 18.867, train task loss: 8.371 - val loss: 93.965, val task loss: 46.426
Finished training benchmark models!
Selecting the optimal disgreement penalty via cross-validation...
Best rho: 0 with average task loss: 4.7235
Done!
Method: (coop), Test_MSE: 4.119237899780273
Finished running coop!
Start training student cohort...
Training with disagreement penalty = 0

Epoch: 1/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3744.77it/s]


model_1: train loss: 122.902, train task loss: 122.902 - val loss: 85.245, val task loss: 85.245 [*] Best so far
model_2: train loss: 129.627, train task loss: 129.627 - val loss: 104.223, val task loss: 104.223 [*] Best so far
model_3: train loss: 130.258, train task loss: 130.258 - val loss: 107.293, val task loss: 107.293 [*] Best so far

Epoch: 2/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3773.44it/s]


model_1: train loss: 49.136, train task loss: 49.136 - val loss: 12.892, val task loss: 12.892 [*] Best so far
model_2: train loss: 97.191, train task loss: 97.191 - val loss: 79.564, val task loss: 79.564 [*] Best so far
model_3: train loss: 91.883, train task loss: 91.883 - val loss: 63.687, val task loss: 63.687 [*] Best so far

Epoch: 3/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3792.55it/s]


model_1: train loss: 7.638, train task loss: 7.638 - val loss: 5.178, val task loss: 5.178 [*] Best so far
model_2: train loss: 80.908, train task loss: 80.908 - val loss: 76.250, val task loss: 76.250 [*] Best so far
model_3: train loss: 53.241, train task loss: 53.241 - val loss: 60.457, val task loss: 60.457 [*] Best so far

Epoch: 4/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3739.88it/s]


model_1: train loss: 3.834, train task loss: 3.834 - val loss: 4.596, val task loss: 4.596 [*] Best so far
model_2: train loss: 73.924, train task loss: 73.924 - val loss: 77.241, val task loss: 77.241
model_3: train loss: 46.757, train task loss: 46.757 - val loss: 57.716, val task loss: 57.716 [*] Best so far

Epoch: 5/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3598.21it/s]


model_1: train loss: 2.616, train task loss: 2.616 - val loss: 4.613, val task loss: 4.613
model_2: train loss: 68.880, train task loss: 68.880 - val loss: 77.791, val task loss: 77.791
model_3: train loss: 42.989, train task loss: 42.989 - val loss: 58.397, val task loss: 58.397

Epoch: 6/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3726.70it/s]


model_1: train loss: 2.025, train task loss: 2.025 - val loss: 4.519, val task loss: 4.519 [*] Best so far
model_2: train loss: 63.437, train task loss: 63.437 - val loss: 81.932, val task loss: 81.932
model_3: train loss: 39.776, train task loss: 39.776 - val loss: 58.953, val task loss: 58.953

Epoch: 7/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3686.85it/s]


model_1: train loss: 1.613, train task loss: 1.613 - val loss: 4.657, val task loss: 4.657
model_2: train loss: 55.977, train task loss: 55.977 - val loss: 86.168, val task loss: 86.168
model_3: train loss: 36.163, train task loss: 36.163 - val loss: 59.678, val task loss: 59.678

Epoch: 8/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3730.49it/s]


model_1: train loss: 1.262, train task loss: 1.262 - val loss: 4.708, val task loss: 4.708
model_2: train loss: 47.576, train task loss: 47.576 - val loss: 91.293, val task loss: 91.293
model_3: train loss: 33.042, train task loss: 33.042 - val loss: 61.014, val task loss: 61.014

Epoch: 9/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3741.10it/s]


model_1: train loss: 0.948, train task loss: 0.948 - val loss: 4.835, val task loss: 4.835
model_2: train loss: 39.823, train task loss: 39.823 - val loss: 101.121, val task loss: 101.121
model_3: train loss: 29.686, train task loss: 29.686 - val loss: 62.402, val task loss: 62.402

Epoch: 10/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 3737.97it/s]


model_1: train loss: 0.755, train task loss: 0.755 - val loss: 5.068, val task loss: 5.068
model_2: train loss: 32.674, train task loss: 32.674 - val loss: 106.447, val task loss: 106.447
model_3: train loss: 26.290, train task loss: 26.290 - val loss: 64.388, val task loss: 64.388
Training with disagreement penalty = 0.99
Computing divergence weights by clustering method...
Initialization complete
Iteration 0, inertia 343.5144026443013.
Iteration 1, inertia 171.75720132215065.
Converged at iteration 1: strict convergence.
Computed divergence weights by clustering method, weights are [1. 0. 0.]

Epoch: 1/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2732.46it/s]


model_1: train loss: 1.575, train task loss: 1.575 - val loss: 4.734, val task loss: 4.734 [*] Best so far
model_2: train loss: 147.149, train task loss: 75.098 - val loss: 146.744, val task loss: 76.838 [*] Best so far
model_3: train loss: 84.452, train task loss: 43.580 - val loss: 113.867, val task loss: 59.235 [*] Best so far

Epoch: 2/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2870.45it/s]


model_1: train loss: 1.277, train task loss: 1.277 - val loss: 4.691, val task loss: 4.691 [*] Best so far
model_2: train loss: 134.170, train task loss: 68.230 - val loss: 152.358, val task loss: 79.825
model_3: train loss: 75.768, train task loss: 39.280 - val loss: 114.944, val task loss: 59.786

Epoch: 3/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2864.50it/s]


model_1: train loss: 0.986, train task loss: 0.986 - val loss: 4.813, val task loss: 4.813
model_2: train loss: 117.773, train task loss: 59.869 - val loss: 157.800, val task loss: 82.709
model_3: train loss: 67.549, train task loss: 34.590 - val loss: 117.501, val task loss: 61.344

Epoch: 4/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2936.08it/s]


model_1: train loss: 0.800, train task loss: 0.800 - val loss: 5.020, val task loss: 5.020
model_2: train loss: 101.624, train task loss: 51.817 - val loss: 170.181, val task loss: 88.245
model_3: train loss: 58.968, train task loss: 30.345 - val loss: 120.505, val task loss: 62.387

Epoch: 5/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2873.31it/s]


model_1: train loss: 0.627, train task loss: 0.627 - val loss: 4.997, val task loss: 4.997
model_2: train loss: 85.575, train task loss: 43.460 - val loss: 184.081, val task loss: 95.991
model_3: train loss: 50.379, train task loss: 26.011 - val loss: 127.809, val task loss: 65.678

Epoch: 6/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2874.67it/s]


model_1: train loss: 0.473, train task loss: 0.473 - val loss: 5.163, val task loss: 5.163
model_2: train loss: 70.555, train task loss: 35.885 - val loss: 198.990, val task loss: 104.905
model_3: train loss: 43.916, train task loss: 22.470 - val loss: 125.302, val task loss: 65.445

Epoch: 7/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2784.50it/s]


model_1: train loss: 0.381, train task loss: 0.381 - val loss: 5.214, val task loss: 5.214
model_2: train loss: 55.462, train task loss: 28.262 - val loss: 207.885, val task loss: 108.585
model_3: train loss: 36.849, train task loss: 18.953 - val loss: 137.005, val task loss: 70.866

Epoch: 8/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2901.78it/s]


model_1: train loss: 0.286, train task loss: 0.286 - val loss: 5.245, val task loss: 5.245
model_2: train loss: 44.781, train task loss: 22.808 - val loss: 223.405, val task loss: 116.079
model_3: train loss: 30.510, train task loss: 15.692 - val loss: 134.443, val task loss: 69.703

Epoch: 9/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2872.88it/s]


model_1: train loss: 0.198, train task loss: 0.198 - val loss: 5.374, val task loss: 5.374
model_2: train loss: 35.514, train task loss: 18.128 - val loss: 229.611, val task loss: 119.041
model_3: train loss: 24.994, train task loss: 12.784 - val loss: 144.069, val task loss: 73.813

Epoch: 10/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2887.34it/s]


model_1: train loss: 0.150, train task loss: 0.150 - val loss: 5.423, val task loss: 5.423
model_2: train loss: 29.298, train task loss: 14.862 - val loss: 235.253, val task loss: 122.462
model_3: train loss: 19.486, train task loss: 9.961 - val loss: 150.137, val task loss: 77.048
Training with disagreement penalty = 3

Epoch: 1/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2909.13it/s]


model_1: train loss: 122.055, train task loss: 122.055 - val loss: 84.094, val task loss: 84.094 [*] Best so far
model_2: train loss: 130.146, train task loss: 129.216 - val loss: 110.717, val task loss: 105.358 [*] Best so far
model_3: train loss: 131.396, train task loss: 129.671 - val loss: 115.335, val task loss: 106.224 [*] Best so far

Epoch: 2/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2911.18it/s]


model_1: train loss: 48.042, train task loss: 48.042 - val loss: 14.140, val task loss: 14.140 [*] Best so far
model_2: train loss: 156.605, train task loss: 100.060 - val loss: 242.900, val task loss: 85.030 [*] Best so far
model_3: train loss: 170.994, train task loss: 92.668 - val loss: 288.197, val task loss: 69.838 [*] Best so far

Epoch: 3/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2813.54it/s]


model_1: train loss: 8.539, train task loss: 8.539 - val loss: 6.112, val task loss: 6.112 [*] Best so far
model_2: train loss: 315.629, train task loss: 83.183 - val loss: 285.477, val task loss: 77.740 [*] Best so far
model_3: train loss: 251.343, train task loss: 55.816 - val loss: 219.853, val task loss: 63.861 [*] Best so far

Epoch: 4/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2692.70it/s]


model_1: train loss: 4.147, train task loss: 4.147 - val loss: 4.786, val task loss: 4.786 [*] Best so far
model_2: train loss: 289.897, train task loss: 74.940 - val loss: 296.204, val task loss: 76.344 [*] Best so far
model_3: train loss: 160.768, train task loss: 46.166 - val loss: 229.032, val task loss: 58.078 [*] Best so far

Epoch: 5/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2744.52it/s]


model_1: train loss: 2.686, train task loss: 2.686 - val loss: 4.702, val task loss: 4.702 [*] Best so far
model_2: train loss: 264.446, train task loss: 67.745 - val loss: 285.554, val task loss: 79.751
model_3: train loss: 157.650, train task loss: 41.010 - val loss: 226.236, val task loss: 59.974

Epoch: 6/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2705.81it/s]


model_1: train loss: 2.079, train task loss: 2.079 - val loss: 4.693, val task loss: 4.693 [*] Best so far
model_2: train loss: 235.110, train task loss: 61.244 - val loss: 310.653, val task loss: 82.035
model_3: train loss: 142.451, train task loss: 37.292 - val loss: 229.097, val task loss: 59.689

Epoch: 7/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2280.05it/s]


model_1: train loss: 1.640, train task loss: 1.640 - val loss: 4.734, val task loss: 4.734
model_2: train loss: 209.541, train task loss: 53.461 - val loss: 322.687, val task loss: 87.147
model_3: train loss: 125.998, train task loss: 33.584 - val loss: 234.170, val task loss: 61.259

Epoch: 8/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2564.71it/s]


model_1: train loss: 1.325, train task loss: 1.325 - val loss: 4.755, val task loss: 4.755
model_2: train loss: 173.794, train task loss: 45.003 - val loss: 352.706, val task loss: 94.153
model_3: train loss: 111.309, train task loss: 29.171 - val loss: 240.741, val task loss: 62.872

Epoch: 9/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2669.44it/s]


model_1: train loss: 1.030, train task loss: 1.030 - val loss: 4.802, val task loss: 4.802
model_2: train loss: 142.469, train task loss: 36.914 - val loss: 393.110, val task loss: 103.914
model_3: train loss: 98.462, train task loss: 25.816 - val loss: 248.959, val task loss: 65.179

Epoch: 10/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2621.16it/s]


model_1: train loss: 0.816, train task loss: 0.816 - val loss: 5.146, val task loss: 5.146
model_2: train loss: 118.722, train task loss: 30.942 - val loss: 419.408, val task loss: 110.475
model_3: train loss: 83.296, train task loss: 22.287 - val loss: 260.381, val task loss: 66.041
Finished training student cohort!
Selecting the optimal disgreement penalty via cross-validation...
Best rho: 0 with average task loss: 4.5194
Done!
Training meta learner on the best cohort...


1280it [00:00, 3886.66it/s]


meta_learner: train task loss: 50.954 - val task loss: 9.023 [*] Best so far


1280it [00:00, 3871.54it/s]


meta_learner: train task loss: 5.836 - val task loss: 6.712 [*] Best so far


1280it [00:00, 3791.27it/s]


meta_learner: train task loss: 2.880 - val task loss: 5.418 [*] Best so far


1280it [00:00, 3941.57it/s]


meta_learner: train task loss: 2.485 - val task loss: 5.122 [*] Best so far


1280it [00:00, 3927.08it/s]


meta_learner: train task loss: 2.234 - val task loss: 5.128


1280it [00:00, 3870.52it/s]


meta_learner: train task loss: 2.147 - val task loss: 4.815 [*] Best so far


1280it [00:00, 4011.20it/s]


meta_learner: train task loss: 1.834 - val task loss: 4.681 [*] Best so far


1280it [00:00, 4031.50it/s]


meta_learner: train task loss: 1.820 - val task loss: 4.889


1280it [00:00, 3923.24it/s]


meta_learner: train task loss: 1.712 - val task loss: 4.642 [*] Best so far


1280it [00:00, 3403.21it/s]


meta_learner: train task loss: 1.590 - val task loss: 4.748
Done!
Selecting greedy ensemble on the best cohort...
Pruned 1 worst models, keeping 2 models
Initial best models: [0, 2] with losses: [tensor(4.5194, grad_fn=<MseLossBackward0>), tensor(57.7159, grad_fn=<MseLossBackward0>)]
Done!
Method: (simple_average), Test_MSE: 17.986480712890625
Method: (weighted_average), Test_MSE: 4.7485432624816895
Method: (meta_learner), Test_MSE: 4.266781806945801
Method: (greedy_ensemble), Test_MSE: 4.509246349334717
Method: (best_single), Test_MSE: 4.197611331939697
Method: (cohort), Test_MSE: [4.197611331939697, 72.91716003417969, 58.24107360839844]
Method: (simple_average), Test_MSE: 17.986480712890625
Method: (weighted_average), Test_MSE: 4.7485432624816895
Method: (meta_learner), Test_MSE: 4.266781806945801
Method: (greedy_ensemble), Test_MSE: 4.509246349334717
Method: (best_single), Test_MSE: 4.197611331939697
Method: (cohort), Test_MSE: [4.197611331939697, 72.91716003417969, 58.2410736083984

  0%|          | 0/1280 [00:00<?, ?it/s]



Epoch: 2/10 - LR: 0.001000


  0%|          | 0/1280 [00:00<?, ?it/s]



Epoch: 3/10 - LR: 0.001000


  0%|          | 0/1280 [00:00<?, ?it/s]



Epoch: 4/10 - LR: 0.001000


  0%|          | 0/1280 [00:00<?, ?it/s]



Epoch: 5/10 - LR: 0.001000


  0%|          | 0/1280 [00:00<?, ?it/s]



Epoch: 6/10 - LR: 0.001000


  0%|          | 0/1280 [00:00<?, ?it/s]



Epoch: 7/10 - LR: 0.001000


  0%|          | 0/1280 [00:00<?, ?it/s]



Epoch: 8/10 - LR: 0.001000


  0%|          | 0/1280 [00:00<?, ?it/s]



Epoch: 9/10 - LR: 0.001000


  0%|          | 0/1280 [00:00<?, ?it/s]



Epoch: 10/10 - LR: 0.001000


  0%|          | 0/1280 [00:00<?, ?it/s]


Finished training student cohort!
Training meta learner on the best cohort...


1280it [00:00, 3941.72it/s]


meta_learner: train task loss: 78.551 - val task loss: 49.238 [*] Best so far


1280it [00:00, 3840.74it/s]


meta_learner: train task loss: 54.549 - val task loss: 39.647 [*] Best so far


1280it [00:00, 3402.84it/s]


meta_learner: train task loss: 43.344 - val task loss: 33.209 [*] Best so far


1280it [00:00, 3854.87it/s]


meta_learner: train task loss: 35.458 - val task loss: 26.926 [*] Best so far


1280it [00:00, 3796.71it/s]


meta_learner: train task loss: 30.051 - val task loss: 23.231 [*] Best so far


1280it [00:00, 3932.22it/s]


meta_learner: train task loss: 25.365 - val task loss: 20.506 [*] Best so far


1280it [00:00, 3871.48it/s]


meta_learner: train task loss: 21.354 - val task loss: 17.596 [*] Best so far


1280it [00:00, 4102.97it/s]


meta_learner: train task loss: 17.859 - val task loss: 15.159 [*] Best so far


1280it [00:00, 4083.86it/s]


meta_learner: train task loss: 14.887 - val task loss: 13.319 [*] Best so far


1280it [00:00, 3889.92it/s]


meta_learner: train task loss: 12.643 - val task loss: 11.792 [*] Best so far
Done!
Selecting greedy ensemble on the best cohort...
Pruned 1 worst models, keeping 2 models
Initial best models: [1, 0] with losses: [tensor(80.2800, grad_fn=<MseLossBackward0>), tensor(83.8165, grad_fn=<MseLossBackward0>)]
Done!
Method: (simple_average), Test_MSE: 4.193029880523682
Method: (weighted_average), Test_MSE: 4.288529872894287
Method: (meta_learner), Test_MSE: 11.221004486083984
Method: (greedy_ensemble), Test_MSE: 26.126554489135742
Method: (best_single), Test_MSE: 78.17144775390625
Method: (cohort), Test_MSE: [81.78608703613281, 78.17144775390625, 94.43157958984375]
Finished running joint fusion!
Start training student cohort...
Training with disagreement penalty = 0.1

Epoch: 1/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2914.46it/s]


model_1: train loss: 122.001, train task loss: 122.057 - val loss: 78.726, val task loss: 79.106 [*] Best so far
model_2: train loss: 130.249, train task loss: 130.288 - val loss: 104.960, val task loss: 105.190 [*] Best so far
model_3: train loss: 128.577, train task loss: 128.645 - val loss: 103.449, val task loss: 103.819 [*] Best so far

Epoch: 2/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2969.84it/s]


model_1: train loss: 44.734, train task loss: 47.392 - val loss: 12.809, val task loss: 18.495 [*] Best so far
model_2: train loss: 95.174, train task loss: 97.323 - val loss: 74.333, val task loss: 80.452 [*] Best so far
model_3: train loss: 82.491, train task loss: 85.103 - val loss: 55.246, val task loss: 62.824 [*] Best so far

Epoch: 3/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2886.87it/s]


model_1: train loss: 6.976, train task loss: 11.299 - val loss: 3.226, val task loss: 5.845 [*] Best so far
model_2: train loss: 70.452, train task loss: 80.411 - val loss: 66.160, val task loss: 77.266 [*] Best so far
model_3: train loss: 43.591, train task loss: 52.651 - val loss: 51.236, val task loss: 61.103 [*] Best so far

Epoch: 4/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2956.41it/s]


model_1: train loss: 1.506, train task loss: 4.623 - val loss: 1.980, val task loss: 5.202 [*] Best so far
model_2: train loss: 64.867, train task loss: 75.013 - val loss: 68.072, val task loss: 78.232
model_3: train loss: 40.630, train task loss: 47.910 - val loss: 47.598, val task loss: 56.092 [*] Best so far

Epoch: 5/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2966.31it/s]


model_1: train loss: 0.321, train task loss: 3.218 - val loss: 1.897, val task loss: 4.858 [*] Best so far
model_2: train loss: 60.691, train task loss: 69.931 - val loss: 68.572, val task loss: 79.376
model_3: train loss: 36.632, train task loss: 43.587 - val loss: 48.849, val task loss: 58.186

Epoch: 6/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2643.37it/s]


model_1: train loss: 0.150, train task loss: 2.644 - val loss: 1.732, val task loss: 4.607 [*] Best so far
model_2: train loss: 55.659, train task loss: 64.627 - val loss: 70.823, val task loss: 81.745
model_3: train loss: 33.589, train task loss: 40.470 - val loss: 49.486, val task loss: 58.742

Epoch: 7/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2974.23it/s]


model_1: train loss: -0.227, train task loss: 2.129 - val loss: 1.905, val task loss: 4.840
model_2: train loss: 50.338, train task loss: 58.245 - val loss: 73.709, val task loss: 85.329
model_3: train loss: 31.073, train task loss: 37.161 - val loss: 49.429, val task loss: 59.190

Epoch: 8/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2825.58it/s]


model_1: train loss: -0.279, train task loss: 1.649 - val loss: 1.742, val task loss: 4.985
model_2: train loss: 43.204, train task loss: 50.225 - val loss: 76.593, val task loss: 87.949
model_3: train loss: 28.201, train task loss: 33.758 - val loss: 50.213, val task loss: 59.561

Epoch: 9/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2878.49it/s]


model_1: train loss: -0.256, train task loss: 1.467 - val loss: 1.685, val task loss: 4.770
model_2: train loss: 36.274, train task loss: 42.323 - val loss: 85.802, val task loss: 98.583
model_3: train loss: 25.540, train task loss: 30.521 - val loss: 50.915, val task loss: 61.168

Epoch: 10/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2793.43it/s]


model_1: train loss: -0.301, train task loss: 1.074 - val loss: 1.417, val task loss: 4.996
model_2: train loss: 29.531, train task loss: 34.506 - val loss: 89.427, val task loss: 102.027
model_3: train loss: 22.621, train task loss: 26.987 - val loss: 52.240, val task loss: 62.070
Training with disagreement penalty = 0.3

Epoch: 1/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2787.85it/s]


model_1: train loss: 122.055, train task loss: 122.215 - val loss: 80.295, val task loss: 81.287 [*] Best so far
model_2: train loss: 130.277, train task loss: 130.392 - val loss: 105.108, val task loss: 105.751 [*] Best so far
model_3: train loss: 128.356, train task loss: 128.552 - val loss: 103.193, val task loss: 104.245 [*] Best so far

Epoch: 2/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2809.12it/s]


model_1: train loss: 40.085, train task loss: 48.427 - val loss: 3.655, val task loss: 29.144 [*] Best so far
model_2: train loss: 92.015, train task loss: 98.885 - val loss: 63.950, val task loss: 85.596 [*] Best so far
model_3: train loss: 76.579, train task loss: 85.353 - val loss: 35.325, val task loss: 64.984 [*] Best so far

Epoch: 3/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2901.38it/s]


model_1: train loss: 11.060, train task loss: 39.743 - val loss: 0.252, val task loss: 15.679 [*] Best so far
model_2: train loss: 45.108, train task loss: 84.155 - val loss: 38.368, val task loss: 79.922 [*] Best so far
model_3: train loss: 22.779, train task loss: 62.980 - val loss: 28.582, val task loss: 66.668

Epoch: 4/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2968.06it/s]


model_1: train loss: -1.290, train task loss: 11.063 - val loss: -0.525, val task loss: 10.847 [*] Best so far
model_2: train loss: 37.863, train task loss: 79.385 - val loss: 40.049, val task loss: 82.886
model_3: train loss: 24.241, train task loss: 56.241 - val loss: 26.381, val task loss: 63.269 [*] Best so far

Epoch: 5/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2974.50it/s]


model_1: train loss: -1.438, train task loss: 11.663 - val loss: -0.978, val task loss: 13.148
model_2: train loss: 35.622, train task loss: 76.459 - val loss: 39.531, val task loss: 82.337
model_3: train loss: 21.020, train task loss: 51.243 - val loss: 27.277, val task loss: 63.289

Epoch: 6/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2923.12it/s]


model_1: train loss: -1.451, train task loss: 10.946 - val loss: -2.398, val task loss: 9.566 [*] Best so far
model_2: train loss: 33.890, train task loss: 73.305 - val loss: 41.332, val task loss: 83.183
model_3: train loss: 18.719, train task loss: 48.160 - val loss: 27.819, val task loss: 62.852 [*] Best so far

Epoch: 7/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2995.18it/s]


model_1: train loss: -2.081, train task loss: 8.790 - val loss: -1.619, val task loss: 10.485
model_2: train loss: 31.618, train task loss: 68.854 - val loss: 41.498, val task loss: 84.349
model_3: train loss: 18.225, train task loss: 45.944 - val loss: 27.477, val task loss: 63.469

Epoch: 8/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2965.05it/s]


model_1: train loss: -2.122, train task loss: 8.348 - val loss: -2.306, val task loss: 10.322
model_2: train loss: 28.971, train task loss: 63.227 - val loss: 42.941, val task loss: 85.079
model_3: train loss: 16.925, train task loss: 42.707 - val loss: 28.102, val task loss: 63.263

Epoch: 9/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2739.48it/s]


model_1: train loss: -2.077, train task loss: 7.721 - val loss: -2.559, val task loss: 9.503 [*] Best so far
model_2: train loss: 26.589, train task loss: 57.687 - val loss: 44.815, val task loss: 87.677
model_3: train loss: 15.418, train task loss: 39.286 - val loss: 28.653, val task loss: 64.063

Epoch: 10/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2866.92it/s]


model_1: train loss: -2.094, train task loss: 5.798 - val loss: -2.339, val task loss: 9.680
model_2: train loss: 22.861, train task loss: 50.883 - val loss: 47.702, val task loss: 91.478
model_3: train loss: 14.900, train task loss: 37.001 - val loss: 28.087, val task loss: 64.899
Training with disagreement penalty = 0.5

Epoch: 1/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2916.54it/s]


model_1: train loss: 121.689, train task loss: 121.974 - val loss: 79.065, val task loss: 80.903 [*] Best so far
model_2: train loss: 129.324, train task loss: 129.523 - val loss: 103.533, val task loss: 104.723 [*] Best so far
model_3: train loss: 128.177, train task loss: 128.529 - val loss: 101.915, val task loss: 103.794 [*] Best so far

Epoch: 2/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2961.86it/s]


model_1: train loss: 37.147, train task loss: 53.577 - val loss: -12.310, val task loss: 47.056 [*] Best so far
model_2: train loss: 85.228, train task loss: 98.398 - val loss: 48.705, val task loss: 95.758 [*] Best so far
model_3: train loss: 69.085, train task loss: 85.732 - val loss: 7.402, val task loss: 66.070 [*] Best so far

Epoch: 3/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2961.25it/s]


model_1: train loss: 22.002, train task loss: 178.840 - val loss: 34.215, val task loss: 273.921
model_2: train loss: -1.154, train task loss: 111.233 - val loss: -38.467, val task loss: 105.559
model_3: train loss: -24.826, train task loss: 96.154 - val loss: -24.317, val task loss: 126.965

Epoch: 4/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2944.74it/s]


model_1: train loss: 12.817, train task loss: 381.110 - val loss: -13.987, val task loss: 588.807
model_2: train loss: -49.806, train task loss: 117.870 - val loss: -42.721, val task loss: 168.074
model_3: train loss: -19.201, train task loss: 207.870 - val loss: -31.944, val task loss: 301.747

Epoch: 5/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2831.91it/s]


model_1: train loss: -67.282, train task loss: 1680.403 - val loss: -187.889, val task loss: 3300.793
model_2: train loss: -113.529, train task loss: 298.087 - val loss: -167.668, val task loss: 614.586
model_3: train loss: -87.614, train task loss: 881.090 - val loss: -155.504, val task loss: 1582.925

Epoch: 6/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2786.49it/s]


model_1: train loss: -1672.055, train task loss: 13310.672 - val loss: -4096.662, val task loss: 28050.521
model_2: train loss: -429.707, train task loss: 2599.656 - val loss: -490.857, val task loss: 7222.454
model_3: train loss: -373.591, train task loss: 6700.312 - val loss: -463.059, val task loss: 12749.084

Epoch: 7/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2865.23it/s]


model_1: train loss: -18799.074, train task loss: 112528.096 - val loss: -35534.672, val task loss: 198395.047
model_2: train loss: -1872.736, train task loss: 31911.649 - val loss: -1806.398, val task loss: 68392.586
model_3: train loss: -1867.684, train task loss: 48510.980 - val loss: -1075.461, val task loss: 80652.000

Epoch: 8/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2850.61it/s]


model_1: train loss: -127509.849, train task loss: 700881.599 - val loss: -208340.625, val task loss: 1037806.625
model_2: train loss: -12342.229, train task loss: 260827.633 - val loss: -5552.062, val task loss: 446710.094
model_3: train loss: -5232.854, train task loss: 264694.289 - val loss: 3265.438, val task loss: 397549.281

Epoch: 9/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2850.32it/s]


model_1: train loss: -632816.713, train task loss: 3192894.562 - val loss: -889191.500, val task loss: 4203318.500
model_2: train loss: -42815.633, train task loss: 1435616.777 - val loss: -13979.000, val task loss: 2143171.750
model_3: train loss: -1014.953, train task loss: 1152819.212 - val loss: 26933.875, val task loss: 1504706.500

Epoch: 10/10 - LR: 0.001000


100%|██████████| 1280/1280 [00:00<00:00, 2834.02it/s]


model_1: train loss: -2236867.000, train task loss: 11660284.000 - val loss: -2990597.000, val task loss: 13705016.000
model_2: train loss: -245629.337, train task loss: 5841640.900 - val loss: -19069.000, val task loss: 7970735.000
model_3: train loss: -12573.763, train task loss: 3735109.288 - val loss: 109234.500, val task loss: 4552766.500
Finished training student cohort!
Selecting the optimal disgreement penalty via cross-validation...
Best rho: 0.1 with average task loss: 45.9887
Done!
Training meta learner on the best cohort...


1280it [00:00, 3255.97it/s]


meta_learner: train task loss: 14.583 - val task loss: 7.251 [*] Best so far


1280it [00:00, 3358.08it/s]


meta_learner: train task loss: 4.512 - val task loss: 7.491


1280it [00:00, 3155.13it/s]


meta_learner: train task loss: 3.047 - val task loss: 4.760 [*] Best so far


1280it [00:00, 3304.78it/s]


meta_learner: train task loss: 2.477 - val task loss: 4.584 [*] Best so far


1280it [00:00, 3629.45it/s]


meta_learner: train task loss: 2.047 - val task loss: 4.590


1280it [00:00, 3215.70it/s]


meta_learner: train task loss: 1.925 - val task loss: 4.545 [*] Best so far


1280it [00:00, 3151.68it/s]


meta_learner: train task loss: 1.757 - val task loss: 4.446 [*] Best so far


1280it [00:00, 3159.97it/s]


meta_learner: train task loss: 1.705 - val task loss: 4.528


1280it [00:00, 3132.99it/s]


meta_learner: train task loss: 1.722 - val task loss: 4.536


1280it [00:00, 3134.14it/s]


meta_learner: train task loss: 1.660 - val task loss: 4.572
Done!
Selecting greedy ensemble on the best cohort...
Pruned 1 worst models, keeping 2 models
Initial best models: [0, 2] with losses: [tensor(4.6072, grad_fn=<MseLossBackward0>), tensor(56.0924, grad_fn=<MseLossBackward0>)]
Done!
Method: (simple_average), Test_MSE: 14.243581771850586
Method: (weighted_average), Test_MSE: 3.965787410736084
Method: (meta_learner), Test_MSE: 4.020580291748047
Method: (greedy_ensemble), Test_MSE: 4.128260612487793
Method: (best_single), Test_MSE: 4.1756272315979
Method: (cohort), Test_MSE: [4.1756272315979, 73.60665893554688, 57.1135368347168]
Finished running NCL fusion!
Start training student cohort...

Epoch: 1/10 - LR: 0.001000
Using exact Shapley computation


100%|██████████| 1280/1280 [00:01<00:00, 1187.20it/s, avg_loss=127.8932, batch_time=0.054s]



Epoch: 2/10 - LR: 0.001000
Using exact Shapley computation


100%|██████████| 1280/1280 [00:00<00:00, 1370.31it/s, avg_loss=75.4421, batch_time=0.047s]



Epoch: 3/10 - LR: 0.001000
Using exact Shapley computation


100%|██████████| 1280/1280 [00:00<00:00, 1416.34it/s, avg_loss=28.0364, batch_time=0.045s]



Epoch: 4/10 - LR: 0.001000
Using exact Shapley computation


100%|██████████| 1280/1280 [00:00<00:00, 1343.75it/s, avg_loss=22.2503, batch_time=0.048s]



Epoch: 5/10 - LR: 0.001000
Using exact Shapley computation


100%|██████████| 1280/1280 [00:00<00:00, 1343.48it/s, avg_loss=20.1988, batch_time=0.048s]



Epoch: 6/10 - LR: 0.001000
Using exact Shapley computation


100%|██████████| 1280/1280 [00:01<00:00, 1191.38it/s, avg_loss=18.7662, batch_time=0.054s]



Epoch: 7/10 - LR: 0.001000
Using exact Shapley computation


100%|██████████| 1280/1280 [00:00<00:00, 1581.16it/s, avg_loss=17.4037, batch_time=0.040s]



Epoch: 8/10 - LR: 0.001000
Using exact Shapley computation


100%|██████████| 1280/1280 [00:00<00:00, 1444.96it/s, avg_loss=16.0076, batch_time=0.044s]



Epoch: 9/10 - LR: 0.001000
Using exact Shapley computation


100%|██████████| 1280/1280 [00:00<00:00, 1374.53it/s, avg_loss=14.5728, batch_time=0.047s]



Epoch: 10/10 - LR: 0.001000
Using exact Shapley computation


100%|██████████| 1280/1280 [00:00<00:00, 1623.43it/s, avg_loss=13.1070, batch_time=0.039s]


Finished training student cohort!
Training meta learner on the best cohort...


1280it [00:00, 3294.65it/s]


meta_learner: train task loss: 72.078 - val task loss: 54.453 [*] Best so far


1280it [00:00, 3090.32it/s]


meta_learner: train task loss: 54.388 - val task loss: 40.320 [*] Best so far


1280it [00:00, 3101.88it/s]


meta_learner: train task loss: 43.547 - val task loss: 32.869 [*] Best so far


1280it [00:00, 4198.46it/s]


meta_learner: train task loss: 35.148 - val task loss: 26.590 [*] Best so far


1280it [00:00, 4189.35it/s]


meta_learner: train task loss: 28.147 - val task loss: 21.359 [*] Best so far


1280it [00:00, 3714.26it/s]


meta_learner: train task loss: 22.794 - val task loss: 18.799 [*] Best so far


1280it [00:00, 4211.11it/s]


meta_learner: train task loss: 18.540 - val task loss: 15.675 [*] Best so far


1280it [00:00, 3647.59it/s]


meta_learner: train task loss: 15.605 - val task loss: 15.177 [*] Best so far


1280it [00:00, 3387.28it/s]


meta_learner: train task loss: 13.737 - val task loss: 12.203 [*] Best so far


1280it [00:00, 3317.42it/s]
Repetitions: 100%|██████████| 1/1 [01:09<00:00, 69.33s/it]

meta_learner: train task loss: 11.145 - val task loss: 11.343 [*] Best so far
Done!
Selecting greedy ensemble on the best cohort...
Pruned 1 worst models, keeping 2 models
Initial best models: [0, 2] with losses: [tensor(9.0733, grad_fn=<MseLossBackward0>), tensor(63.0920, grad_fn=<MseLossBackward0>)]
Done!
Method: (simple_average), Test_MSE: 9.408285140991211
Method: (weighted_average), Test_MSE: 5.01217794418335
Method: (meta_learner), Test_MSE: 10.518463134765625
Method: (greedy_ensemble), Test_MSE: 6.667391300201416
Method: (best_single), Test_MSE: 8.215972900390625
Method: (cohort), Test_MSE: [8.215972900390625, 84.87994384765625, 64.48446655273438]
Finished running shapley fusion!





Unnamed: 0,Method,Test_metric,best_rho,cohort_pairs,ensemble_idxs,cluster_idxs,random_state,dim_modalities,n,n_train,n_val,n_test,extractor,weight_type
0,modality_1,73.765739,,,,,1234,"[500, 400]",2000,1280,320,400,separate,clustering
1,modality_2,58.633133,,,,,1234,"[500, 400]",2000,1280,320,400,separate,clustering
2,early_fusion,3.935762,,,,,1234,"[500, 400]",2000,1280,320,400,separate,clustering
3,late_fusion,37.302422,,,,,1234,"[500, 400]",2000,1280,320,400,separate,clustering
4,coop,4.119238,0.0,,,,1234,"[500, 400]",2000,1280,320,400,separate,clustering
5,metafusion_simple_average,17.986481,0.0,"[(500, 400), (500, 0), (0, 400)]",,[0],1234,"[500, 400]",2000,1280,320,400,separate,clustering
6,metafusion_weighted_average,4.748543,0.0,"[(500, 400), (500, 0), (0, 400)]",,[0],1234,"[500, 400]",2000,1280,320,400,separate,clustering
7,metafusion_meta_learner,4.266782,0.0,"[(500, 400), (500, 0), (0, 400)]",,[0],1234,"[500, 400]",2000,1280,320,400,separate,clustering
8,metafusion_greedy_ensemble,4.509246,0.0,"[(500, 400), (500, 0), (0, 400)]","[0, 2]",[0],1234,"[500, 400]",2000,1280,320,400,separate,clustering
9,metafusion_best_single,4.197611,0.0,"[(500, 400), (500, 0), (0, 400)]",,[0],1234,"[500, 400]",2000,1280,320,400,separate,clustering


In [6]:
#####################
#    Save Results   #
#####################
# Write the results table to `outfile` as CSV, leaving out the index column,
# then report the destination. `flush=True` pushes the message out right away
# so it is visible even when stdout is buffered (e.g. redirected to a log file).
results.to_csv(outfile, index=False)
print("\nResults written to {:s}\n".format(outfile), flush=True)

# Checkpoints are only needed while the experiment runs; once the results are
# on disk, remove the whole checkpoint directory (if present) to reclaim space.
if os.path.exists(ckpt_dir):
    print(f"Deleting the model checkpoint directory: {ckpt_dir}")
    shutil.rmtree(ckpt_dir)
    print(f"Model checkpoint directory {ckpt_dir} has been deleted.")



Results written to ./results/regression_linear_early/seed1234.txt

