In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from sklearn import metrics
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Subset
import numpy as np
import copy
import statistics
import matplotlib.pyplot as plt
import time
from tqdm import tqdm

import sys
sys.path.append('./../src')
import globals
from model import Net
from training import train_model, train_model_CL
from visualizations import plot_embeddings, plot_confusion_matrix
from feature_attribution import Feature_Importance_Evaluations
from pytorch_utils import get_features, get_labels
from embedding_measurements import measure_embedding_confusion_knn, measure_embedding_drift

  Referenced from: <0B7EB158-53DC-3403-8A49-22178CAB4612> /Users/david/miniconda3/envs/dl_project/lib/python3.10/site-packages/torchvision/image.so
  warn(
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Notebook-level aliases for values held by the shared `globals` module.
# NOTE(review): these are snapshots taken when this cell runs. initialize_data()
# rebinds the globals.* dataset and loader attributes later on, so the dataset
# and loader aliases below do not follow those updates -- read globals.* directly
# whenever the current value is needed.
SEED, DEVICE = globals.SEED, globals.DEVICE
full_trainset, trainset, testset = globals.full_trainset, globals.trainset, globals.testset
trainloaders, valloaders, testloaders = globals.trainloaders, globals.valloaders, globals.testloaders

In [3]:
# This is the two-step process intended to prepare the
# data for use with the convolutional neural network.

# First step is to convert Python Imaging Library (PIL) format
# to PyTorch tensors.

# Second step (currently disabled in initialize_data) is used to
# normalize the data by specifying a mean and standard deviation
# for the single MNIST channel.
# This would convert the data from [0,1] to [-1,1].

# Normalization of data should help speed up convergence and
# reduce the chance of vanishing gradients with certain
# activation functions.
def initialize_data(random_state=None):
    """Load MNIST and build one train/val/test DataLoader per task.

    The MNIST training set is split, stratified by label, into a training part
    (99%) and a validation part (1%). Training, validation and test data are
    then partitioned into ``globals.ITERATIONS`` tasks, each covering
    ``globals.CLASSES_PER_ITER`` consecutive class labels.

    Side effects: rebinds ``globals.full_trainset``, ``globals.trainset``,
    ``globals.testset``, ``globals.trainloaders``, ``globals.valloaders`` and
    ``globals.testloaders``. The train loaders use ``globals.BATCH_SIZE`` as it
    is at call time, so it must be set before calling this function.

    Args:
        random_state: forwarded to ``train_test_split``. The default ``None``
            keeps the original behaviour (a different split on every call);
            pass an int for a reproducible train/validation split.
    """
    transform = transforms.Compose([
        transforms.ToTensor()
        # Normalization is currently disabled:
        #transforms.Normalize((0.5,), (0.5,))  # Normalizes to mean 0.5 and std 0.5 for the single channel
    ])

    globals.full_trainset = torchvision.datasets.MNIST('./../data/', train=True, download=True,
                                transform=transform)
    targets = np.array(globals.full_trainset.targets)

    # Perform stratified split
    train_indices, val_indices = train_test_split(
        np.arange(len(targets)),
        test_size=0.01,
        stratify=targets,
        random_state=random_state,
    )

    # Create subsets
    valset = Subset(globals.full_trainset, val_indices)
    globals.trainset = Subset(globals.full_trainset, train_indices)

    globals.testset = torchvision.datasets.MNIST('./../data/', train=False, download=True,
                                transform=transform)

    # Labels in subset order. Reading them from the target arrays avoids
    # decoding and transforming every image once per task just to get its label.
    train_labels = targets[train_indices]
    val_labels = targets[val_indices]
    test_labels = np.array(globals.testset.targets)

    # Define the group of class labels that makes up each task
    class_pairs = [tuple(range(i*globals.CLASSES_PER_ITER,(i+1)*globals.CLASSES_PER_ITER)) for i in range(globals.ITERATIONS)]

    # Lists holding one data loader per task
    globals.trainloaders = []
    globals.testloaders = []
    globals.valloaders = []
    for task_classes in class_pairs:
        # Positions (within each subset) of the samples belonging to this task
        train_positions = np.flatnonzero(np.isin(train_labels, task_classes)).tolist()
        val_positions = np.flatnonzero(np.isin(val_labels, task_classes)).tolist()
        test_positions = np.flatnonzero(np.isin(test_labels, task_classes)).tolist()

        train_subset = Subset(globals.trainset, train_positions)
        globals.trainloaders.append(DataLoader(train_subset, batch_size=globals.BATCH_SIZE, shuffle=True, pin_memory=True, num_workers = 0))

        val_subset = Subset(valset, val_positions)
        globals.valloaders.append(DataLoader(val_subset, batch_size=500, shuffle=False))

        test_subset = Subset(globals.testset, test_positions)
        globals.testloaders.append(DataLoader(test_subset, batch_size=500, shuffle=False))


In [None]:
def run_experiment(
        verbose = False,
        stopOnLoss = 0.03,
        full_CE = True,
        with_OOD = False,
        kd_loss = 0,
        stopOnValAcc = None,
        epochs = 1000000,
        with_dropout = False
        ):
    """Train one model per task sequentially and evaluate after the last task.

    For each of the ``globals.ITERATIONS`` tasks a new ``Net`` is created with
    enough outputs for all tasks seen so far. The first task is trained with
    ``train_model``; every later task copies the previous model into the new one
    and is trained with ``train_model_CL``. Evaluation on the test loaders of all
    tasks seen so far runs after every task when ``verbose`` is true, and always
    after the final task.

    Side effects: sets ``globals.OOD_CLASS``, ``globals.BATCH_SIZE`` and
    ``globals.WITH_DROPOUT``, and rebuilds the datasets/loaders held in
    ``globals`` through ``initialize_data()``.

    Args:
        verbose: print progress and metrics, and plot a confusion matrix per task.
        stopOnLoss: forwarded to ``train_model`` / ``train_model_CL``.
        full_CE: forwarded to ``train_model_CL``.
        with_OOD: when true, ``globals.OOD_CLASS`` is 1 and every task gets one
            extra output that is excluded from the overall class prediction.
        kd_loss: forwarded to ``train_model_CL``.
        stopOnValAcc: forwarded to ``train_model_CL``.
        epochs: forwarded to ``train_model`` / ``train_model_CL``.
        with_dropout: stored in ``globals.WITH_DROPOUT``.

    Returns:
        Tuple ``(accuracy, total_confusion, intra_phase_confusion,
        per_task_confusion, embedding_drift, avg_att_diff, avg_att_spread)``
        taken from the evaluation after the final task.
    """
    def _print(*args, **kwargs):
        if verbose:
            print(*args, **kwargs)

    globals.OOD_CLASS = 1 if with_OOD else 0
    # The batch size must be set before initialize_data(): the train loaders are
    # built there from globals.BATCH_SIZE, so assigning it afterwards would leave
    # the first run using whatever value globals held before.
    globals.BATCH_SIZE=4
    initialize_data()
    prevModel = None

    ogd = True
    globals.WITH_DROPOUT = with_dropout

    # Number of model outputs per task (class outputs plus the optional OOD output)
    outputs_per_task = globals.CLASSES_PER_ITER+globals.OOD_CLASS

    #[Denis] added code:
    Feature_Importance_Eval=Feature_Importance_Evaluations(globals.testloaders, DEVICE)

    for i in tqdm(range(globals.ITERATIONS), desc="Experiment Progress"):
        model = Net((i+1)*outputs_per_task)
        if prevModel is not None:
            with torch.no_grad():
                model.copyPrev(prevModel)
        train_loader = globals.trainloaders[i]
        val_loader = globals.valloaders[i]
        if prevModel is not None:
            _print("CL TRAIN!!")
            train_model_CL(
                model,
                prevModel,
                train_loader,
                val_loader,
                i,
                verbose,
                epochs,
                True,
                freeze_nonzero_params=False,
                l1_loss=0,
                ewc_loss=0,
                kd_loss=kd_loss,
                distance_loss=0,
                center_loss=0,
                param_reuse_loss=0,
                stopOnLoss=stopOnLoss,
                stopOnValAcc = stopOnValAcc,
                full_CE=full_CE,
                ogd=ogd,
                )
        else:
            train_model(
                model,
                train_loader,
                val_loader,
                verbose,
                epochs=epochs,
                l1_loss=0,
                stopOnLoss=stopOnLoss,
                center_loss =0,
                ogd=ogd
                )

        #[Denis] added code:
        Feature_Importance_Eval.Task_Feature_Attribution(model, i)

        # Always evaluate after the last task so the returned metrics are defined.
        if verbose or i == globals.ITERATIONS-1:
            _print("Starting evaluation")
            _print("ITERATION", i+1)
            _print("ACCURACIES PER TASK:")
            accumPred = []
            all_labels = []
            all_embeddings = []
            model.eval()
            with torch.no_grad():
                for j in range(i+1):
                    test_loader = globals.testloaders[j]
                    test_labels = get_labels(test_loader).to(DEVICE)
                    all_labels.append(test_labels)
                    pred, embeddings = model.get_pred_and_embeddings((get_features(test_loader).to(DEVICE)))
                    accumPred.append(pred)
                    all_embeddings.append(embeddings)
                    # Per-task accuracy: only the outputs belonging to task j compete.
                    sliced_pred = pred[:, j*outputs_per_task:(j+1)*outputs_per_task]
                    _, predicted = torch.max(sliced_pred, 1)  # Get the class predictions
                    predicted += j*globals.CLASSES_PER_ITER
                    correct = (predicted == test_labels).sum().item()  # Count how many were correct
                    accuracy = correct / test_labels.size(0)  # Accuracy as a fraction
                    _print(str(accuracy), end=' ')
            model.train()
            accumPred = torch.cat(accumPred)
            all_labels = torch.cat(all_labels)
            all_embeddings = torch.cat(all_embeddings)

            # Overall prediction across all tasks seen so far. With an OOD output
            # per task, drop every (CLASSES_PER_ITER+1)-th column so the remaining
            # column index equals the class label.
            if globals.OOD_CLASS == 1:
                class_columns = [c for c in range(accumPred.size(1)) if (c + 1) % (globals.CLASSES_PER_ITER+1) != 0]
                class_scores = accumPred[:, class_columns]
            else:
                class_scores = accumPred
            # argmax returns the first maximal index, matching the former manual scan.
            predicted = torch.softmax(class_scores, dim=-1).argmax(dim=1).to(DEVICE)
            correct = (predicted == all_labels).sum().item()  # Count how many were correct
            accuracy = correct / all_labels.size(0)  # Accuracy as a fraction
            _print("Accuracy on tasks so far:", accuracy)

            embedding_drift = measure_embedding_drift(all_embeddings, all_labels, model.prev_test_embedding_centers)
            _print("Average embedding drift based on centroids:", embedding_drift)
            total_confusion, intra_phase_confusion, per_task_confusion = measure_embedding_confusion_knn(all_embeddings, all_labels, k = 1000, task=i+1)
            _print("Total confusion", total_confusion)
            _print("Intra-phase confusion", intra_phase_confusion)
            _print("Per task confusions", per_task_confusion)
            if verbose:
                plot_confusion_matrix(predicted.cpu(), all_labels.cpu(), list(range(globals.CLASSES_PER_ITER*(i+1))))
        prevModel = copy.deepcopy(model)

    #[Denis] added code:
    [avg_att_diff,att_diffs,_,_,avg_att_spread,att_spreads]=Feature_Importance_Eval.Get_Feature_Change_Score(prevModel)
    _print("Average SHAPC values (ordered as tasks):", att_diffs)
    _print("Averaged SHAPC value (the smaller the better):", avg_att_diff)
    _print("Average attention spread values (ordered as tasks):", att_spreads)
    _print("Averaged attention spread value (the bigger the better):", avg_att_spread)
    # Saves saliency maps for 1 example per class (first row: image, second row: saliency map
    # after training, third row: saliency map after training the task where the class is included)
    Feature_Importance_Eval.Save_Random_Picture_Salency()

    return accuracy, total_confusion, intra_phase_confusion, per_task_confusion, embedding_drift, avg_att_diff, avg_att_spread

In [5]:
def run_experiments(n_runs=1, *args, **kwargs):
    """Run ``run_experiment`` repeatedly and print summary statistics per metric.

    Args:
        n_runs: number of independent runs.
        *args, **kwargs: forwarded unchanged to ``run_experiment``. ``verbose``
            (keyword, or first positional argument) additionally enables the
            per-run "finished" message printed here.

    Prints, for every metric returned by ``run_experiment``, the list of per-run
    values, their mean and their sample standard deviation. With a single run
    the standard deviation is undefined and reported as ``nan``; with no runs
    only the empty lists are printed. Returns ``None``.
    """
    # `verbose` is the first positional parameter of run_experiment, so honour it
    # whether it was passed by keyword or by position.
    verbose = kwargs.get('verbose', args[0] if args else None)
    def _print(*args, **kwargs):
        if verbose:
            print(*args, **kwargs)
    def report_stats(data, name):
        print(name, data)
        if not data:
            # Nothing to summarise (n_runs <= 0).
            return
        mean = statistics.mean(data)
        # statistics.stdev needs at least two data points.
        std = statistics.stdev(data) if len(data) > 1 else float("nan")
        print(f"Mean {name} across {n_runs} runs: {mean}")
        print(f"Standard deviation of {name} across {n_runs} runs: {std}\n")
    accuracies = []
    total_confusions = []
    intra_phase_confusions = []
    per_task_confusions = []
    att_diffs = []
    embedding_drifts = []
    att_spreads = []
    for r in range(n_runs):
        print(f"Starting run {r+1}.")
        accuracy, total_confusion, intra_phase_confusion, per_task_confusion, embedding_drift, avg_att_diff, avg_att_spread = run_experiment(*args, **kwargs)
        accuracies.append(accuracy)
        total_confusions.append(total_confusion)
        intra_phase_confusions.append(intra_phase_confusion)
        per_task_confusions.append(per_task_confusion)
        att_diffs.append(avg_att_diff)
        embedding_drifts.append(embedding_drift)
        att_spreads.append(avg_att_spread)
        # 1-based, matching the "Starting run" message above.
        _print(f"Run {r+1} finished with accuracy {accuracy}")

    report_stats(accuracies, "accuracy")
    report_stats(total_confusions, "total confusion")
    report_stats(intra_phase_confusions, "intra-phase confusion")
    report_stats(per_task_confusions, "per-task confusion")
    report_stats(embedding_drifts, "embedding drift")
    report_stats(att_diffs, "attention drift")
    report_stats(att_spreads, "attention spread")

In [6]:
# Baseline: every run_experiment option other than stopOnLoss left at its default.
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
)

Starting run 1.


Experiment Progress: 100%|██████████| 5/5 [02:08<00:00, 25.66s/it]


Starting run 2.


Experiment Progress: 100%|██████████| 5/5 [02:12<00:00, 26.47s/it]


Starting run 3.


Experiment Progress: 100%|██████████| 5/5 [02:06<00:00, 25.34s/it]


Starting run 4.


Experiment Progress: 100%|██████████| 5/5 [02:15<00:00, 27.01s/it]


Starting run 5.


Experiment Progress: 100%|██████████| 5/5 [02:05<00:00, 25.06s/it]


Starting run 6.


Experiment Progress: 100%|██████████| 5/5 [02:03<00:00, 24.80s/it]


Starting run 7.


Experiment Progress: 100%|██████████| 5/5 [02:05<00:00, 25.07s/it]


Starting run 8.


Experiment Progress: 100%|██████████| 5/5 [02:02<00:00, 24.42s/it]


Starting run 9.


Experiment Progress: 100%|██████████| 5/5 [02:07<00:00, 25.56s/it]


Starting run 10.


Experiment Progress: 100%|██████████| 5/5 [02:06<00:00, 25.39s/it]


accuracy [0.1971, 0.1949, 0.1974, 0.1973, 0.1968, 0.1969, 0.197, 0.1974, 0.1975, 0.1976]
Mean accuracy across 10 runs: 0.19699
Standard deviation of accuracy across 10 runs: 0.0007809538327512664

total confusion [0.41886920000000005, 0.39497669999999996, 0.38679169999999996, 0.4123612, 0.4253873, 0.4256447, 0.4123061, 0.44758889999999996, 0.3921654, 0.4048849]
Mean total confusion across 10 runs: 0.41209761
Standard deviation of total confusion across 10 runs: 0.018404617094960475

intra-phase confusion [0.4118088, 0.38928149999999995, 0.37939449999999997, 0.40564310000000003, 0.41835279999999997, 0.41840750000000004, 0.40767489999999995, 0.43944950000000005, 0.3863132, 0.39890499999999995]
Mean intra-phase confusion across 10 runs: 0.40552308
Standard deviation of intra-phase confusion across 10 runs: 0.017904304461578216

per-task confusion [0.08560009517237521, 0.0749314110333608, 0.07121918643941919, 0.08285076101719413, 0.08913623755668428, 0.08317366227662773, 0.0755652072903740

In [7]:
# Baseline settings with dropout enabled (with_dropout=True).
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
    with_dropout=True,
)

Starting run 1.


Experiment Progress: 100%|██████████| 5/5 [03:00<00:00, 36.15s/it]


Starting run 2.


Experiment Progress: 100%|██████████| 5/5 [03:26<00:00, 41.33s/it]


Starting run 3.


Experiment Progress: 100%|██████████| 5/5 [02:55<00:00, 35.14s/it]


Starting run 4.


Experiment Progress: 100%|██████████| 5/5 [03:16<00:00, 39.37s/it]


Starting run 5.


Experiment Progress: 100%|██████████| 5/5 [03:27<00:00, 41.50s/it]


Starting run 6.


Experiment Progress: 100%|██████████| 5/5 [03:33<00:00, 42.62s/it]


Starting run 7.


Experiment Progress: 100%|██████████| 5/5 [03:11<00:00, 38.28s/it]


Starting run 8.


Experiment Progress: 100%|██████████| 5/5 [03:17<00:00, 39.48s/it]


Starting run 9.


Experiment Progress: 100%|██████████| 5/5 [03:03<00:00, 36.78s/it]


Starting run 10.


Experiment Progress: 100%|██████████| 5/5 [03:20<00:00, 40.19s/it]


accuracy [0.1977, 0.1977, 0.1976, 0.1977, 0.1978, 0.1978, 0.1974, 0.1978, 0.1969, 0.1976]
Mean accuracy across 10 runs: 0.1976
Standard deviation of accuracy across 10 runs: 0.000274873708374513

total confusion [0.4522132, 0.40832440000000003, 0.41583879999999995, 0.4654612, 0.46736920000000004, 0.4425601, 0.4694939, 0.4181722, 0.4429925, 0.4397649]
Mean total confusion across 10 runs: 0.44221904
Standard deviation of total confusion across 10 runs: 0.02222681397227733

intra-phase confusion [0.4445894, 0.4027069, 0.41177739999999996, 0.4548078, 0.45097810000000005, 0.43200249999999996, 0.45952040000000005, 0.40962109999999996, 0.43726750000000003, 0.4313709]
Mean intra-phase confusion across 10 runs: 0.4334642
Standard deviation of intra-phase confusion across 10 runs: 0.019918688503959763

per-task confusion [0.10428779704970643, 0.08650513910173005, 0.08700019937146886, 0.11838636549703152, 0.1412950540952015, 0.11911672534407676, 0.11811482562754314, 0.09705507259719445, 0.0967278

In [8]:
# Baseline settings with the knowledge-distillation loss weight set to 1 (kd_loss=1).
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
    kd_loss=1,
)

Starting run 1.


Experiment Progress: 100%|██████████| 5/5 [02:33<00:00, 30.66s/it]


Starting run 2.


Experiment Progress: 100%|██████████| 5/5 [02:48<00:00, 33.70s/it]


Starting run 3.


Experiment Progress: 100%|██████████| 5/5 [02:38<00:00, 31.74s/it]


Starting run 4.


Experiment Progress: 100%|██████████| 5/5 [02:41<00:00, 32.24s/it]


Starting run 5.


Experiment Progress: 100%|██████████| 5/5 [02:31<00:00, 30.33s/it]


Starting run 6.


Experiment Progress: 100%|██████████| 5/5 [02:33<00:00, 30.72s/it]


Starting run 7.


Experiment Progress: 100%|██████████| 5/5 [02:39<00:00, 31.99s/it]


Starting run 8.


Experiment Progress: 100%|██████████| 5/5 [02:37<00:00, 31.53s/it]


Starting run 9.


Experiment Progress: 100%|██████████| 5/5 [02:36<00:00, 31.31s/it]


Starting run 10.


Experiment Progress: 100%|██████████| 5/5 [02:46<00:00, 33.36s/it]


accuracy [0.1955, 0.2079, 0.2222, 0.2005, 0.2342, 0.2037, 0.213, 0.1981, 0.198, 0.2024]
Mean accuracy across 10 runs: 0.20755
Standard deviation of accuracy across 10 runs: 0.012344251923691265

total confusion [0.3811489, 0.3953571, 0.37087479999999995, 0.38938059999999997, 0.3984137, 0.38683219999999996, 0.3998899, 0.3837528, 0.3814963, 0.39586010000000005]
Mean total confusion across 10 runs: 0.38830064
Standard deviation of total confusion across 10 runs: 0.009241427252708934

intra-phase confusion [0.37783, 0.3915653, 0.3672797, 0.38573460000000004, 0.3948804, 0.38241289999999994, 0.3955149, 0.3800896, 0.3779739, 0.3924938]
Mean intra-phase confusion across 10 runs: 0.38457751
Standard deviation of intra-phase confusion across 10 runs: 0.009139592841216855

per-task confusion [0.06184715411221091, 0.06959069173376908, 0.06651591354113343, 0.06722747169749217, 0.06481714528597562, 0.0690401595283358, 0.07339972321465393, 0.06304749494619215, 0.06717803468061004, 0.06107767844678409

In [9]:
# Baseline settings with full_CE disabled (full_CE=False).
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
    full_CE=False,
)

Starting run 1.


Experiment Progress: 100%|██████████| 5/5 [02:03<00:00, 24.70s/it]


Starting run 2.


Experiment Progress: 100%|██████████| 5/5 [02:00<00:00, 24.06s/it]


Starting run 3.


Experiment Progress: 100%|██████████| 5/5 [01:52<00:00, 22.44s/it]


Starting run 4.


Experiment Progress: 100%|██████████| 5/5 [01:59<00:00, 23.87s/it]


Starting run 5.


Experiment Progress: 100%|██████████| 5/5 [01:59<00:00, 23.82s/it]


Starting run 6.


Experiment Progress: 100%|██████████| 5/5 [02:03<00:00, 24.65s/it]


Starting run 7.


Experiment Progress: 100%|██████████| 5/5 [02:02<00:00, 24.52s/it]


Starting run 8.


Experiment Progress: 100%|██████████| 5/5 [01:55<00:00, 23.15s/it]


Starting run 9.


Experiment Progress: 100%|██████████| 5/5 [02:04<00:00, 24.81s/it]


Starting run 10.


Experiment Progress: 100%|██████████| 5/5 [01:58<00:00, 23.67s/it]


accuracy [0.6229, 0.5949, 0.5406, 0.6298, 0.6252, 0.6053, 0.7107, 0.6894, 0.5617, 0.5955]
Mean accuracy across 10 runs: 0.6176
Standard deviation of accuracy across 10 runs: 0.051925523589079016

total confusion [0.4239429, 0.43690209999999996, 0.43993479999999996, 0.39339979999999997, 0.4246637, 0.4167843, 0.42107779999999995, 0.41578000000000004, 0.41555240000000004, 0.41765189999999996]
Mean total confusion across 10 runs: 0.42056897
Standard deviation of total confusion across 10 runs: 0.012820611285829281

intra-phase confusion [0.4210081, 0.4333814, 0.4370111, 0.3916493, 0.42251150000000004, 0.41450810000000005, 0.41875549999999995, 0.4139252, 0.41330009999999995, 0.41528180000000003]
Mean intra-phase confusion across 10 runs: 0.41813321
Standard deviation of intra-phase confusion across 10 runs: 0.012384755147756821

per-task confusion [0.07335754687082643, 0.07558208210778464, 0.0747055472420104, 0.05750761713763726, 0.06809817210826516, 0.06291662934402227, 0.06964569579244528

In [10]:
# Baseline settings with an extra OOD output per task (with_OOD=True).
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
    with_OOD=True,
)

Starting run 1.


Experiment Progress: 100%|██████████| 5/5 [03:18<00:00, 39.76s/it]


Starting run 2.


Experiment Progress: 100%|██████████| 5/5 [03:04<00:00, 36.99s/it]


Starting run 3.


Experiment Progress: 100%|██████████| 5/5 [03:18<00:00, 39.69s/it]


Starting run 4.


Experiment Progress: 100%|██████████| 5/5 [02:54<00:00, 34.87s/it]


Starting run 5.


Experiment Progress: 100%|██████████| 5/5 [03:04<00:00, 36.85s/it]


Starting run 6.


Experiment Progress: 100%|██████████| 5/5 [03:24<00:00, 40.85s/it]


Starting run 7.


Experiment Progress: 100%|██████████| 5/5 [03:23<00:00, 40.72s/it]


Starting run 8.


Experiment Progress: 100%|██████████| 5/5 [03:11<00:00, 38.34s/it]


Starting run 9.


Experiment Progress: 100%|██████████| 5/5 [03:15<00:00, 39.16s/it]


Starting run 10.


Experiment Progress: 100%|██████████| 5/5 [03:10<00:00, 38.13s/it]


accuracy [0.1975, 0.1995, 0.1995, 0.197, 0.1987, 0.196, 0.1984, 0.1983, 0.1977, 0.1972]
Mean accuracy across 10 runs: 0.19798
Standard deviation of accuracy across 10 runs: 0.0011163432367431731

total confusion [0.3465311, 0.3732426, 0.3210307, 0.36923439999999996, 0.33669190000000004, 0.3597589, 0.3332994, 0.3375175, 0.35552459999999997, 0.3575089]
Mean total confusion across 10 runs: 0.349034
Standard deviation of total confusion across 10 runs: 0.016819538683130783

intra-phase confusion [0.33699559999999995, 0.3662698, 0.3161737, 0.36508430000000003, 0.3321619, 0.35258120000000004, 0.3285435, 0.33358699999999997, 0.3449251, 0.3502046]
Mean intra-phase confusion across 10 runs: 0.34265267
Standard deviation of intra-phase confusion across 10 runs: 0.016145751920692817

per-task confusion [0.08364597055187203, 0.08278852978178179, 0.06975606400700751, 0.06756661498521983, 0.06291605537199534, 0.08121444537867986, 0.06077916931896119, 0.06573679181168461, 0.08885737478871078, 0.08663

In [11]:
# full_CE disabled, combined with an extra OOD output per task.
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
    full_CE=False,
    with_OOD=True,
)

Starting run 1.


Experiment Progress: 100%|██████████| 5/5 [02:53<00:00, 34.79s/it]


Starting run 2.


Experiment Progress: 100%|██████████| 5/5 [02:57<00:00, 35.46s/it]


Starting run 3.


Experiment Progress: 100%|██████████| 5/5 [03:13<00:00, 38.76s/it]


Starting run 4.


Experiment Progress: 100%|██████████| 5/5 [03:04<00:00, 36.81s/it]


Starting run 5.


Experiment Progress: 100%|██████████| 5/5 [03:02<00:00, 36.56s/it]


Starting run 6.


Experiment Progress: 100%|██████████| 5/5 [03:13<00:00, 38.69s/it]


Starting run 7.


Experiment Progress: 100%|██████████| 5/5 [03:10<00:00, 38.07s/it]


Starting run 8.


Experiment Progress: 100%|██████████| 5/5 [02:56<00:00, 35.29s/it]


Starting run 9.


Experiment Progress: 100%|██████████| 5/5 [03:02<00:00, 36.58s/it]


Starting run 10.


Experiment Progress: 100%|██████████| 5/5 [03:12<00:00, 38.54s/it]


accuracy [0.6959, 0.7606, 0.7358, 0.7479, 0.7632, 0.697, 0.732, 0.6927, 0.7535, 0.6815]
Mean accuracy across 10 runs: 0.72601
Standard deviation of accuracy across 10 runs: 0.031242864074145896

total confusion [0.34128959999999997, 0.3403307, 0.3671303, 0.3372621, 0.34383430000000004, 0.33159700000000003, 0.35426460000000004, 0.34801570000000004, 0.3184975, 0.34182559999999995]
Mean total confusion across 10 runs: 0.34240474
Standard deviation of total confusion across 10 runs: 0.01293671264796433

intra-phase confusion [0.3382374, 0.33686609999999995, 0.364135, 0.33436889999999997, 0.34082809999999997, 0.32906389999999996, 0.35095960000000004, 0.3447692, 0.316288, 0.33982579999999996]
Mean intra-phase confusion across 10 runs: 0.3395342
Standard deviation of intra-phase confusion across 10 runs: 0.012694694110532954

per-task confusion [0.060382026411822755, 0.06253529983591295, 0.06502086716094588, 0.061255052299771795, 0.0612855165849032, 0.052659620738924454, 0.06169667461140953, 

In [12]:
# full_CE disabled, kd_loss=1, and an extra OOD output per task.
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
    full_CE=False,
    kd_loss=1,
    with_OOD=True,
)

Starting run 1.


Experiment Progress: 100%|██████████| 5/5 [04:02<00:00, 48.45s/it]


Starting run 2.


Experiment Progress: 100%|██████████| 5/5 [03:53<00:00, 46.79s/it]


Starting run 3.


Experiment Progress: 100%|██████████| 5/5 [03:46<00:00, 45.30s/it]


Starting run 4.


Experiment Progress: 100%|██████████| 5/5 [03:52<00:00, 46.45s/it]


Starting run 5.


Experiment Progress: 100%|██████████| 5/5 [03:39<00:00, 43.92s/it]


Starting run 6.


Experiment Progress: 100%|██████████| 5/5 [03:52<00:00, 46.59s/it]


Starting run 7.


Experiment Progress: 100%|██████████| 5/5 [03:26<00:00, 41.33s/it]


Starting run 8.


Experiment Progress: 100%|██████████| 5/5 [03:43<00:00, 44.64s/it]


Starting run 9.


Experiment Progress: 100%|██████████| 5/5 [03:44<00:00, 44.97s/it]


Starting run 10.


Experiment Progress: 100%|██████████| 5/5 [03:51<00:00, 46.31s/it]


accuracy [0.828, 0.8767, 0.8382, 0.8568, 0.8637, 0.8435, 0.8619, 0.8406, 0.8608, 0.7818]
Mean accuracy across 10 runs: 0.8452
Standard deviation of accuracy across 10 runs: 0.02662446660915899

total confusion [0.3276791, 0.33188090000000003, 0.34987520000000005, 0.3184865, 0.345804, 0.34278470000000005, 0.34630130000000003, 0.3649679, 0.3351818, 0.3529879]
Mean total confusion across 10 runs: 0.34159493
Standard deviation of total confusion across 10 runs: 0.013536291063085033

intra-phase confusion [0.32398760000000004, 0.32816290000000004, 0.3468726, 0.315616, 0.3426274, 0.33880220000000005, 0.34274079999999996, 0.36112639999999996, 0.33155599999999996, 0.34971640000000004]
Mean intra-phase confusion across 10 runs: 0.33812083
Standard deviation of intra-phase confusion across 10 runs: 0.013461457161511968

per-task confusion [0.061029603091504535, 0.06151265935099241, 0.06358187652932738, 0.054910927586055244, 0.057952069103951474, 0.06198898982077241, 0.0627405586769992, 0.0708773

In [13]:
# full_CE disabled and kd_loss=1 (no OOD output, no dropout).
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
    full_CE=False,
    kd_loss=1,
)

Starting run 1.


Experiment Progress: 100%|██████████| 5/5 [02:21<00:00, 28.30s/it]


Starting run 2.


Experiment Progress: 100%|██████████| 5/5 [02:40<00:00, 32.17s/it]


Starting run 3.


Experiment Progress: 100%|██████████| 5/5 [02:52<00:00, 34.50s/it]


Starting run 4.


Experiment Progress: 100%|██████████| 5/5 [02:37<00:00, 31.44s/it]


Starting run 5.


Experiment Progress: 100%|██████████| 5/5 [02:27<00:00, 29.53s/it]


Starting run 6.


Experiment Progress: 100%|██████████| 5/5 [02:26<00:00, 29.35s/it]


Starting run 7.


Experiment Progress: 100%|██████████| 5/5 [02:36<00:00, 31.35s/it]


Starting run 8.


Experiment Progress: 100%|██████████| 5/5 [02:41<00:00, 32.38s/it]


Starting run 9.


Experiment Progress: 100%|██████████| 5/5 [02:36<00:00, 31.40s/it]


Starting run 10.


Experiment Progress: 100%|██████████| 5/5 [02:26<00:00, 29.39s/it]


accuracy [0.7411, 0.7552, 0.7122, 0.7676, 0.7599, 0.6857, 0.684, 0.7479, 0.7586, 0.7766]
Mean accuracy across 10 runs: 0.73888
Standard deviation of accuracy across 10 runs: 0.03330454757069803

total confusion [0.38535489999999994, 0.39326269999999997, 0.3891513, 0.3907684, 0.40427270000000004, 0.38001399999999996, 0.38709000000000005, 0.3936531, 0.3869469, 0.39867379999999997]
Mean total confusion across 10 runs: 0.39091877999999997
Standard deviation of total confusion across 10 runs: 0.0069513464372498975

intra-phase confusion [0.38336749999999997, 0.3911534, 0.38714190000000004, 0.3886024, 0.4022536, 0.378042, 0.3852276, 0.39145850000000004, 0.3848226, 0.39647600000000005]
Mean intra-phase confusion across 10 runs: 0.38885455
Standard deviation of intra-phase confusion across 10 runs: 0.006905017498449152

per-task confusion [0.05884617069259688, 0.05783846415407488, 0.05478980887664238, 0.05998441149962837, 0.05921650012598223, 0.054963962700555834, 0.05661504818883127, 0.058596

In [14]:
# full_CE disabled, kd_loss=1, and dropout enabled.
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
    full_CE=False,
    kd_loss=1,
    with_dropout=True,
)

Starting run 1.


Experiment Progress: 100%|██████████| 5/5 [03:45<00:00, 45.16s/it]


Starting run 2.


Experiment Progress: 100%|██████████| 5/5 [04:07<00:00, 49.55s/it]


Starting run 3.


Experiment Progress: 100%|██████████| 5/5 [04:11<00:00, 50.25s/it]


Starting run 4.


Experiment Progress: 100%|██████████| 5/5 [03:57<00:00, 47.41s/it]


Starting run 5.


Experiment Progress: 100%|██████████| 5/5 [03:59<00:00, 47.87s/it]


Starting run 6.


Experiment Progress: 100%|██████████| 5/5 [04:04<00:00, 48.84s/it]


Starting run 7.


Experiment Progress: 100%|██████████| 5/5 [04:19<00:00, 51.94s/it]


Starting run 8.


Experiment Progress: 100%|██████████| 5/5 [03:43<00:00, 44.64s/it]


Starting run 9.


Experiment Progress: 100%|██████████| 5/5 [04:01<00:00, 48.22s/it]


Starting run 10.


Experiment Progress: 100%|██████████| 5/5 [03:40<00:00, 44.16s/it]


accuracy [0.7036, 0.7484, 0.7821, 0.7637, 0.7981, 0.7591, 0.7729, 0.7838, 0.7664, 0.7871]
Mean accuracy across 10 runs: 0.76652
Standard deviation of accuracy across 10 runs: 0.02656889576595578

total confusion [0.39356990000000003, 0.38609309999999997, 0.3809249, 0.38552280000000005, 0.38427710000000004, 0.40457, 0.38948910000000003, 0.4074234, 0.375209, 0.39377130000000005]
Mean total confusion across 10 runs: 0.39008506000000004
Standard deviation of total confusion across 10 runs: 0.010067589454967739

intra-phase confusion [0.3919142, 0.38466520000000004, 0.37918090000000004, 0.3837522, 0.3828188, 0.40283880000000005, 0.387695, 0.40567889999999995, 0.37370400000000004, 0.3922278]
Mean intra-phase confusion across 10 runs: 0.38844758
Standard deviation of intra-phase confusion across 10 runs: 0.010012104683254374

per-task confusion [0.056508048702055835, 0.053170110659574575, 0.05620240670306831, 0.0575846001506223, 0.05371361214611488, 0.059324830775694656, 0.059349264189737495,

In [15]:
# full_CE disabled, kd_loss=1, an extra OOD output per task, and dropout enabled.
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
    full_CE=False,
    kd_loss=1,
    with_OOD=True,
    with_dropout=True,
)

Starting run 1.


Experiment Progress: 100%|██████████| 5/5 [07:39<00:00, 91.84s/it] 


Starting run 2.


Experiment Progress: 100%|██████████| 5/5 [07:22<00:00, 88.42s/it] 


Starting run 3.


Experiment Progress: 100%|██████████| 5/5 [07:22<00:00, 88.41s/it] 


Starting run 4.


Experiment Progress: 100%|██████████| 5/5 [07:25<00:00, 89.17s/it] 


Starting run 5.


Experiment Progress: 100%|██████████| 5/5 [07:54<00:00, 94.87s/it] 


Starting run 6.


Experiment Progress: 100%|██████████| 5/5 [08:01<00:00, 96.24s/it] 


Starting run 7.


Experiment Progress: 100%|██████████| 5/5 [07:10<00:00, 86.12s/it] 


Starting run 8.


Experiment Progress: 100%|██████████| 5/5 [07:02<00:00, 84.53s/it] 


Starting run 9.


Experiment Progress: 100%|██████████| 5/5 [07:47<00:00, 93.46s/it] 


Starting run 10.


Experiment Progress: 100%|██████████| 5/5 [07:21<00:00, 88.34s/it] 


accuracy [0.8876, 0.8858, 0.8432, 0.8732, 0.8785, 0.8839, 0.8687, 0.8798, 0.8161, 0.8676]
Mean accuracy across 10 runs: 0.86844
Standard deviation of accuracy across 10 runs: 0.022454160713180373

total confusion [0.3307487, 0.3147346, 0.3435386, 0.34747019999999995, 0.3041045, 0.3306139, 0.3325637, 0.32489900000000005, 0.3265641, 0.316345]
Mean total confusion across 10 runs: 0.32715823
Standard deviation of total confusion across 10 runs: 0.013094311680526691

intra-phase confusion [0.32809900000000003, 0.312361, 0.34083589999999997, 0.34498850000000003, 0.3020847, 0.32823230000000003, 0.330349, 0.3224941, 0.32370580000000004, 0.3136721]
Mean intra-phase confusion across 10 runs: 0.32468224
Standard deviation of intra-phase confusion across 10 runs: 0.01299188043067242

per-task confusion [0.05634090529016507, 0.0501948041153887, 0.0545723722310669, 0.053531874992040526, 0.04865028135725198, 0.056725777622537166, 0.050382250979138066, 0.05165308220792166, 0.055565705079666204, 0.0507

In [16]:
# full_CE disabled, kd_loss=1, and an extra OOD output per task.
# NOTE(review): these arguments are identical to an earlier cell in this notebook
# (full_CE=False, kd_loss=1, with_OOD=True); confirm the repetition is intended.
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
    full_CE=False,
    kd_loss=1,
    with_OOD=True,
)

Starting run 1.


Experiment Progress: 100%|██████████| 5/5 [03:58<00:00, 47.62s/it]


Starting run 2.


Experiment Progress: 100%|██████████| 5/5 [03:39<00:00, 43.89s/it]


Starting run 3.


Experiment Progress: 100%|██████████| 5/5 [03:48<00:00, 45.67s/it]


Starting run 4.


Experiment Progress: 100%|██████████| 5/5 [04:09<00:00, 49.92s/it]


Starting run 5.


Experiment Progress: 100%|██████████| 5/5 [03:47<00:00, 45.56s/it]


Starting run 6.


Experiment Progress: 100%|██████████| 5/5 [03:49<00:00, 45.90s/it]


Starting run 7.


Experiment Progress: 100%|██████████| 5/5 [03:55<00:00, 47.03s/it]


Starting run 8.


Experiment Progress: 100%|██████████| 5/5 [04:02<00:00, 48.42s/it]


Starting run 9.


Experiment Progress: 100%|██████████| 5/5 [03:49<00:00, 46.00s/it]


Starting run 10.


Experiment Progress: 100%|██████████| 5/5 [03:48<00:00, 45.61s/it]


accuracy [0.8475, 0.8258, 0.8726, 0.8616, 0.831, 0.8709, 0.8329, 0.8398, 0.8582, 0.8237]
Mean accuracy across 10 runs: 0.8464
Standard deviation of accuracy across 10 runs: 0.01844149909548813

total confusion [0.33850460000000004, 0.32606060000000003, 0.34732620000000003, 0.339186, 0.3486659, 0.33174630000000005, 0.3601244, 0.32260310000000003, 0.3544064, 0.348533]
Mean total confusion across 10 runs: 0.34171565000000004
Standard deviation of total confusion across 10 runs: 0.012259535010961862

intra-phase confusion [0.33572270000000004, 0.3229193, 0.34365389999999996, 0.3351971, 0.3449141, 0.3287169, 0.3564144, 0.31963640000000004, 0.35041900000000004, 0.34421100000000004]
Mean intra-phase confusion across 10 runs: 0.33818048
Standard deviation of intra-phase confusion across 10 runs: 0.011903367636634972

per-task confusion [0.06062307836885807, 0.0587536071225039, 0.06579785579350761, 0.07067425656207886, 0.06311524704168887, 0.058932913765759334, 0.07000158245041162, 0.0552231782

In [17]:
# Training on the entire dataset: a single iteration covering classes 0-9.
# NOTE(review): this rebinds settings on the shared `globals` module and does not
# restore them, so any cell executed after this one sees ITERATIONS == 1 and
# CLASSES_PER_ITER == 10.
globals.ITERATIONS, globals.CLASSES_PER_ITER = 1, 10
run_experiments(
    n_runs=10,
    verbose=False,
    stopOnLoss=0.02,
)

Starting run 1.


Experiment Progress: 100%|██████████| 1/1 [04:04<00:00, 244.47s/it]


Starting run 2.


Experiment Progress: 100%|██████████| 1/1 [04:06<00:00, 246.68s/it]


Starting run 3.


Experiment Progress: 100%|██████████| 1/1 [03:33<00:00, 213.44s/it]


Starting run 4.


Experiment Progress: 100%|██████████| 1/1 [03:40<00:00, 220.13s/it]


Starting run 5.


Experiment Progress: 100%|██████████| 1/1 [04:03<00:00, 243.98s/it]


Starting run 6.


Experiment Progress: 100%|██████████| 1/1 [03:37<00:00, 217.40s/it]


Starting run 7.


Experiment Progress: 100%|██████████| 1/1 [03:31<00:00, 211.62s/it]


Starting run 8.


Experiment Progress: 100%|██████████| 1/1 [04:05<00:00, 245.24s/it]


Starting run 9.


Experiment Progress: 100%|██████████| 1/1 [03:39<00:00, 219.02s/it]


Starting run 10.


Experiment Progress: 100%|██████████| 1/1 [03:35<00:00, 215.75s/it]


accuracy [0.9905, 0.9907, 0.9905, 0.9887, 0.9912, 0.989, 0.9922, 0.9911, 0.9905, 0.9897]
Mean accuracy across 10 runs: 0.99041
Standard deviation of accuracy across 10 runs: 0.001045041200676359

total confusion [0.20959899999999998, 0.21825139999999998, 0.19106140000000005, 0.18511520000000004, 0.20553259999999995, 0.19872730000000005, 0.19630979999999998, 0.2121366, 0.19685160000000002, 0.20325610000000005]
Mean total confusion across 10 runs: 0.2016841
Standard deviation of total confusion across 10 runs: 0.010066484486651716

intra-phase confusion [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Mean intra-phase confusion across 10 runs: 0.0
Standard deviation of intra-phase confusion across 10 runs: 0.0

per-task confusion [0.20959899999999998, 0.21825139999999998, 0.19106140000000005, 0.18511520000000004, 0.20553259999999995, 0.19872730000000005, 0.19630979999999998, 0.2121366, 0.19685160000000002, 0.20325610000000005]
Mean per-task confusion across 10 runs: 0.2016841
Standard 

In [18]:
# Ask PyTorch to release cached, currently unused CUDA memory now that the experiments are done.
torch.cuda.empty_cache()