In [78]:
import matplotlib.pyplot as plt
import os
import re
import numpy as np

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Vanilla VCL results (no coresets, hyperparam tuning)

In [79]:
RUNS = 3

def extract_final_accuracy(file_name):
    with open(file_name) as f:
        lines = f.readlines()
    if match := re.search('Task 10 / 10. Mean Accuracy: (\d.\d+)', lines[-1], re.IGNORECASE):
        final_accuracy = float(match.group(1))
        return final_accuracy
    return float('inf')
    
def compute_mean_std(logs_dir, file_name_format: str, experiment_name: str, no_runs: int, options):
    for opt in options:
        accs = []
        for run in range(no_runs): 
            file_name = os.path.join(logs_dir, file_name_format.format(opt, run))
            acc = extract_final_accuracy(file_name)
            accs.append(acc)
        mean_acc, std_acc = np.mean(accs), np.std(accs) 
        mean_acc, std_acc = float(mean_acc), float(std_acc)
        print("Experiment {}, opt: {}, Mean Acc: {:.3f}: Std Acc: {:.3f}".format(experiment_name, opt, mean_acc, std_acc))

In [80]:
LOGS_DIR = 'logs/'

LRS = [1e-2, 1e-3, 1e-4]
format = 'vcl_lr_{}_withoutmlpinit_batch_256_coresetsize_0_epochs_50_run_{}_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'Learning Rate', 3, LRS)

print(" ")

BATCH_SIZES = [256, 512, 1024]
format = 'vcl_lr_0.001_withoutmlpinit_batch_{}_coresetsize_0_epochs_50_run_{}_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR, format, 'Batch Size', 3, BATCH_SIZES)

print(" ")

TRAIN_SAMPLES = [50, 100]
format = 'vcl_lr_0.001_withoutmlpinit_batch_256_coresetsize_0_epochs_50_no_train_samples_{}_run_{}_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR, format, 'Number of Train Samples', 3, TRAIN_SAMPLES)

print(" ")

EPOCHS = [50, 100]
format = 'vcl_lr_0.001_withoutmlpinit_batch_256_coresetsize_0_epochs_{}_run_{}_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR, format, 'Epochs', 3, EPOCHS)

print(" ")

opts = ['withoutmlpinit', 'withmlpinit']
format = 'vcl_lr_0.001_{}_batch_256_coresetsize_0_epochs_50_run_{}_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR, format, 'MLP inits, epoch 50', 3, opts)

print(" ")

opts = ['withoutmlpinit', 'withmlpinit']
format = 'vcl_lr_0.001_{}_batch_256_coresetsize_0_epochs_100_run_{}_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR, format, 'MLP inits, epoch 100', 3, opts)

Experiment Learning Rate, opt: 0.01, Mean Acc: 0.807: Std Acc: 0.011
Experiment Learning Rate, opt: 0.001, Mean Acc: 0.911: Std Acc: 0.003
Experiment Learning Rate, opt: 0.0001, Mean Acc: 0.841: Std Acc: 0.011
 
Experiment Batch Size, opt: 256, Mean Acc: 0.911: Std Acc: 0.003
Experiment Batch Size, opt: 512, Mean Acc: 0.901: Std Acc: 0.003
Experiment Batch Size, opt: 1024, Mean Acc: 0.894: Std Acc: 0.003
 
Experiment Number of Train Samples, opt: 50, Mean Acc: 0.900: Std Acc: 0.000
Experiment Number of Train Samples, opt: 100, Mean Acc: 0.880: Std Acc: 0.016
 
Experiment Epochs, opt: 50, Mean Acc: 0.911: Std Acc: 0.003
Experiment Epochs, opt: 100, Mean Acc: 0.897: Std Acc: 0.008
 
Experiment MLP inits, epoch 50, opt: withoutmlpinit, Mean Acc: 0.911: Std Acc: 0.003
Experiment MLP inits, epoch 50, opt: withmlpinit, Mean Acc: 0.868: Std Acc: 0.011
 
Experiment MLP inits, epoch 100, opt: withoutmlpinit, Mean Acc: 0.897: Std Acc: 0.008
Experiment MLP inits, epoch 100, opt: withmlpinit, Mean

In [90]:
LOGS_DIR = 'logs'

opts = [200, 1000, 5000]
format = 'vcl_lr_0.001_withoutmlpinit_batch_256_coresetsize_{}_epochs_50_run_{}_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'vcl-coreset', 3, opts)

Experiment vcl-coreset, opt: 200, Mean Acc: 0.945: Std Acc: 0.001
Experiment vcl-coreset, opt: 1000, Mean Acc: 0.954: Std Acc: 0.001
Experiment vcl-coreset, opt: 5000, Mean Acc: 0.959: Std Acc: 0.001


In [82]:
LOGS_DIR = 'logs/k_clusters'

opts = ['kcenter_greedy', 'kmedians', 'kmeans']
format = 'vcl_clusters_{}_lr_0.001_withoutmlpinit_batch_256_coresetsize_200_epochs_50_run_{}_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'k-cluster-method', 3, opts)

Experiment k-cluster-method, opt: kcenter_greedy, Mean Acc: 0.931: Std Acc: 0.004
Experiment k-cluster-method, opt: kmedians, Mean Acc: 0.942: Std Acc: 0.001
Experiment k-cluster-method, opt: kmeans, Mean Acc: 0.947: Std Acc: 0.002


In [83]:
LOGS_DIR = 'logs/k_clusters'

opts = ['kcenter_greedy', 'kmedians', 'kmeans']
format = 'vcl_clusters_{}_lr_0.001_withoutmlpinit_batch_256_coresetsize_200_epochs_50_run_{}_no_tasks_10.txt' 

compute_mean_std(LOGS_DIR,  format, 'k-cluster-method', 3, opts)

Experiment k-cluster-method, opt: kcenter_greedy, Mean Acc: 0.931: Std Acc: 0.004
Experiment k-cluster-method, opt: kmedians, Mean Acc: 0.942: Std Acc: 0.001
Experiment k-cluster-method, opt: kmeans, Mean Acc: 0.947: Std Acc: 0.002


In [84]:
LOGS_DIR = 'logs/cluster_coreset_only'

opts = ['kcenter_greedy', 'kmedians', 'kmeans']
format = 'clustering_coreset_only_{}_model_type_mlp_lr_0.001_batch_256_coresetsize_200_epochs_100_run_{}.txt' 

compute_mean_std(LOGS_DIR,  format, 'k-cluster-only', 3, opts)

Experiment k-cluster-only, opt: kcenter_greedy, Mean Acc: 0.599: Std Acc: 0.011
Experiment k-cluster-only, opt: kmedians, Mean Acc: 0.769: Std Acc: 0.001
Experiment k-cluster-only, opt: kmeans, Mean Acc: 0.808: Std Acc: 0.003


In [85]:
LOGS_DIR = 'logs/coreset_only'

opts = [200, 400, 1000, 2500, 5000]
format = 'coreset_only_model_type_mlp_lr_0.001_batch_256_coresetsize_{}_epochs_100_run_{}.txt' 

compute_mean_std(LOGS_DIR,  format, 'coresetonly', 3, opts)

Experiment coresetonly, opt: 200, Mean Acc: 0.770: Std Acc: 0.007
Experiment coresetonly, opt: 400, Mean Acc: 0.823: Std Acc: 0.001
Experiment coresetonly, opt: 1000, Mean Acc: 0.871: Std Acc: 0.001
Experiment coresetonly, opt: 2500, Mean Acc: 0.904: Std Acc: 0.002
Experiment coresetonly, opt: 5000, Mean Acc: 0.924: Std Acc: 0.001


In [94]:
LOGS_DIR = 'logs'

opts = ['entropy', 'prob_std']

format = 'only_asdf_bad_uncertainty_epochs_vcl_lr_0.001_withoutmlpinit_batch_256_uncertainty_coreset_method_{}_uncertainty_coreset_size_150_coresetsize_50_epochs_50_run_0_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'uncertainty, 50-150', 3, opts)

print(" ")

format = 'only_asdf_bad_uncertainty_epochs_vcl_lr_0.001_withoutmlpinit_batch_256_uncertainty_coreset_method_{}_uncertainty_coreset_size_100_coresetsize_100_epochs_50_run_0_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'uncertainty, 100-100', 3, opts)

print(" ")

format = 'only_asdf_bad_uncertainty_epochs_vcl_lr_0.001_withoutmlpinit_batch_256_uncertainty_coreset_method_{}_uncertainty_coreset_size_50_coresetsize_150_epochs_50_run_0_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'uncertainty, 150-50', 3, opts)

print(" ")

format = 'only_asdf_bad_uncertainty_epochs_vcl_lr_0.001_withoutmlpinit_batch_256_uncertainty_coreset_method_{}_uncertainty_coreset_size_150_coresetsize_50_epochs_50_run_0_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'uncertainty, 50-150', 3, opts)

print(" ")

format = 'only_asdf_bad_uncertainty_epochs_vcl_lr_0.001_withoutmlpinit_batch_256_uncertainty_coreset_method_{}_uncertainty_coreset_size_4750_coresetsize_250_epochs_50_run_0_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'uncertainty, 250-4750', 3, opts)

print(" ")

format = 'only_asdf_bad_uncertainty_epochs_vcl_lr_0.001_withoutmlpinit_batch_256_uncertainty_coreset_method_{}_uncertainty_coreset_size_4000_coresetsize_1000_epochs_50_run_0_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'uncertainty, 1000-400', 3, opts)

print(" ")

format = 'only_asdf_bad_uncertainty_epochs_vcl_lr_0.001_withoutmlpinit_batch_256_uncertainty_coreset_method_{}_uncertainty_coreset_size_2500_coresetsize_2500_epochs_50_run_0_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'uncertainty, 2500-2500', 3, opts)

print(" ")

format = 'only_asdf_bad_uncertainty_epochs_vcl_lr_0.001_withoutmlpinit_batch_256_uncertainty_coreset_method_{}_uncertainty_coreset_size_1000_coresetsize_4000_epochs_50_run_0_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'uncertainty, 4000-1000', 3, opts)


print(" ")

format = 'only_asdf_bad_uncertainty_epochs_vcl_lr_0.001_withoutmlpinit_batch_256_uncertainty_coreset_method_{}_uncertainty_coreset_size_250_coresetsize_4750_epochs_50_run_0_no_tasks_10.txt' 
compute_mean_std(LOGS_DIR,  format, 'uncertainty, 4750-250', 3, opts)

Experiment uncertainty, 50-150, opt: entropy, Mean Acc: 0.941: Std Acc: 0.000
Experiment uncertainty, 50-150, opt: prob_std, Mean Acc: 0.943: Std Acc: 0.000
 
Experiment uncertainty, 100-100, opt: entropy, Mean Acc: 0.942: Std Acc: 0.000
Experiment uncertainty, 100-100, opt: prob_std, Mean Acc: 0.941: Std Acc: 0.000
 
Experiment uncertainty, 150-50, opt: entropy, Mean Acc: 0.946: Std Acc: 0.000
Experiment uncertainty, 150-50, opt: prob_std, Mean Acc: 0.943: Std Acc: 0.000
 
Experiment uncertainty, 50-150, opt: entropy, Mean Acc: 0.941: Std Acc: 0.000
Experiment uncertainty, 50-150, opt: prob_std, Mean Acc: 0.943: Std Acc: 0.000
 
Experiment uncertainty, 250-4750, opt: entropy, Mean Acc: 0.950: Std Acc: 0.000
Experiment uncertainty, 250-4750, opt: prob_std, Mean Acc: 0.951: Std Acc: 0.000
 
Experiment uncertainty, 1000-400, opt: entropy, Mean Acc: 0.955: Std Acc: 0.000
Experiment uncertainty, 1000-400, opt: prob_std, Mean Acc: 0.953: Std Acc: 0.000
 
Experiment uncertainty, 2500-2500, o