### Independent Calibration Result

In [249]:
import os
import re
from itertools import product
from collections import defaultdict

import numpy as np
import pandas as pd
import pickle as pkl

result_root  = "../result"
log_files = os.listdir(result_root) 

dataset_list = ['mnist', 'cifar10']
method_list  = ['raw', 'raw+ts', 'raw+mcdrop', 'raw+ensemble', 'raw+lula', 'raw+ours', 'cskd', 'focal', 'bm', 'gp']
noise_type_list  = ['rcn', 'idl']
noise_level_list = ['0.2', '0.4', '0.6']
seed_list   = ['77', '78', '79']

In [250]:
combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)

res_dict_raw = defaultdict(list)
for combo in combos:
    
    dataset, method, noise_type, noise_level, seed = combo    
    log_pattern = re.compile(f"{dataset}_{method.replace('+', '.*')}_{noise_type}_{noise_level}_{seed}.*.pkl")
    log_paths = list(filter(log_pattern.match, log_files))
    
    for log_path in log_paths:
        
        with open(os.path.join(result_root, log_path), 'rb') as f:
            config = pkl.load(f)
            result = pkl.load(f)
        f.close()
        
#         if config['train'][dataset]['N_EPOCHS'] < 40:
#             continue
        
        res_dict_raw['dataset'].append(dataset)
        res_dict_raw['method'].append(method.replace('raw+', ''))
        res_dict_raw['noise_type'].append(noise_type)
        res_dict_raw['noise_level'].append(noise_level)
        res_dict_raw['seed'].append(seed)
        
        res_dict_raw['l1_final'].append(result['l1'][-1])
        res_dict_raw['acc_final'].append(result['acc'][-1])
        res_dict_raw['ece_final'].append(result['ece'][-1])
        res_dict_raw['loss_final'].append(result['loss'][-1])
        
        res_dict_raw['l1_best'].append(min(result['l1']))
        res_dict_raw['acc_best'].append(max(result['acc']))
        res_dict_raw['ece_best'].append(min(result['ece']))
        res_dict_raw['loss_best'].append(min(result['loss']))

In [251]:
field_list = ['dataset', 'method', 'noise_type', 'noise_level', 'seed']
res_dict = pd.DataFrame(res_dict_raw).groupby(field_list).first()
res_dict = res_dict.reset_index()

In [252]:
method_columns_names = ['raw', 'ts', 'mcdrop', 'ensemble', 'lula', 'cskd', 'focal', 'bm', 'gp', 'ours']
agg_dict = res_dict[field_list+['l1_final', 'acc_final', 'ece_final', 'loss_final', 'l1_best', 'acc_best', 'ece_best', 'loss_best']].groupby(by=['dataset', 'noise_type', 'noise_level', 'method']).agg(func=['mean', 'std', 'size'])
agg_dict['ACC'] = agg_dict['acc_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['acc_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')
agg_dict['L1']  = agg_dict['l1_best']['mean'].round(3).apply( lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['l1_best']['std'].round(3).apply(lambda x:  '{:,.3f}'.format(x)).astype('str')
agg_dict['ECE'] = agg_dict['ece_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['ece_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')

agg_dict[['ACC', 'L1', 'ECE']].columns = ['ACC', 'L1', 'ECE']
agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'idl'], ['0.2', '0.4', '0.6'], method_columns_names]

  agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'idl'], ['0.2', '0.4', '0.6'], method_columns_names]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ACC,L1,ECE
dataset,noise_type,noise_level,method,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mnist,rcn,0.2,cskd,1.000 $\pm$ nan,0.605 $\pm$ nan,0.200 $\pm$ nan


In [253]:
combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)
gpu_start    = 0
gpu = gpu_start

cmd_list = []
for combo in combos:
    dataset, method, noise_type, noise_level, seed = combo
    
    if dataset == 'mnist':
        continue
    
    cond1 = (res_dict['dataset']==dataset)
    cond2 = (res_dict['method']==method)
    cond3 = (res_dict['noise_type']==noise_type)
    cond4 = (res_dict['noise_level']==noise_level)
    cond5 = (res_dict['seed']==seed)
    
    if len(res_dict.loc[cond1 & cond2 & cond3 & cond4 & cond5]) == 0:
        
        cmd_list.append(f"CUDA_VISIBLE_DEVICES={gpu} "+\
                        f"numactl --physcpubind=0-68 "+\
                        f"python -W ignore run_calibration.py "+\
                        f"--dataset {dataset} "+\
                        f"--noise_type {noise_type} "+\
                        f"--noise_strength {noise_level} "+\
                        f"--method {method} "+\
                        f"--gpu  {gpu} "+\
                        f"--seed {seed} &")

        gpu = (gpu+1)%6
        if gpu == 0:
            gpu = gpu_start

In [254]:
if len(cmd_list):
    with open('execute_missing_experiments.sh', 'w') as f:
        for cmd in cmd_list[30:50]:
            f.write(cmd)
            f.write('\n')
        f.close()

cmd_list

['CUDA_VISIBLE_DEVICES=0 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset cifar10 --noise_type rcn --noise_strength 0.2 --method raw --gpu  0 --seed 77 &',
 'CUDA_VISIBLE_DEVICES=1 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset cifar10 --noise_type rcn --noise_strength 0.2 --method raw --gpu  1 --seed 78 &',
 'CUDA_VISIBLE_DEVICES=2 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset cifar10 --noise_type rcn --noise_strength 0.2 --method raw --gpu  2 --seed 79 &',
 'CUDA_VISIBLE_DEVICES=3 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset cifar10 --noise_type rcn --noise_strength 0.4 --method raw --gpu  3 --seed 77 &',
 'CUDA_VISIBLE_DEVICES=4 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset cifar10 --noise_type rcn --noise_strength 0.4 --method raw --gpu  4 --seed 78 &',
 'CUDA_VISIBLE_DEVICES=5 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset ci

### Joint Calibration Result

In [192]:
method_list  = ['raw+ts+ours', 'raw+mcdrop+ours', 'raw+ensemble+ours', 'raw+lula+ours', 'cskd+ours', 'focal+ours', 'bm+ours', 'gp+ours']

combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)

res_dict_raw = defaultdict(list)
for combo in combos:
    
    dataset, method, noise_type, noise_level, seed = combo    
    log_pattern = re.compile(f"{dataset}_{method.replace('+', '.*')}_{noise_type}_{noise_level}_{seed}.*.pkl")
    log_paths = list(filter(log_pattern.match, log_files))
    
    for log_path in log_paths:
        
        with open(os.path.join(result_root, log_path), 'rb') as f:
            config = pkl.load(f)
            result = pkl.load(f)
        f.close()
        
        res_dict_raw['dataset'].append(dataset)
        res_dict_raw['method'].append(method.replace('raw+', ''))
        res_dict_raw['noise_type'].append(noise_type)
        res_dict_raw['noise_level'].append(noise_level)
        res_dict_raw['seed'].append(seed)
        
        res_dict_raw['l1_final'].append(result['l1'][-1])
        res_dict_raw['acc_final'].append(result['acc'][-1])
        res_dict_raw['ece_final'].append(result['ece'][-1])
        res_dict_raw['loss_final'].append(result['loss'][-1])
        
        res_dict_raw['l1_best'].append(min(result['l1']))
        res_dict_raw['acc_best'].append(max(result['acc']))
        res_dict_raw['ece_best'].append(min(result['ece']))
        res_dict_raw['loss_best'].append(min(result['loss']))

In [193]:
field_list = ['dataset', 'method', 'noise_type', 'noise_level', 'seed']
res_dict = pd.DataFrame(res_dict_raw).groupby(field_list).first()
res_dict = res_dict.reset_index()

KeyError: 'dataset'

In [None]:
method_columns_names = ['ts', 'mcdrop', 'ensemble', 'lula', 'cskd', 'focal', 'bm', 'gp', 'ours']
agg_dict = res_dict[field_list+['l1_final', 'acc_final', 'ece_final', 'loss_final', 'l1_best', 'acc_best', 'ece_best', 'loss_best']].groupby(by=['dataset', 'noise_type', 'noise_level', 'method']).agg(func=['mean', 'std', 'size'])
agg_dict['ACC'] = agg_dict['acc_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['acc_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')
agg_dict['L1']  = agg_dict['l1_best']['mean'].round(3).apply( lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['l1_best']['std'].round(3).apply(lambda x:  '{:,.3f}'.format(x)).astype('str')
agg_dict['ECE'] = agg_dict['ece_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['ece_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')

agg_dict[['ACC', 'L1', 'ECE']].columns = ['ACC', 'L1', 'ECE']
agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'idl'], ['0.2', '0.4', '0.6'], method_columns_names]

In [None]:
combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)
gpu_start    = 2
gpu = gpu_start

cmd_list = []
for combo in combos:
    dataset, method, noise_type, noise_level, seed = combo
    
    cond1 = (res_dict['dataset']==dataset)
    cond2 = (res_dict['method']==method)
    cond3 = (res_dict['noise_type']==noise_type)
    cond4 = (res_dict['noise_level']==noise_level)
    cond5 = (res_dict['seed']==seed)
    
    if len(res_dict.loc[cond1 & cond2 & cond3 & cond4 & cond5]) == 0:
        
        cmd_list.append(f"CUDA_VISIBLE_DEVICES={gpu} python -W ignore run_calibration.py "+\
                        f"--dataset {dataset} "+\
                        f"--noise_type {noise_type} "+\
                        f"--noise {noise_level} "+\
                        f"--method {method} "+\
                        f"--gpus {gpu} "+\
                        f"--seed {seed} &")
    
        port_start += 1
        gpu = (gpu+1)%8
        if gpu == 0:
            gpu = gpu_start
            
if len(cmd_list):
    with open('execute_missing_experiments.sh', 'w') as f:
        for cmd in cmd_list:
            f.write(cmd)
            f.write('\n')
        f.close()