### Independent Calibration Result

In [358]:
import os
import re
from itertools import product
from collections import defaultdict

import numpy as np
import pandas as pd
import pickle as pkl

result_root  = "../result"
log_files = os.listdir(result_root) 

dataset_list = ['mnist', 'cifar10']
method_list  = ['raw', 'raw+ts', 'raw+mcdrop', 'raw+ensemble', 'raw+lula', 'raw+ours', 'cskd', 'focal', 'bm', 'gp']
noise_type_list  = ['rcn', 'linear']
noise_level_list = ['0.2', '0.4', '0.6']
seed_list   = ['77', '78', '79']

In [359]:
combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)

res_dict_raw = defaultdict(list)
for combo in combos:
    
    dataset, method, noise_type, noise_level, seed = combo    
    log_pattern = re.compile(f"{dataset}_{method.replace('+', '.*')}_{noise_type}_{noise_level}_{seed}.*.pkl")
    log_paths = list(filter(log_pattern.match, log_files))
    
    for log_path in log_paths:
        
        with open(os.path.join(result_root, log_path), 'rb') as f:
            config = pkl.load(f)
            result = pkl.load(f)
        f.close()
        
#         if config['train'][dataset]['N_EPOCHS'] < 40:
#             continue
        
        res_dict_raw['dataset'].append(dataset)
        res_dict_raw['method'].append(method.replace('raw+', ''))
        res_dict_raw['noise_type'].append(noise_type)
        res_dict_raw['noise_level'].append(noise_level)
        res_dict_raw['seed'].append(seed)
        
        res_dict_raw['l1_final'].append(result['l1'][-1])
        res_dict_raw['acc_final'].append(result['acc'][-1])
        res_dict_raw['ece_final'].append(result['ece'][-1])
        res_dict_raw['loss_final'].append(result['loss'][-1])
        
        res_dict_raw['l1_best'].append(min(result['l1']))
        res_dict_raw['acc_best'].append(max(result['acc']))
        res_dict_raw['ece_best'].append(min(result['ece']))
        res_dict_raw['loss_best'].append(min(result['loss']))

In [360]:
field_list = ['dataset', 'method', 'noise_type', 'noise_level', 'seed']
res_dict = pd.DataFrame(res_dict_raw).groupby(field_list).first()
res_dict = res_dict.reset_index()

In [361]:
method_columns_names = ['raw', 'ts', 'mcdrop', 'ensemble', 'lula', 'cskd', 'focal', 'bm', 'gp', 'ours']
agg_dict = res_dict[field_list+['l1_final', 'acc_final', 'ece_final', 'loss_final', 'l1_best', 'acc_best', 'ece_best', 'loss_best']].groupby(by=['dataset', 'noise_type', 'noise_level', 'method']).agg(func=['mean', 'std', 'size'])
agg_dict['ACC'] = agg_dict['acc_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['acc_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')
agg_dict['L1']  = agg_dict['l1_best']['mean'].round(3).apply( lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['l1_best']['std'].round(3).apply(lambda x:  '{:,.3f}'.format(x)).astype('str')
agg_dict['ECE'] = agg_dict['ece_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['ece_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')

agg_dict[['ACC', 'L1', 'ECE']].columns = ['ACC', 'L1', 'ECE']
agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'idl'], ['0.2', '0.4', '0.6'], method_columns_names]

  agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'idl'], ['0.2', '0.4', '0.6'], method_columns_names]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ACC,L1,ECE
dataset,noise_type,noise_level,method,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
mnist,rcn,0.2,raw,0.985 $\pm$ 0.002,0.593 $\pm$ 0.019,0.107 $\pm$ 0.121
mnist,rcn,0.2,ts,0.001 $\pm$ 0.000,0.699 $\pm$ 0.001,0.102 $\pm$ 0.002
mnist,rcn,0.2,mcdrop,0.993 $\pm$ 0.000,0.651 $\pm$ 0.008,0.106 $\pm$ 0.008
mnist,rcn,0.2,cskd,1.000 $\pm$ nan,0.605 $\pm$ nan,0.200 $\pm$ nan
mnist,rcn,0.2,ours,0.940 $\pm$ 0.029,0.231 $\pm$ 0.123,0.105 $\pm$ 0.103
mnist,rcn,0.4,raw,0.982 $\pm$ 0.004,0.299 $\pm$ 0.190,0.029 $\pm$ 0.007
mnist,rcn,0.4,ts,0.656 $\pm$ 0.566,0.414 $\pm$ 0.074,0.037 $\pm$ 0.057
mnist,rcn,0.4,mcdrop,0.991 $\pm$ 0.001,0.470 $\pm$ 0.000,0.105 $\pm$ 0.038
mnist,rcn,0.4,ours,0.956 $\pm$ 0.009,0.087 $\pm$ 0.002,0.044 $\pm$ 0.005
mnist,rcn,0.6,raw,0.953 $\pm$ 0.006,0.232 $\pm$ 0.001,0.015 $\pm$ 0.001


In [362]:
combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)
gpu_start    = 0
gpu = gpu_start

cmd_list = []
for combo in combos:
    dataset, method, noise_type, noise_level, seed = combo
    
    if dataset == 'mnist':
        continue
    
    cond1 = (res_dict['dataset']==dataset)
    cond2 = (res_dict['method']==method)
    cond3 = (res_dict['noise_type']==noise_type)
    cond4 = (res_dict['noise_level']==noise_level)
    cond5 = (res_dict['seed']==seed)
    
    if len(res_dict.loc[cond1 & cond2 & cond3 & cond4 & cond5]) == 0:
        
        cmd_list.append(f"CUDA_VISIBLE_DEVICES={gpu} "+\
                        f"numactl --physcpubind=0-68 "+\
                        f"python -W ignore run_calibration.py "+\
                        f"--dataset {dataset} "+\
                        f"--noise_type {noise_type} "+\
                        f"--noise_strength {noise_level} "+\
                        f"--method {method} "+\
                        f"--gpu  {gpu} "+\
                        f"--seed {seed} &")

        gpu = (gpu+1)%6
        if gpu == 0:
            gpu = gpu_start

In [363]:
if len(cmd_list):
    with open('execute_missing_experiments.sh', 'w') as f:
        for cmd in cmd_list[30:50]:
            f.write(cmd)
            f.write('\n')
        f.close()

cmd_list

['CUDA_VISIBLE_DEVICES=0 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset cifar10 --noise_type linear --noise_strength 0.2 --method raw --gpu  0 --seed 77 &',
 'CUDA_VISIBLE_DEVICES=1 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset cifar10 --noise_type linear --noise_strength 0.2 --method raw --gpu  1 --seed 78 &',
 'CUDA_VISIBLE_DEVICES=2 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset cifar10 --noise_type linear --noise_strength 0.2 --method raw --gpu  2 --seed 79 &',
 'CUDA_VISIBLE_DEVICES=3 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset cifar10 --noise_type linear --noise_strength 0.4 --method raw --gpu  3 --seed 77 &',
 'CUDA_VISIBLE_DEVICES=4 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset cifar10 --noise_type linear --noise_strength 0.4 --method raw --gpu  4 --seed 78 &',
 'CUDA_VISIBLE_DEVICES=5 numactl --physcpubind=0-68 python -W ignore run_calibration.

### Joint Calibration Result

In [364]:
method_list  = ['raw+ts+ours', 'raw+mcdrop+ours', 'raw+ensemble+ours', 'raw+lula+ours', 'cskd+ours', 'focal+ours', 'bm+ours', 'raw+lula+ours', 'gp+ours']
# method_list  = ['raw+ts+ours', 'raw+mcdrop+ours', 'raw+ensemble+ours']


combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)

res_dict_raw = defaultdict(list)
for combo in combos:
    
    dataset, method, noise_type, noise_level, seed = combo    
    log_pattern = re.compile(f"{dataset}_{method.replace('+', '.*')}_{noise_type}_{noise_level}_{seed}.*.pkl")
    log_paths = list(filter(log_pattern.match, log_files))
    
    for log_path in log_paths:
        
        with open(os.path.join(result_root, log_path), 'rb') as f:
            config = pkl.load(f)
            result = pkl.load(f)
        f.close()
        
        if config['train'][dataset]['N_EPOCHS'] != 40:
            continue
        
        res_dict_raw['dataset'].append(dataset)
        res_dict_raw['method'].append(method.replace('raw+', ''))
        res_dict_raw['noise_type'].append(noise_type)
        res_dict_raw['noise_level'].append(noise_level)
        res_dict_raw['seed'].append(seed)
        
        res_dict_raw['l1_final'].append(result['l1'][-1])
        res_dict_raw['acc_final'].append(result['acc'][-1])
        res_dict_raw['ece_final'].append(result['ece'][-1])
        res_dict_raw['loss_final'].append(result['loss'][-1])
        
        res_dict_raw['l1_best'].append(min(result['l1']))
        res_dict_raw['acc_best'].append(max(result['acc']))
        res_dict_raw['ece_best'].append(min(result['ece']))
        res_dict_raw['loss_best'].append(min(result['loss']))
        
field_list = ['dataset', 'method', 'noise_type', 'noise_level', 'seed']
res_dict = pd.DataFrame(res_dict_raw).groupby(field_list).first()
res_dict = res_dict.reset_index()

method_columns_names = ['ts+ours', 'mcdrop+ours', 'ensemble+ours', 'lula+ours', 'cskd+ours', 'focal+ours', 'bm+ours', 'lula+ours', 'gp+ours']
agg_dict = res_dict[field_list+['l1_final', 'acc_final', 'ece_final', 'loss_final', 'l1_best', 'acc_best', 'ece_best', 'loss_best']].groupby(by=['dataset', 'noise_type', 'noise_level', 'method']).agg(func=['mean', 'std', 'size'])
agg_dict['ACC'] = agg_dict['acc_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['acc_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')
agg_dict['L1']  = agg_dict['l1_best']['mean'].round(3).apply( lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['l1_best']['std'].round(3).apply(lambda x:  '{:,.3f}'.format(x)).astype('str')
agg_dict['ECE'] = agg_dict['ece_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['ece_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')

agg_dict[['ACC', 'L1', 'ECE']].columns = ['ACC', 'L1', 'ECE']
agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'idl'], ['0.2', '0.4', '0.6'], method_columns_names]

pivot_res = agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'idl'], ['0.2', '0.4', '0.6'], method_columns_names]
pivot_table = pd.pivot_table(pivot_res, index=['dataset', 'noise_type', 'noise_level'], columns='method', aggfunc=lambda x: x)
pivot_tab_acc = pivot_table['ACC']
pivot_tab_ece = pivot_table['ECE']
pitot_tab_l1  = pivot_table['L1']
pivot_tab_acc['metric'] = 'ACC'
pivot_tab_ece['metric'] = 'ECE'
pitot_tab_l1['metric']  = 'L1'

pivot_res = pd.concat([pivot_tab_acc, pivot_tab_ece, pitot_tab_l1])
pivot_res = pivot_res.reset_index()
pivot_res.columns = [[y for y in x if len(y)>0][0] for x in list(pivot_res.reset_index().columns)][1:]
pivot_res = pivot_res.set_index(['dataset', 'noise_type', 'noise_level', 'metric'])
pivot_res = pivot_res.loc[['mnist', 'cifar10'], ['rcn', 'idl'], ['0.2', '0.4', '0.6'], ['ACC', 'ECE', 'L1']]
pivot_res

  agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'idl'], ['0.2', '0.4', '0.6'], method_columns_names]
  pivot_res = agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'idl'], ['0.2', '0.4', '0.6'], method_columns_names]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pivot_tab_acc['metric'] = 'ACC'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pivot_tab_ece['metric'] = 'ECE'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documen

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,bm+ours,cskd+ours,focal+ours,gp+ours,mcdrop+ours,ts+ours
dataset,noise_type,noise_level,metric,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
mnist,rcn,0.2,ACC,0.935 $\pm$ 0.000,0.960 $\pm$ 0.001,0.960 $\pm$ 0.013,0.991 $\pm$ nan,0.923 $\pm$ 0.000,0.975 $\pm$ 0.003
mnist,rcn,0.2,ECE,0.172 $\pm$ 0.004,0.032 $\pm$ 0.002,0.033 $\pm$ 0.002,0.017 $\pm$ nan,0.036 $\pm$ 0.046,0.030 $\pm$ 0.000
mnist,rcn,0.2,L1,0.151 $\pm$ 0.001,0.093 $\pm$ 0.001,0.109 $\pm$ 0.004,0.049 $\pm$ nan,0.307 $\pm$ 0.020,0.090 $\pm$ 0.008
mnist,rcn,0.4,ACC,0.908 $\pm$ 0.001,0.949 $\pm$ 0.002,0.925 $\pm$ 0.018,0.304 $\pm$ 0.149,0.921 $\pm$ 0.000,0.956 $\pm$ 0.009
mnist,rcn,0.4,ECE,0.244 $\pm$ 0.008,0.043 $\pm$ 0.009,0.035 $\pm$ 0.001,0.006 $\pm$ 0.002,0.019 $\pm$ 0.019,0.044 $\pm$ 0.005
mnist,rcn,0.4,L1,0.153 $\pm$ 0.004,0.086 $\pm$ 0.002,0.166 $\pm$ 0.007,0.399 $\pm$ 0.057,0.252 $\pm$ 0.015,0.087 $\pm$ 0.002
mnist,rcn,0.6,ACC,0.573 $\pm$ 0.414,0.943 $\pm$ 0.007,0.908 $\pm$ 0.002,0.114 $\pm$ 0.001,0.918 $\pm$ 0.001,0.936 $\pm$ 0.007
mnist,rcn,0.6,ECE,0.426 $\pm$ 0.414,0.043 $\pm$ 0.010,0.072 $\pm$ 0.009,0.004 $\pm$ 0.002,0.006 $\pm$ 0.001,0.050 $\pm$ 0.030
mnist,rcn,0.6,L1,0.275 $\pm$ 0.281,0.071 $\pm$ 0.003,0.105 $\pm$ 0.001,0.283 $\pm$ 0.000,0.152 $\pm$ 0.006,0.068 $\pm$ 0.006


In [365]:
combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)
gpu_start    = 0
gpu = gpu_start
capacity = 100

pid_ind = 0
cmd_list = []
for combo in combos:
    dataset, method, noise_type, noise_level, seed = combo
    
    if dataset == 'cifar10':
        continue
    
    cond1 = (res_dict['dataset']==dataset)
    cond2 = (res_dict['method']==method)
    cond3 = (res_dict['noise_type']==noise_type)
    cond4 = (res_dict['noise_level']==noise_level)
    cond5 = (res_dict['seed']==seed)
    
    if len(res_dict.loc[cond1 & cond2 & cond3 & cond4 & cond5]) == 0:
        
        cmd_list.append(f"CUDA_VISIBLE_DEVICES={gpu} "+\
                        f"numactl --physcpubind=0-40 "+\
                        f"python -W ignore run_calibration.py "+\
                        f"--dataset {dataset} "+\
                        f"--noise_type {noise_type} "+\
                        f"--noise_strength {noise_level} "+\
                        f"--method {method} "+\
                        f"--gpu  {gpu} "+\
                        f"--seed {seed} &")
        
        cmd_list.append(f"pids[{pid_ind}]=$!")
        
        if pid_ind%capacity==capacity-1:
            cmd_list.append(
                "for pid in ${pids[*]}; \ndo\n"+\
                "\t wait $pid \n"+\
                "done"
            )
        
        pid_ind = (pid_ind+1)%capacity
        
        gpu = (gpu+1)%6
        if gpu == 0:
            gpu = gpu_start

In [366]:
start = 0
end = 404

if len(cmd_list):
    with open('execute_missing_experiments.sh', 'w') as f:
        for cmd in cmd_list[start:end]:
            f.write(cmd)
            f.write('\n')
        f.close()

cmd_list

['CUDA_VISIBLE_DEVICES=0 numactl --physcpubind=0-40 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.2 --method raw+ts+ours --gpu  0 --seed 77 &',
 'pids[0]=$!',
 'CUDA_VISIBLE_DEVICES=1 numactl --physcpubind=0-40 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.2 --method raw+ts+ours --gpu  1 --seed 78 &',
 'pids[1]=$!',
 'CUDA_VISIBLE_DEVICES=2 numactl --physcpubind=0-40 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.2 --method raw+ts+ours --gpu  2 --seed 79 &',
 'pids[2]=$!',
 'CUDA_VISIBLE_DEVICES=3 numactl --physcpubind=0-40 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.4 --method raw+ts+ours --gpu  3 --seed 77 &',
 'pids[3]=$!',
 'CUDA_VISIBLE_DEVICES=4 numactl --physcpubind=0-40 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.4 --method raw+ts+ours --gpu  4 --seed 78 &',
 'pids[4]=