### Independent Calibration Result

In [55]:
import os
import re
from itertools import product
from collections import defaultdict

import numpy as np
import pandas as pd
import pickle as pkl

result_root  = "../result"
log_files = os.listdir(result_root) 

dataset_list = ['mnist', 'cifar10']
method_list  = ['raw', 'raw+ts', 'raw+mcdrop', 'raw+ensemble', 'cskd', 'focal', 'bm', 'gp']
method_list  = ['gp']
noise_type_list  = ['rcn', 'linear']
noise_level_list = ['0.2', '0.4', '0.6']
seed_list   = ['77', '78', '79']

In [57]:
combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)

res_dict_raw = defaultdict(list)
for combo in combos:
    
    dataset, method, noise_type, noise_level, seed = combo    
    log_pattern = re.compile(f"{dataset}_{method.replace('+', '.*')}_{noise_type}_{noise_level}_{seed}.*.pkl")
    log_paths = list(filter(log_pattern.match, log_files))
    
    for log_path in log_paths:
        
        with open(os.path.join(result_root, log_path), 'rb') as f:
            config = pkl.load(f)
            result = pkl.load(f)
        f.close()
        
        if config['train'][dataset]['N_EPOCHS'] != 200:
            continue
        
        res_dict_raw['dataset'].append(dataset)
        res_dict_raw['method'].append(method.replace('raw+', ''))
        res_dict_raw['noise_type'].append(noise_type)
        res_dict_raw['noise_level'].append(noise_level)
        res_dict_raw['seed'].append(seed)
        
        res_dict_raw['l1_final'].append(result['l1'][-1])
        res_dict_raw['acc_final'].append(result['acc'][-1])
        res_dict_raw['ece_final'].append(result['ece'][-1])
        res_dict_raw['loss_final'].append(result['loss'][-1])
        
        res_dict_raw['l1_best'].append(min(result['l1']))
        res_dict_raw['acc_best'].append(max(result['acc']))
        res_dict_raw['ece_best'].append(min(result['ece']))
        res_dict_raw['loss_best'].append(min(result['loss']))

field_list = ['dataset', 'method', 'noise_type', 'noise_level', 'seed']
res_dict = pd.DataFrame(res_dict_raw).groupby(field_list).first()
res_dict = res_dict.reset_index()

KeyError: 'dataset'

In [None]:
method_columns_names = ['raw', 'ts', 'mcdrop', 'ensemble', 'cskd', 'focal', 'bm', 'gp']
method_columns_names = ['gp']
agg_dict = res_dict[field_list+['l1_final', 'acc_final', 'ece_final', 'loss_final', 'l1_best', 'acc_best', 'ece_best', 'loss_best']].groupby(by=['dataset', 'noise_type', 'noise_level', 'method']).agg(func=['mean', 'std', 'size'])
agg_dict['ACC'] = agg_dict['acc_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['acc_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')
agg_dict['L1']  = agg_dict['l1_best']['mean'].round(3).apply( lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['l1_best']['std'].round(3).apply(lambda x:  '{:,.3f}'.format(x)).astype('str')
agg_dict['ECE'] = agg_dict['ece_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['ece_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')

agg_dict[['ACC', 'L1', 'ECE']].columns = ['ACC', 'L1', 'ECE']
agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'linear'], ['0.2', '0.4', '0.6'], method_columns_names]

pivot_res = agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'linear'], ['0.2', '0.4', '0.6'], method_columns_names]
pivot_table = pd.pivot_table(pivot_res, index=['dataset', 'noise_type', 'noise_level'], columns='method', aggfunc=lambda x: x)
pivot_tab_acc = pivot_table['ACC']
pivot_tab_ece = pivot_table['ECE']
pitot_tab_l1  = pivot_table['L1']
pivot_tab_acc['metric'] = 'ACC'
pivot_tab_ece['metric'] = 'ECE'
pitot_tab_l1['metric']  = 'L1'

# pivot_res = pd.concat([pivot_tab_acc, pivot_tab_ece, pitot_tab_l1])
# pivot_res = pivot_res.reset_index()
# pivot_res.columns = [[y for y in x if len(y)>0][0] for x in list(pivot_res.reset_index().columns)][1:]
# pivot_res = pivot_res.set_index(['dataset', 'noise_type', 'noise_level', 'metric'])
# # pivot_res = pivot_res.loc[['mnist', 'cifar10'], ['linear', 'rcn'], ['0.2', '0.4', '0.6']]
# # pivot_res.loc[['mnist'],:, :, ['L1']].iloc[:, [6, 5, 2, 1, 3, 0, 4]]
# pivot_res.loc[['mnist', 'cifar10'],:, :, ['L1']].iloc[:, 0:1]

In [None]:
torch.cuda.mem_get_info(device=torch.device('cuda:0'))

In [61]:
combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)
gpu_start    = 0
gpu = gpu_start
capacity = 20

pid_ind = 0
cmd_list = []
for combo in combos:
    dataset, method, noise_type, noise_level, seed = combo
    
    cond1 = (res_dict['dataset']==dataset)
    cond2 = (res_dict['method']==method)
    cond3 = (res_dict['noise_type']==noise_type)
    cond4 = (res_dict['noise_level']==noise_level)
    cond5 = (res_dict['seed']==seed)
    
    if len(res_dict.loc[cond1 & cond2 & cond3 & cond4 & cond5]) == 0:
        
        cmd_list.append(f"CUDA_VISIBLE_DEVICES={gpu} "+\
                        f"numactl --physcpubind=0-68 "+\
                        f"python -W ignore run_calibration.py "+\
                        f"--dataset {dataset} "+\
                        f"--noise_type {noise_type} "+\
                        f"--noise_strength {noise_level} "+\
                        f"--method {method} "+\
                        f"--gpu  {gpu} "+\
                        f"--seed {seed} &")

        cmd_list.append(f"pids[{pid_ind}]=$!")

        if pid_ind%capacity==capacity-1:
            cmd_list.append(
                "for pid in ${pids[*]}; \ndo\n"+\
                "\t wait $pid \n"+\
                "done"
            )

        pid_ind = (pid_ind+1)%capacity
    #         gpu = (gpu+1)%8

        gpu = np.random.choice([0,1,3,4,5], p=np.array([4,5,3,6,6])/np.array([4,5,3,6,6]).sum(), replace=True)

In [62]:
start = 0
end = 404

if len(cmd_list):
    with open('execute_missing_experiments.sh', 'w') as f:
        for cmd in cmd_list[start:end]:
            f.write(cmd)
            f.write('\n')
        f.close()

cmd_list

['CUDA_VISIBLE_DEVICES=0 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.2 --method gp --gpu  0 --seed 77 &',
 'pids[0]=$!',
 'CUDA_VISIBLE_DEVICES=1 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.2 --method gp --gpu  1 --seed 78 &',
 'pids[1]=$!',
 'CUDA_VISIBLE_DEVICES=0 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.2 --method gp --gpu  0 --seed 79 &',
 'pids[2]=$!',
 'CUDA_VISIBLE_DEVICES=4 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.4 --method gp --gpu  4 --seed 77 &',
 'pids[3]=$!',
 'CUDA_VISIBLE_DEVICES=3 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.4 --method gp --gpu  3 --seed 78 &',
 'pids[4]=$!',
 'CUDA_VISIBLE_DEVICES=3 numactl --physc

### Joint Calibration Result

In [566]:
method_list  = ['raw+ts+oursv1', 'raw+mcdrop+oursv1', 'raw+ensemble+oursv1', 'cskd+oursv1', 'focal+oursv1', 'bm+oursv1', 'gp+oursv1']
# method_list  = ['raw+ts+oursv1','raw+ensemble+oursv1']


combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)

res_dict_raw = defaultdict(list)
for combo in combos:
    
    dataset, method, noise_type, noise_level, seed = combo    
    log_pattern = re.compile(f"{dataset}_{method.replace('+', '.*')}_{noise_type}_{noise_level}_{seed}.*.pkl")
    log_paths = list(filter(log_pattern.match, log_files))
    
    for log_path in log_paths:
        
        with open(os.path.join(result_root, log_path), 'rb') as f:
            config = pkl.load(f)
            result = pkl.load(f)
        f.close()
        
        if config['train'][dataset]['N_EPOCHS'] != 40:
            continue
        
        res_dict_raw['dataset'].append(dataset)
        res_dict_raw['method'].append(method.replace('raw+', ''))
        res_dict_raw['noise_type'].append(noise_type)
        res_dict_raw['noise_level'].append(noise_level)
        res_dict_raw['seed'].append(seed)
        
        res_dict_raw['l1_final'].append(result['l1'][-1])
        res_dict_raw['acc_final'].append(result['acc'][-1])
        res_dict_raw['ece_final'].append(result['ece'][-1])
        res_dict_raw['loss_final'].append(result['loss'][-1])
        
        res_dict_raw['l1_best'].append(min(result['l1']))
        res_dict_raw['acc_best'].append(max(result['acc']))
        res_dict_raw['ece_best'].append(min(result['ece']))
        res_dict_raw['loss_best'].append(min(result['loss']))
        
field_list = ['dataset', 'method', 'noise_type', 'noise_level', 'seed']
res_dict = pd.DataFrame(res_dict_raw).groupby(field_list).first()
res_dict = res_dict.reset_index()

method_columns_names = ['ts+oursv1', 'mcdrop+oursv1', 'ensemble+oursv1', 'cskd+oursv1', 'focal+oursv1', 'bm+oursv1', 'gp+oursv1']
agg_dict = res_dict[field_list+['l1_final', 'acc_final', 'ece_final', 'loss_final', 'l1_best', 'acc_best', 'ece_best', 'loss_best']].groupby(by=['dataset', 'noise_type', 'noise_level', 'method']).agg(func=['mean', 'std', 'size'])
agg_dict['ACC'] = agg_dict['acc_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['acc_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')
agg_dict['L1']  = agg_dict['l1_best']['mean'].round(3).apply( lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['l1_best']['std'].round(3).apply(lambda x:  '{:,.3f}'.format(x)).astype('str')
agg_dict['ECE'] = agg_dict['ece_best']['mean'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str') + r' $\pm$ ' + agg_dict['ece_best']['std'].round(3).apply(lambda x: '{:,.3f}'.format(x)).astype('str')

agg_dict[['ACC', 'L1', 'ECE']].columns = ['ACC', 'L1', 'ECE']
agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'linear'], ['0.2', '0.4', '0.6'], method_columns_names]

pivot_res = agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'linear'], ['0.2', '0.4', '0.6'], method_columns_names]
pivot_table = pd.pivot_table(pivot_res, index=['dataset', 'noise_type', 'noise_level'], columns='method', aggfunc=lambda x: x)
pivot_tab_acc = pivot_table['ACC']
pivot_tab_ece = pivot_table['ECE']
pitot_tab_l1  = pivot_table['L1']
pivot_tab_acc['metric'] = 'ACC'
pivot_tab_ece['metric'] = 'ECE'
pitot_tab_l1['metric']  = 'L1'

pivot_res = pd.concat([pivot_tab_acc, pivot_tab_ece, pitot_tab_l1])
pivot_res = pivot_res.reset_index()
pivot_res.columns = [[y for y in x if len(y)>0][0] for x in list(pivot_res.reset_index().columns)][1:]
pivot_res = pivot_res.set_index(['dataset', 'noise_type', 'noise_level', 'metric'])
# pivot_res = pivot_res.loc[['mnist', 'cifar10'], ['linear', 'rcn'], ['0.2', '0.4', '0.6']]
# pivot_res.loc[['mnist'],:, :, ['L1']].iloc[:, [6, 5, 2, 1, 3, 0, 4]]
pivot_res.loc[['mnist'],:, :, ['L1']]

  agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'linear'], ['0.2', '0.4', '0.6'], method_columns_names]
  pivot_res = agg_dict[['ACC', 'L1', 'ECE']].loc[['mnist', 'cifar10'], ['rcn', 'linear'], ['0.2', '0.4', '0.6'], method_columns_names]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pivot_tab_acc['metric'] = 'ACC'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pivot_tab_ece['metric'] = 'ECE'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,bm+oursv1,cskd+oursv1,ensemble+oursv1,focal+oursv1,gp+oursv1,ts+oursv1
dataset,noise_type,noise_level,metric,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
mnist,linear,0.2,L1,0.112 $\pm$ 0.002,0.083 $\pm$ 0.007,0.080 $\pm$ 0.002,0.102 $\pm$ 0.001,0.156 $\pm$ 0.005,
mnist,linear,0.4,L1,0.104 $\pm$ 0.002,0.081 $\pm$ 0.001,0.056 $\pm$ 0.006,0.087 $\pm$ 0.003,0.070 $\pm$ 0.002,
mnist,linear,0.6,L1,0.079 $\pm$ 0.008,0.101 $\pm$ 0.002,0.097 $\pm$ 0.003,0.102 $\pm$ 0.001,0.073 $\pm$ 0.009,
mnist,rcn,0.2,L1,0.130 $\pm$ 0.004,0.075 $\pm$ 0.002,0.082 $\pm$ nan,0.097 $\pm$ 0.001,0.147 $\pm$ 0.014,
mnist,rcn,0.4,L1,0.156 $\pm$ 0.039,0.081 $\pm$ 0.005,0.085 $\pm$ nan,0.153 $\pm$ 0.009,0.409 $\pm$ 0.084,0.087 $\pm$ nan
mnist,rcn,0.6,L1,0.109 $\pm$ 0.001,0.065 $\pm$ 0.005,0.071 $\pm$ nan,0.101 $\pm$ 0.006,0.287 $\pm$ 0.000,0.058 $\pm$ nan


In [567]:
combos = product(dataset_list, method_list, noise_type_list, noise_level_list, seed_list)
gpu_start    = 0
gpu = gpu_start
capacity = 50

pid_ind = 0
cmd_list = []
for combo in combos:
    dataset, method, noise_type, noise_level, seed = combo
    
    if dataset == 'cifar10':
        continue
    
    if 'lula' in method:
        continue
    
    cond1 = (res_dict['dataset']==dataset)
    cond2 = (res_dict['method']==method)
    cond3 = (res_dict['noise_type']==noise_type)
    cond4 = (res_dict['noise_level']==noise_level)
    cond5 = (res_dict['seed']==seed)
    
    if len(res_dict.loc[cond1 & cond2 & cond3 & cond4 & cond5]) == 0:
        
        cmd_list.append(f"CUDA_VISIBLE_DEVICES={gpu} "+\
                        f"numactl --physcpubind=0-68 "+\
                        f"python -W ignore run_calibration.py "+\
                        f"--dataset {dataset} "+\
                        f"--noise_type {noise_type} "+\
                        f"--noise_strength {noise_level} "+\
                        f"--method {method} "+\
                        f"--gpu  {gpu} "+\
                        f"--seed {seed} &")
        
        cmd_list.append(f"pids[{pid_ind}]=$!")
        
        if pid_ind%capacity==capacity-1:
            cmd_list.append(
                "for pid in ${pids[*]}; \ndo\n"+\
                "\t wait $pid \n"+\
                "done"
            )
        
        pid_ind = (pid_ind+1)%capacity
#         gpu = (gpu+1)%8
        
        gpu = np.random.choice([0,1,2,3,4,5], p=np.array([11,5,11,11,11,11])/np.array([11,5,11,11,11,11]).sum(), replace=True)

In [568]:
start = 0
end = 404

if len(cmd_list):
    with open('execute_missing_experiments.sh', 'w') as f:
        for cmd in cmd_list[start:end]:
            f.write(cmd)
            f.write('\n')
        f.close()

cmd_list

['CUDA_VISIBLE_DEVICES=0 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.2 --method raw+ts+oursv1 --gpu  0 --seed 77 &',
 'pids[0]=$!',
 'CUDA_VISIBLE_DEVICES=0 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.2 --method raw+ts+oursv1 --gpu  0 --seed 78 &',
 'pids[1]=$!',
 'CUDA_VISIBLE_DEVICES=3 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.2 --method raw+ts+oursv1 --gpu  3 --seed 79 &',
 'pids[2]=$!',
 'CUDA_VISIBLE_DEVICES=1 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.4 --method raw+ts+oursv1 --gpu  1 --seed 77 &',
 'pids[3]=$!',
 'CUDA_VISIBLE_DEVICES=3 numactl --physcpubind=0-68 python -W ignore run_calibration.py --dataset mnist --noise_type rcn --noise_strength 0.4 --method raw+ts+oursv1 --gpu  3 --seed 78 &',
