In [3]:
import numpy as np
import os
import torch

# Per-dataset agreement analysis between the supervised baseline(s) and the
# self-supervised methods, using the 'res5' predictions on the validation split.
# Fills `table_results[dataset]` with two dicts of formatted percentages:
#   'sup_vs_unsup': both / unsupervised-only / supervised-only / none
#   'unsup':        all / none / "only this method" for each unsupervised method

# Alternative method list that also includes SimCLR (disabled):
#method_names = ['btwins_r50_1000', 'dcv2_r50_800', 'moco_r50_800', 'simclr_r50_200', 'simsiam_r50_100', 'supervised_r50', 'swav_r50_800']
method_names = ['btwins_r50_1000', 'dcv2_r50_800', 'moco_r50_800', 'simsiam_r50_100', 'supervised_r50', 'swav_r50_800']
vissl_dir = '/vulcanscratch/mgwillia/vissl/predictions/'
table_results = {}

# A method is "supervised" iff its name contains the substring 'supervised'.
unsupervised_method_names = [name for name in method_names if 'supervised' not in name]
supervised_method_names = [name for name in method_names if 'supervised' in name]

for dataset in ['imagenet', 'aircraft', 'nabirds']:
#for dataset in ['aircraft', 'cub', 'flowers']:
    results = {}
    table_results[dataset] = {}
    for method_name in method_names:
        val_predictions = torch.load(os.path.join(vissl_dir, '_'.join([method_name, dataset, 'predictions']) + '.pth.tar'))['val_predictions']
        val_targets = torch.load(os.path.join(vissl_dir, '_'.join([method_name, dataset, 'targets']) + '.pth.tar'))['val_targets'].numpy()

        # One 0/1 correctness array per entry of the predictions dict.
        results[method_name] = {
            key: np.where(value.numpy() == val_targets, 1, 0)
            for key, value in val_predictions.items()
        }

    # NOTE(review): `val_targets` here is the one loaded for the last method; this
    # assumes every method was evaluated on the same samples in the same order -
    # TODO confirm.
    num_samples = val_targets.shape[0]

    # Per-sample count of methods in each group that got the sample right.
    is_correct_unsupervised_counts = np.zeros(num_samples)
    for method_name in unsupervised_method_names:
        is_correct_unsupervised_counts += results[method_name]['res5']
    is_correct_supervised_counts = np.zeros(num_samples)
    for method_name in supervised_method_names:
        is_correct_supervised_counts += results[method_name]['res5']

    # "Group is correct" means at least one method in the group is correct.
    any_unsupervised = is_correct_unsupervised_counts > 0
    any_supervised = is_correct_supervised_counts > 0

    pct_mutual_correct = (any_unsupervised & any_supervised).mean() * 100
    pct_only_unsupervised = (any_unsupervised & ~any_supervised).mean() * 100
    pct_only_supervised = (any_supervised & ~any_unsupervised).mean() * 100
    pct_mutual_incorrect = (~any_unsupervised & ~any_supervised).mean() * 100
    supervised_vs_pcts_map = {
        'both': f'{pct_mutual_correct:.2f}',
        'unsupervised': f'{pct_only_unsupervised:.2f}',
        'supervised': f'{pct_only_supervised:.2f}',
        'none': f'{pct_mutual_incorrect:.2f}',
    }

    # Compare against the actual number of unsupervised methods rather than
    # `len(method_names) - 1`, which is only right with exactly one supervised entry.
    all_unsupervised = is_correct_unsupervised_counts == len(unsupervised_method_names)
    none_unsupervised = is_correct_unsupervised_counts == 0
    unsupervised_pcts_map = {
        'all': f'{(all_unsupervised.mean() * 100):.2f}',
        'none': f'{(none_unsupervised.mean() * 100):.2f}'
    }

    # Samples that exactly one unsupervised method classified correctly.
    exactly_one_unsupervised = is_correct_unsupervised_counts == 1
    for method_name in unsupervised_method_names:
        is_correct_cur = results[method_name]['res5'] == 1
        pct_only_cur = (is_correct_cur & exactly_one_unsupervised).mean() * 100
        unsupervised_pcts_map[method_name] = f'{pct_only_cur:.2f}'

    print('Supervised vs. unsupervised:', supervised_vs_pcts_map)
    print('Within unsupervised:', unsupervised_pcts_map)
    table_results[dataset]['sup_vs_unsup'] = supervised_vs_pcts_map
    table_results[dataset]['unsup'] = unsupervised_pcts_map


Supervised vs. unsupervised: {'both': '73.64', 'unsupervised': '10.34', 'supervised': '2.40', 'none': '13.62'}
Within unsupervised: {'all': '58.19', 'none': '16.02', 'btwins_r50_1000': '0.97', 'dcv2_r50_800': '1.74', 'moco_r50_800': '0.69', 'simsiam_r50_100': '0.64', 'swav_r50_800': '1.74'}
Supervised vs. unsupervised: {'both': '81.40', 'unsupervised': '18.45', 'supervised': '0.03', 'none': '0.12'}
Within unsupervised: {'all': '80.08', 'none': '0.15', 'btwins_r50_1000': '0.24', 'dcv2_r50_800': '0.18', 'moco_r50_800': '0.09', 'simsiam_r50_100': '0.00', 'swav_r50_800': '0.21'}
Supervised vs. unsupervised: {'both': '55.05', 'unsupervised': '17.78', 'supervised': '6.05', 'none': '21.12'}
Within unsupervised: {'all': '30.49', 'none': '27.17', 'btwins_r50_1000': '2.46', 'dcv2_r50_800': '4.16', 'moco_r50_800': '0.87', 'simsiam_r50_100': '0.86', 'swav_r50_800': '2.80'}


In [4]:
# Render `table_results` (built by the agreement cell) as a LaTeX tabular with
# one column per dataset. Dataset display names are lower-cased to obtain the
# keys used in `table_results`.
datasets = ['ImageNet', 'Aircraft', 'NABirds']
#datasets = ['Aircraft', 'CUB', 'Flowers']
method_print_name = {
    'btwins_r50_1000': 'BTwins',
    'dcv2_r50_800': 'DCv2',
    'moco_r50_800': 'MoCo',
#    'simclr_r50_800': 'SimCLR',
    'simsiam_r50_100': 'SimSiam',
    'swav_r50_800': 'SwAV',
}


def _format_table_row(label, section, key):
    """Return one LaTeX row: `label` followed by the three per-dataset values.

    `section` selects 'sup_vs_unsup' or 'unsup' inside `table_results[dataset]`;
    `key` selects the entry within that section. The table layout is fixed at
    three dataset columns, matching the tabular spec printed by this cell.
    """
    cells = [f'{table_results[datasets[col].lower()][section][key]}' for col in range(3)]
    return f'\t {label} & ' + ' & '.join(cells) + ' \\\\ '


# Header.
header_lines = [
    '\\begin{tabular}{l l l l}',
    '\\multirow{2}{*}{Method} & \\multicolumn{3}{c}{Dataset} \\\\ \\cmidrule{2-4}',
    '{} & \\multicolumn{1}{c}{' + '} & \\multicolumn{1}{c}{'.join(datasets) + '} \\\\',
    '\\midrule',
]
for header_line in header_lines:
    print(header_line)

# Supervised vs. unsupervised section.
sup_vs_unsup_rows = [
    ('Sup. and Unsup.', 'both'),
    ('Sup. Only \t', 'supervised'),
    ('Unsup. Only \t', 'unsupervised'),
    ('Neither \t', 'none'),
]
for row_label, row_key in sup_vs_unsup_rows:
    print(_format_table_row(row_label, 'sup_vs_unsup', row_key))
print('\\midrule')

# Within-unsupervised section: all, one row per method, then none.
print(_format_table_row('All Unsup. \t', 'unsup', 'all'))
for print_key, print_name in method_print_name.items():
    print(_format_table_row(f'{print_name} Only \t', 'unsup', print_key))
print(_format_table_row('No Unsup. \t', 'unsup', 'none'))
print('\\bottomrule')
print('\\end{tabular}')

\begin{tabular}{l l l l}
\multirow{2}{*}{Method} & \multicolumn{3}{c}{Dataset} \\ \cmidrule{2-4}
{} & \multicolumn{1}{c}{ImageNet} & \multicolumn{1}{c}{Aircraft} & \multicolumn{1}{c}{NABirds} \\
\midrule
	 Sup. and Unsup. & 73.64 & 81.40 & 55.05 \\ 
	 Sup. Only 	 & 2.40 & 0.03 & 6.05 \\ 
	 Unsup. Only 	 & 10.34 & 18.45 & 17.78 \\ 
	 Neither 	 & 13.62 & 0.12 & 21.12 \\ 
\midrule
	 All Unsup. 	 & 58.19 & 80.08 & 30.49 \\ 
	 BTwins Only 	 & 0.97 & 0.24 & 2.46 \\ 
	 DCv2 Only 	 & 1.74 & 0.18 & 4.16 \\ 
	 MoCo Only 	 & 0.69 & 0.09 & 0.87 \\ 
	 SimSiam Only 	 & 0.64 & 0.00 & 0.86 \\ 
	 SwAV Only 	 & 1.74 & 0.21 & 2.80 \\ 
	 No Unsup. 	 & 16.02 & 0.15 & 27.17 \\ 
\bottomrule
\end{tabular}


In [9]:
import numpy as np
import os
import torch

# Pairwise and global agreement between methods.
# For every pair of methods and every entry of the validation predictions dict,
# prints the percentage of samples that both / neither / only one of the two
# classified correctly. Then, for 'res5', prints the share of samples that all
# methods, no method, or exactly one specific method got right.

#TODO: figure out label remapping for cars

method_names = ['btwins_r50_1000', 'dcv2_r50_800', 'moco_r50_800', 'simclr_r50_200', 'simsiam_r50_100', 'supervised_r50', 'swav_r50_800']
vissl_dir = '/vulcanscratch/mgwillia/vissl/predictions/'
for dataset in ['cub']:#, 'cars', 'imagenet', 'dogs']:
    ## TODO: eventually, compare top-1 to top-5
    results = {}
    for method_name in method_names:
        # Each file holds both splits, so load it once instead of once per split.
        predictions = torch.load(os.path.join(vissl_dir, '_'.join([method_name, dataset, 'predictions']) + '.pth.tar'))
        targets = torch.load(os.path.join(vissl_dir, '_'.join([method_name, dataset, 'targets']) + '.pth.tar'))
        train_targets = targets['train_targets'].numpy()
        val_targets = targets['val_targets'].numpy()

        # 0/1 correctness arrays, keyed by split and then by predictions-dict key.
        results[method_name] = {
            'train': {
                key: np.where(value.numpy() == train_targets, 1, 0)
                for key, value in predictions['train_predictions'].items()
            },
            'val': {
                key: np.where(value.numpy() == val_targets, 1, 0)
                for key, value in predictions['val_predictions'].items()
            },
        }

    # Pairwise comparison over every unordered pair of methods.
    for i, method_a in enumerate(method_names):
        method_a_results = results[method_a]['val']
        for method_b in method_names[i + 1:]:
            print(method_a, method_b)
            method_b_results = results[method_b]['val']
            for layer_name, is_correct_a in method_a_results.items():
                a_right = is_correct_a == 1
                b_right = method_b_results[layer_name] == 1
                pct_mutual_correct = (a_right & b_right).mean() * 100
                pct_only_a = (a_right & ~b_right).mean() * 100
                pct_only_b = (b_right & ~a_right).mean() * 100
                pct_mutual_incorrect = (~a_right & ~b_right).mean() * 100
                print(f'{layer_name}, both: {pct_mutual_correct:.2f}%, neither: {pct_mutual_incorrect:.2f}%.')
                print(f'Unique, {method_a}: {pct_only_a:.2f}%, {method_b}: {pct_only_b:.2f}%.')
                print('')

    # Per-sample number of methods whose 'res5' prediction is correct.
    # NOTE(review): `val_targets` is the one loaded for the last method; assumes
    # all methods share the same validation samples - TODO confirm.
    is_correct_total = np.zeros(val_targets.shape[0])
    for method_name in method_names:
        is_correct_total += results[method_name]['val']['res5']

    # "All" must track the method list length; a literal 7 breaks silently as
    # soon as a method is added to or removed from `method_names`.
    is_correct_all = is_correct_total == len(method_names)
    pcts_map = {
        'all': f'{(is_correct_all.mean() * 100):.2f}%',
        'none': f'{((is_correct_total == 0).mean() * 100):.2f}%'
    }

    # Samples that exactly one method classified correctly, attributed per method.
    is_correct_one = is_correct_total == 1
    for method_name in method_names:
        is_correct_cur = results[method_name]['val']['res5'] == 1
        pct_only_cur = (is_correct_cur & is_correct_one).mean() * 100
        pcts_map[method_name] = f'{pct_only_cur:.2f}%'

    print(pcts_map)


btwins_r50_1000 dcv2_r50_800
conv1, both: 9.15%, neither: 83.02%.
Unique, btwins_r50_1000: 2.99%, dcv2_r50_800: 4.85%.

res2, both: 17.36%, neither: 70.99%.
Unique, btwins_r50_1000: 5.90%, dcv2_r50_800: 5.75%.

res3, both: 21.13%, neither: 64.91%.
Unique, btwins_r50_1000: 6.20%, dcv2_r50_800: 7.77%.

res4, both: 36.68%, neither: 45.06%.
Unique, btwins_r50_1000: 6.18%, dcv2_r50_800: 12.08%.

res5, both: 57.73%, neither: 23.92%.
Unique, btwins_r50_1000: 6.25%, dcv2_r50_800: 12.10%.

btwins_r50_1000 moco_r50_800
conv1, both: 9.25%, neither: 82.90%.
Unique, btwins_r50_1000: 2.88%, moco_r50_800: 4.97%.

res2, both: 17.95%, neither: 71.06%.
Unique, btwins_r50_1000: 5.32%, moco_r50_800: 5.68%.

res3, both: 21.63%, neither: 65.50%.
Unique, btwins_r50_1000: 5.70%, moco_r50_800: 7.18%.

res4, both: 36.28%, neither: 47.19%.
Unique, btwins_r50_1000: 6.58%, moco_r50_800: 9.96%.

res5, both: 47.48%, neither: 31.55%.
Unique, btwins_r50_1000: 16.50%, moco_r50_800: 4.47%.

btwins_r50_1000 simclr_r50_20

res2, both: 14.95%, neither: 71.90%.
Unique, simclr_r50_200: 6.27%, swav_r50_800: 6.89%.

res3, both: 19.66%, neither: 64.98%.
Unique, simclr_r50_200: 4.94%, swav_r50_800: 10.42%.

res4, both: 32.45%, neither: 47.05%.
Unique, simclr_r50_200: 5.25%, swav_r50_800: 15.26%.

res5, both: 41.97%, neither: 30.82%.
Unique, simclr_r50_200: 4.07%, swav_r50_800: 23.13%.

simsiam_r50_100 supervised_r50
conv1, both: 9.87%, neither: 83.00%.
Unique, simsiam_r50_100: 4.38%, supervised_r50: 2.74%.

res2, both: 17.22%, neither: 71.59%.
Unique, simsiam_r50_100: 5.73%, supervised_r50: 5.45%.

res3, both: 21.25%, neither: 62.19%.
Unique, simsiam_r50_100: 4.33%, supervised_r50: 12.24%.

res4, both: 34.88%, neither: 38.70%.
Unique, simsiam_r50_100: 4.31%, supervised_r50: 22.11%.

res5, both: 46.06%, neither: 22.94%.
Unique, simsiam_r50_100: 6.49%, supervised_r50: 24.51%.

simsiam_r50_100 swav_r50_800
conv1, both: 9.82%, neither: 81.71%.
Unique, simsiam_r50_100: 4.44%, swav_r50_800: 4.04%.

res2, both: 16.69%