In [1]:
import numpy as np
import math
import json
import glob
from functools import reduce

In [2]:
def load_results(dataset, model, pretrained, ckpt, metric, n_sample, seed):
    """Load and concatenate every JSON result file of one experiment configuration.

    Files are read from
    ``../results/{dataset}/{pretrained|scratch}/{model}_{seed}-{ckpt}/{metric}_{n_sample}/*.json``,
    resolved relative to the current working directory.

    Args:
        dataset: Dataset name; first component under ``../results``.
        model: Architecture name; part of the run directory name.
        pretrained: Truthy selects the ``pretrained`` subtree, falsy selects ``scratch``.
        ckpt: Checkpoint identifier; part of the run directory name.
        metric: Metric name; part of the leaf directory name.
        n_sample: Sample-count identifier; part of the leaf directory name.
        seed: Seed identifier; part of the run directory name.

    Returns:
        One list holding the contents of all matching files, concatenated in
        sorted path order. The list is empty when no file matches.
    """
    init = 'pretrained' if pretrained else 'scratch'
    result_dir = f'../results/{dataset}/{init}/{model}_{seed}-{ckpt}/{metric}_{n_sample}'

    results = []
    # glob yields paths in arbitrary, filesystem-dependent order; sorting keeps
    # the order of the concatenated rows reproducible across machines and runs.
    for path in sorted(glob.glob(f'{result_dir}/*.json')):
        with open(path, 'r') as f:
            results.extend(json.load(f))

    return results

In [3]:
# Sample-count setting per dataset; the value ends up in the `{metric}_{n_sample}`
# part of the results directory name. Every dataset currently uses -1.
n_sample_dict = dict.fromkeys(
    (
        'CIFAR-10',
        'CIFAR-100',
        'Living-17',
        'Nonliving-26',
        'Entity-13',
        'Entity-30',
        'ImageNet',
        'RxRx1',
        'FMoW',
        'Amazon',
        'CivilComments',
    ),
    -1,
)

# Checkpoint identifier per dataset, used in the run directory name
# (presumably the number of training epochs -- confirm).
n_epoch_dict = dict([
    ('CIFAR-10', 300),
    ('CIFAR-100', 300),
    ('Living-17', 450),
    ('Nonliving-26', 450),
    ('Entity-13', 300),
    ('Entity-30', 300),
    ('ImageNet', 10),
    ('FMoW', 50),
    ('RxRx1', 90),
    ('Amazon', 3),
    ('CivilComments', 5),
])

# Whether results for a dataset live under the `pretrained` (True) or the
# `scratch` (False) results subtree.
pretrained_dict = {
    **dict.fromkeys(
        ('CIFAR-10', 'CIFAR-100', 'Living-17', 'Nonliving-26', 'Entity-13', 'Entity-30'),
        False,
    ),
    **dict.fromkeys(
        ('ImageNet', 'FMoW', 'RxRx1', 'Amazon', 'CivilComments'),
        True,
    ),
}

In [4]:
# Experiment selection for this run of the notebook.
dataset, arch, seed = 'CIFAR-10', 'resnet18', '1'
metrics = ['AC', 'DoC', 'IM', 'ATC', 'COT', 'COTT-L1']

# Per-dataset settings looked up from the configuration tables.
n_sample = n_sample_dict[dataset]
model_ckpt = n_epoch_dict[dataset]
pretrained = pretrained_dict[dataset]

# Rows of every metric gathered into one flat list, in `metrics` order.
results = [
    row
    for metric in metrics
    for row in load_results(dataset, arch, pretrained, model_ckpt, metric, n_sample, seed)
]

In [5]:
import altair as alt
import pandas as pd

In [6]:
def get_corr_chart(data, subpop):
    """Build a scatter chart of test error against the metric value.

    Args:
        data: List of records (dicts). The chart reads the ``metric``, ``error``
            and ``ref`` fields of each record.
        subpop: Text used as the chart title.

    Returns:
        A 200x200 ``alt.Chart`` of filled points with ``metric`` on the x axis,
        ``error`` on the y axis (titled "Test Error"), and ``ref`` mapped to
        both point color and point shape.
    """
    corr = alt.Chart(alt.Data(values=data), title=subpop).mark_point(size=50, filled=True).encode(
        x=alt.X('metric:Q'),
        # Each keyword gets its own channel class: alt.Y for y and alt.Shape for
        # shape, so channel-specific options validate against the right schema.
        y=alt.Y('error:Q', title='Test Error'),
        color=alt.Color('ref:N'),
        shape=alt.Shape('ref:N'),
    ).properties(
        width=200,
        height=200
    )
    return corr

In [7]:
# Split the loaded rows by their 'subpopulation' tag.
same_pop_results = list(filter(lambda row: row['subpopulation'] == 'same', results))
natural_pop_results = list(filter(lambda row: row['subpopulation'] == 'natural', results))
novel_pop_results = list(filter(lambda row: row['subpopulation'] == 'novel', results))

In [8]:
# Dashed black reference line from (0, 0) to (1, 1), i.e. metric == error.
line = pd.DataFrame([(0, 0), (1, 1)], columns=['metric', 'error'])
line_plot = (
    alt.Chart(line)
    .mark_line(strokeDash=[5, 8], color='black')
    .encode(x='metric', y='error')
)

In [9]:
# One scatter chart per subpopulation, titled with the subpopulation name.
same_corr, natural_corr, novel_corr = (
    get_corr_chart(rows, title)
    for rows, title in (
        (same_pop_results, 'same'),
        (natural_pop_results, 'natural'),
        (novel_pop_results, 'novel'),
    )
)

# Layer the dashed reference line over each scatter chart.
same_plt, natural_plt, novel_plt = (
    scatter + line_plot for scatter in (same_corr, natural_corr, novel_corr)
)

# Place the three layered charts side by side.
plt = same_plt | natural_plt | novel_plt

In [10]:
# Display the side-by-side figure with enlarged axis and legend fonts.
# configure_axis is applied once; chaining it again with the same arguments
# would not change the resulting configuration.
plt.configure_axis(
    labelFontSize=14,
    titleFontSize=16,
).configure_legend(
    titleFontSize=14,
    labelFontSize=16,
)


  for col_name, dtype in df.dtypes.iteritems():


In [11]:
# Scatter chart over every loaded row, with all subpopulations together.
get_corr_chart(data=results, subpop='all')

In [143]:
def polyfit(x, y, degree=1):
    """Fit a least-squares polynomial to (x, y) and report its goodness of fit.

    Args:
        x: Sequence of x values.
        y: Sequence of y values, same length as ``x``.
        degree: Degree of the fitted polynomial (default 1, a straight line).

    Returns:
        dict with two entries:
        ``'polynomial'`` -- list of fitted coefficients as returned by
        ``np.polyfit`` (highest power first);
        ``'determination'`` -- regression sum of squares divided by the total
        sum of squares, or NaN when ``y`` is constant (the ratio is undefined).
    """
    # Coerce once so plain lists and arrays are handled the same way.
    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)

    coeffs = np.polyfit(x_arr, y_arr, degree)
    yhat = np.poly1d(coeffs)(x_arr)

    ybar = y_arr.mean()
    ssreg = np.sum((yhat - ybar) ** 2)
    sstot = np.sum((y_arr - ybar) ** 2)

    # A constant y has zero total variance; dividing would emit a runtime
    # warning and yield NaN or inf depending on rounding noise in the fit.
    if sstot == 0 or y_arr.max() == y_arr.min():
        determination = np.nan
    else:
        determination = ssreg / sstot

    return {
        'polynomial': coeffs.tolist(),
        'determination': determination,
    }

In [144]:
from scipy.stats import spearmanr
import math

In [145]:
def compute_corr_stats(results, metric=None):
    """Print fit and correlation statistics between the 'metric' and 'error' fields.

    Nothing is printed for an empty ``results``. With a single record only the
    MAE line is printed. With two or more records the r2, Spearman rho, slope
    and bias of a degree-1 fit of error on metric are printed first.

    Args:
        results: List of records (dicts) that each provide ``'metric'`` and
            ``'error'`` values.
        metric: Optional label prefixed to the r2/rho/slope/bias lines. It is an
            explicit argument so the function does not depend on a notebook
            global named ``metric`` being defined. When omitted, the lines are
            printed without a prefix.

    Returns:
        None. All statistics are written to stdout.
    """
    if len(results) == 0:
        return

    d = [row['metric'] for row in results]
    e = [row['error'] for row in results]

    if len(results) > 1:
        label = '' if metric is None else f'{metric} '

        # Fit once and reuse the result for r2, slope and bias.
        fit = polyfit(d, e)
        slope, bias = fit['polynomial']
        coef, _ = spearmanr(d, e)

        print(f'{label}r2:', fit['determination'])
        print(f'{label}rho:', coef)
        print(f'{label}slope:', slope)
        print(f'{label}bias:', bias)

    # Mean absolute gap between metric and error, scaled by 100.
    yhat = np.array(d)
    y = np.array(e)

    print('MAE:', np.abs(yhat - y).mean() * 100)

In [146]:
# Statistics over the rows tagged with the 'same' subpopulation.
print('----- same pop results -----')
compute_corr_stats(results=same_pop_results)

----- same pop results -----
COTT-L1 r2: 0.9937388822276861
COTT-L1 rho: 0.9958898534997288
COTT-L1 slope: 1.1162806946609638
COTT-L1 bias: -0.10397030187054702
MAE: 3.0803742515357824


In [147]:
# Statistics over the rows tagged with the 'natural' subpopulation.
print('----- natural pop results -----')
compute_corr_stats(results=natural_pop_results)

----- natural pop results -----
COTT-L1 r2: 0.9932323050979855
COTT-L1 rho: 1.0
COTT-L1 slope: 1.1280858521759163
COTT-L1 bias: -0.08350273009252131
MAE: 2.9845745450258256


In [148]:
# Statistics over the rows tagged with the 'novel' subpopulation.
print('----- novel pop results -----')
compute_corr_stats(results=novel_pop_results)

----- novel pop results -----


In [149]:
# Statistics over the 'novel' rows whose 'corruption level' field equals 0.
print('----- novel pop results -----')
compute_corr_stats(
    results=list(filter(lambda row: row['corruption level'] == 0, novel_pop_results))
)

----- novel pop results -----
