Evaluating calibration methods on convolutional neural networks.

In [1]:
import numpy as np
import pandas as pd
from os.path import join
from cal_methods import TemperatureScaling, evaluate, softmax
import pickle
from sklearn.isotonic import IsotonicRegression

Using TensorFlow backend.


In [None]:
# Alternative file selection (CIFAR-10 and CIFAR-100 logits of ResNet-110).
mypath = '/Users/wildflowerlyi/Desktop/Github/NN_calibration/scripts'
myfiles = (
    'resnet_cifar/probs_resnet110_c10_logits.p',
    'resnet_cifar/probs_resnet110_c100_logits.p',
    # Further logits files, currently disabled:
    # 'resnet_wide/probs_resnet_wide32_c10_logits.p',
    # 'resnet_wide/probs_resnet_wide32_c100_logits.p',
    # 'resnet_densenet/probs_densenet40_c10_logits.p',
    # 'resnet_densenet/probs_densenet40_c100_logits.p',
    # 'resnet_wide/probs_resnet_wide32_c10clip_logits.p',
    # 'resnet_wide/probs_resnet_wide32_c100clip_logits.p',
)

In [2]:
# Directory holding the pickled logits, and the files (relative to it) to evaluate.
PATH = '/Users/wildflowerlyi/Desktop/Github/NN_calibration/scripts'
files = (
    'resnet_cifar/probs_resnet110_c10_logits.p',
    'resnet_wide/probs_resnet_wide32_c10_logits.p',
    # Further logits files, currently disabled:
    # 'resnet_densenet/probs_densenet40_c10_logits.p',
    # 'resnet_wide/probs_resnet_wide32_c10clip_logits.p',
    # 'resnet_sd/probs_resnet110_SD_c10clip_logits.p',
    # 'resnet_cifar/probs_resnet110_c100_logits.p',
    # 'resnet_wide/probs_resnet_wide32_c100_logits.p',
    # 'resnet_densenet/probs_densenet40_c100_logits.p',
    # 'resnet_wide/probs_resnet_wide32_c100clip_logits.p',
)

In [None]:
#myfilepath = join(mypath, myfiles)

In [None]:
#myfilepath

In [3]:
# Open file with pickled variables
def unpickle_probs(filepath, verbose = 0):
    """Load the validation and test splits stored in one pickle file.

    The pickle must contain exactly two pairs: ``(scores_val, labels_val)``
    followed by ``(scores_test, labels_test)``.

    Args:
        filepath: path of the pickle file; it is opened in binary mode.
        verbose: when truthy, print the ``.shape`` of each of the four arrays.

    Returns:
        ``((y_probs_val, y_val), (y_probs_test, y_test))`` as nested tuples.

    Note:
        ``pickle.load`` can run arbitrary code, so only open files you trust.
    """
    with open(filepath, 'rb') as handle:
        val_split, test_split = pickle.load(handle)

    y_probs_val, y_val = val_split
    y_probs_test, y_test = test_split

    if verbose:
        # Same labels and order as before: validation first, then test.
        labelled = (
            ("y_probs_val:", y_probs_val),
            ("y_true_val:", y_val),
            ("y_probs_test:", y_probs_test),
            ("y_true_test:", y_test),
        )
        for label, arr in labelled:
            print(label, arr.shape)

    return ((y_probs_val, y_val), (y_probs_test, y_test))

In [None]:
#(y_probs_val, y_val), (y_probs_test, y_test) = unpickle_probs(myfilepath, True)

In [None]:
# Sanity check: show the full path of every logits file selected in `files`.
for f in files:
    print(join(PATH, f))

In [None]:
# Sanity check: show each file's position in `files` followed by its full path.
for i, f in enumerate(files):
    # %-formatting prints "<index> <file>" identically on Python 2 and Python 3
    # (the former `print i, f` statement is a SyntaxError on Python 3).
    print("%d %s" % (i, f))
    # Join the whole relative name: `f[:0]` is always the empty string, so the
    # previous call only ever printed the directory itself.
    print(join(PATH, f))

In [4]:
def _fill_nan(probs):
    """Overwrite NaN entries of ``probs`` in place with a tiny positive value and return it."""
    # NaN entries are treated as "probability close to zero".
    probs[np.isnan(probs)] = 0.00000000000000000000000000000000000000001
    return probs


def cal_results(fn, path, files, m_kwargs = None, approach = "all"):
    """Calibrate every logits file with ``fn`` and tabulate the metrics before and after.

    Args:
        fn: callable (e.g. a class) that returns a model exposing ``fit`` and
            ``predict`` when called as ``fn(**m_kwargs)``.
        path: directory that the entries of ``files`` are relative to.
        files: iterable of pickle file names readable by ``unpickle_probs``.
        m_kwargs: keyword arguments forwarded to ``fn``; ``None`` means no arguments.
        approach: ``"all"`` fits one model on the full validation logits; any other
            value fits one model per class on that class's softmax column (1-vs-k).

    Returns:
        pandas.DataFrame with columns ``Name, Error, ECE, MCE, Loss`` and two rows
        per file: the uncalibrated scores, then the calibrated ones (``<name>_calib``).
    """
    # Local import keeps this cell self-contained; only used to name the rows.
    from os.path import basename

    # Avoid a shared mutable default argument.
    if m_kwargs is None:
        m_kwargs = {}

    df = pd.DataFrame(columns=["Name", "Error", "ECE", "MCE", "Loss"])

    for i, f in enumerate(files):
        # Drop the first and last "_"-separated tokens of the *file name* only, so a
        # directory such as "resnet_cifar/" no longer leaks into the row name.
        name = "_".join(basename(f).split("_")[1:-1])
        print(name)
        (logits_val, y_val), (logits_test, y_test) = unpickle_probs(join(path, f))

        if approach == "all":
            model = fn(**m_kwargs)
            model.fit(logits_val, y_val.flatten())

            # NaN outputs of the calibrated model are replaced by a near-zero value.
            probs_test = _fill_nan(model.predict(logits_test))

            error, ece, mce, loss = evaluate(softmax(logits_test), y_test, verbose=True)  # Test before scaling
            error2, ece2, mce2, loss2 = evaluate(probs_test, y_test, verbose=False)

        else:  # 1-vs-k models
            # Clean the softmax outputs before they are used for fitting/predicting.
            probs_val = _fill_nan(softmax(logits_val))
            probs_test = _fill_nan(softmax(logits_test))
            K = probs_test.shape[1]

            # Go through all the classes
            for k in range(K):
                # Binary target: 1 where the true label is class k, 0 elsewhere.
                y_cal = np.array(y_val == k, dtype="int")[:, 0]

                model = fn(**m_kwargs)
                model.fit(probs_val[:, k], y_cal)  # only the column of class k

                probs_test[:, k] = model.predict(probs_test[:, k])

            # The per-class models can emit NaN themselves (sklearn's
            # IsotonicRegression does so by default for inputs outside the fitted
            # range), so clean again before scoring.
            _fill_nan(probs_test)

            # Get results for test set.
            # NOTE(review): normalize=True is passed only for the calibrated scores,
            # presumably because the per-class outputs no longer sum to one; confirm
            # against cal_methods.evaluate.
            error, ece, mce, loss = evaluate(softmax(logits_test), y_test, verbose=True, normalize=False)
            error2, ece2, mce2, loss2 = evaluate(probs_test, y_test, verbose=False, normalize=True)

        df.loc[i*2] = [name, error, ece, mce, loss]
        df.loc[i*2+1] = [(name + "_calib"), error2, ece2, mce2, loss2]

    return df


In [None]:
#df_iso = cal_results(IsotonicRegression, mypath, myfiles, {'y_min':0, 'y_max':1}, approach = "single")

In [5]:
# Per-class (1-vs-k) isotonic regression with outputs bounded to [0, 1];
# any approach other than "all" selects the per-class branch of cal_results.
df_iso = cal_results(
    IsotonicRegression,
    PATH,
    files,
    m_kwargs={'y_min': 0, 'y_max': 1},
    approach="single",
)

cifar/probs_resnet110_c10
('Accuracy:', 93.390000000000001)
('Error:', 6.6099999999999994)
('ECE:', 0.048270407003164296)
('MCE:', 0.379260828194109)
('Loss:', 0.38292819397349553)
wide/probs_resnet_wide32_c10
('Accuracy:', 93.799999999999997)
('Error:', 6.2000000000000028)
('ECE:', 0.047311796060204525)
('MCE:', 0.3678059713045756)
('Loss:', 0.37100536326453859)
