In [1]:
from __future__ import division, print_function

In [34]:
import abstention
reload(abstention)
reload(abstention.calibration)
reload(abstention.label_shift)
from abstention.calibration import TempScaling, softmax
from abstention.label_shift import EMShiftWeightEstimator, BBSE
import glob
import gzip
import numpy as np
from collections import defaultdict

def read_labels(fh, num_classes=10):
    """Parse a file of integer class labels into a one-hot matrix.

    Args:
        fh: iterable of lines, each holding a single integer class index
            (trailing whitespace/newline is stripped before parsing).
        num_classes: width of each one-hot row. Defaults to 10, the value
            that used to be hard-coded here.

    Returns:
        A float numpy array of shape (number of lines, num_classes) with a
        single 1 per row at the parsed class index. An empty input yields a
        (0, num_classes) array so the result is always two-dimensional.

    Raises:
        ValueError: if a line cannot be parsed as an integer.
        IndexError: if a class index does not fit in a row of num_classes
            entries (negative values follow numpy's from-the-end indexing).
    """
    class_indices = [int(line.rstrip()) for line in fh]
    one_hot = np.zeros((len(class_indices), num_classes))
    for row_idx, the_class in enumerate(class_indices):
        one_hot[row_idx, the_class] = 1
    return one_hot

def read_preds(fh):
    """Parse tab-separated float rows into a numpy array.

    Each line of ``fh`` is stripped of trailing whitespace, split on tab
    characters, and every field is converted with ``float``. The rows are
    stacked in file order into the returned array.
    """
    parsed_rows = []
    for line in fh:
        fields = line.rstrip().split("\t")
        parsed_rows.append([float(field) for field in fields])
    return np.array(parsed_rows)

def sample_from_probs_arr(arr_with_probs):
    """Draw one index according to the probabilities in ``arr_with_probs``.

    A single uniform number is drawn with ``np.random.random()`` and the
    running sum of the probabilities is walked until it reaches that number.
    Returns the index where that happens. An empty input falls through the
    loop and returns None.
    """
    threshold = np.random.random()
    last_idx = len(arr_with_probs) - 1
    running_total = 0
    for idx, prob in enumerate(arr_with_probs):
        running_total += prob
        # The last index is always accepted: floating point error can leave
        # the probabilities summing to slightly less than 1, in which case
        # the running total would never reach the threshold.
        if idx == last_idx or running_total >= threshold:
            return idx
        
# Load the one-hot test labels. The context manager closes the gzip handle as
# soon as parsing is done instead of leaving that to garbage collection.
with gzip.open(glob.glob("test_labels.txt.gz")[0]) as test_labels_fh:
    test_labels = read_labels(test_labels_fh)
# Map each class index (argmax of the one-hot row) to the positions of the
# test examples carrying that label; draw_test_indices samples from these.
test_class_to_indices = defaultdict(list)
for index,row in enumerate(test_labels):
    row_label = np.argmax(row)
    test_class_to_indices[row_label].append(index)
def draw_test_indices(total_to_return, label_proportions):
    """Sample test-set indices whose labels follow ``label_proportions``.

    For each class, ``int(total_to_return * proportion)`` indices are drawn
    with replacement from that class's entries in ``test_class_to_indices``.
    Truncation can leave the total short; the shortfall is filled one index
    at a time by first sampling a class from ``label_proportions`` and then
    drawing a single index of that class.

    Returns a list of indices into the test set.
    """
    drawn = []
    for cls, proportion in enumerate(label_proportions):
        n_for_class = int(total_to_return*proportion)
        picks = np.random.choice(test_class_to_indices[cls],
                                 n_for_class,
                                 replace=True)
        drawn.extend(picks)
    # Top up whatever the per-class truncation left out.
    shortfall = total_to_return - len(drawn)
    for _ in range(shortfall):
        cls = sample_from_probs_arr(label_proportions)
        drawn.append(np.random.choice(test_class_to_indices[cls]))
    return drawn

# Load the one-hot validation labels; the context manager closes the gzip
# handle once parsing is done instead of leaving that to garbage collection.
with gzip.open(glob.glob("valid_labels.txt.gz")[0]) as valid_labels_fh:
    valid_labels = read_labels(valid_labels_fh)

# (name, estimator) pairs evaluated by the experiment loop. Names must be
# unique: results are stored per name, so a repeated name makes that estimator
# run twice per trial and appends two entries per trial to the same list.
# NOTE(review): a second 'bbse-hard_calib-None' entry, BBSE(soft=False), was
# removed as a duplicate of the explicit calibrator_factory=None entry below;
# this assumes BBSE's default calibrator_factory is None -- confirm.
shift_weight_estimators = [
    ('em_calib-None', EMShiftWeightEstimator(calibrator_factory=None)),
    ('em_calib-tsnobiascorr', EMShiftWeightEstimator(calibrator_factory=TempScaling(verbose=False))),
    ('em_calib-tswithbiascorr', EMShiftWeightEstimator(calibrator_factory=
                                                       TempScaling(verbose=False,bias_positions=[0,1,2,3,4,5,6,7,8,9]))),
    ('bbse-hard_calib-None', BBSE(soft=False, calibrator_factory=None)),
    ('bbse-soft_calib-None', BBSE(soft=True, calibrator_factory=None)),
    ('bbse-soft_calib-tsnobiascorr', BBSE(soft=True, calibrator_factory=TempScaling(verbose=False))),
    ('bbse-soft_calib-tswithbiascorr', BBSE(soft=True, calibrator_factory=TempScaling(verbose=False,
                                                                                      bias_positions=[0,1,2,3,4,5,6,7,8,9]))),
    ('bbse-hard_calib-tsnobiascorr', BBSE(soft=False, calibrator_factory=TempScaling(verbose=False))),
    ('bbse-hard_calib-tswithbiascorr', BBSE(soft=False, calibrator_factory=TempScaling(verbose=False,
                                                                                      bias_positions=[0,1,2,3,4,5,6,7,8,9]))),
]

# Dirichlet concentration parameters to sweep over. Values must be distinct:
# the experiment loop reseeds per trial and keys results by alpha, so a
# repeated alpha reruns identical trials and appends duplicate results.
# NOTE(review): the list previously read [0.1, 1.0, 1.0]; the repeated 1.0 is
# assumed to be a typo for 10.0 (log-spaced sweep) -- confirm.
dirichlet_alphas = [0.1, 1.0, 10.0]

In [35]:
import numpy as np
import random
import sys

# NOTE(review): `seed` is not defined in any cell visible here; it has to
# exist in the kernel already (hidden state) -- define it explicitly before
# this cell so the notebook survives Restart & Run All.
# Read the pre-activation outputs for the test and validation sets and turn
# them into probabilities with an untempered, unbiased softmax. The context
# managers close each gzip handle as soon as it has been parsed.
with gzip.open(glob.glob("cifar10_balanced_seed-"+str(seed)+"_*testpreds.txt.gz")[0]) as test_preds_fh:
    test_preds = softmax(preact=read_preds(test_preds_fh),
                         temp=1, biases=None)
with gzip.open(glob.glob("cifar10_balanced_seed-"+str(seed)+"_*validpreds.txt.gz")[0]) as valid_preds_fh:
    valid_preds = softmax(preact=read_preds(valid_preds_fh),
                          temp=1, biases=None)

num_trials = 50

# Nested results: alpha -> estimator name -> one
# (estimated weights, ideal weights, L2 error) tuple per trial.
dirichletalpha_to_estimatorname_to_results = defaultdict(lambda: defaultdict(list))
for dirichlet_alpha in dirichlet_alphas:
    print("On alpha",dirichlet_alpha)
    results_for_alpha = dirichletalpha_to_estimatorname_to_results[dirichlet_alpha]
    for trial_num in range(num_trials):
        print("On trial num",trial_num)
        sys.stdout.flush()
        # Reseed both RNGs from the trial number so each trial is repeatable.
        trial_seed = trial_num*100
        np.random.seed(trial_seed)
        random.seed(trial_seed)
        # Target class proportions: one draw from a symmetric 10-class Dirichlet.
        dirichlet_dist = np.random.dirichlet([dirichlet_alpha]*10)
        test_indices = draw_test_indices(total_to_return=10000,
                                         label_proportions=dirichlet_dist)
        shifted_test_labels = test_labels[test_indices]
        shifted_test_preds = test_preds[test_indices]
        # Ground-truth weights: shifted class frequencies over validation ones.
        shifted_class_freqs = np.mean(shifted_test_labels,axis=0)
        valid_class_freqs = np.mean(valid_labels,axis=0)
        ideal_shift_weights = shifted_class_freqs/valid_class_freqs
        for estimator_name,shift_weight_estimator in shift_weight_estimators:
            shift_weights = shift_weight_estimator(
                valid_labels=valid_labels,
                tofit_initial_posterior_probs=shifted_test_preds,
                valid_posterior_probs=valid_preds)
            estimation_error = np.linalg.norm(shift_weights-ideal_shift_weights)
            results_for_alpha[estimator_name].append(
                (shift_weights,ideal_shift_weights,estimation_error))
        # Running mean L2 error per estimator over the trials finished so far.
        for estimator_name,trial_results in results_for_alpha.items():
            print(estimator_name,
                  np.mean([result[2] for result in trial_results]))

On alpha 0.1
On trial num 0
bbse-hard_calib-tsnobiascorr 0.2660934469597119
bbse-hard_calib-tswithbiascorr 0.21520811595465275
bbse-soft_calib-tsnobiascorr 0.21827879867437228
bbse-soft_calib-tswithbiascorr 0.18607329739807663
bbse-hard_calib-None 0.2660934469597119
em_calib-tsnobiascorr 0.14072584535014468
em_calib-tswithbiascorr 0.03191541185007465
em_calib-None 0.16760817754745813
bbse-soft_calib-None 0.2217670200154975
On trial num 1
bbse-hard_calib-tsnobiascorr 0.17865713103950137
bbse-hard_calib-tswithbiascorr 0.1533787251578435
bbse-soft_calib-tsnobiascorr 0.13982887427413968
bbse-soft_calib-tswithbiascorr 0.12600703894214624
bbse-hard_calib-None 0.17865713103950134
em_calib-tsnobiascorr 0.13231377092150648
em_calib-tswithbiascorr 0.06274409924712405
em_calib-None 0.13723327714034142
bbse-soft_calib-None 0.14289194340826628
On trial num 2
bbse-hard_calib-tsnobiascorr 0.14242255896018036
bbse-hard_calib-tswithbiascorr 0.11747939859935343
bbse-soft_calib-tsnobiascorr 0.10280637684

In [39]:
import json
import os
file_out = "label_shift_adaptation_results.json"
# Keep only the per-trial L2 errors (third element of each result tuple),
# keyed by alpha and then by estimator name.
errors_by_alpha = dict(
    (dirichlet_alpha,
     dict((estimator_name, [x[2] for x in results])
          for (estimator_name,results) in estimatorname_to_results.items()))
    for (dirichlet_alpha,estimatorname_to_results)
    in dirichletalpha_to_estimatorname_to_results.items())
# The with-block guarantees the JSON is flushed and the file closed before
# gzip reads it; the bare open(...).write(...) left that to object finalization.
with open(file_out, 'w') as out_fh:
    out_fh.write(json.dumps(errors_by_alpha,
                            sort_keys=True, indent=4, separators=(',', ': ')))
# Compress in place (-f overwrites an existing .gz); kept as the last
# expression so the cell still displays the command's exit status.
os.system("gzip -f "+file_out)

0