In [1]:
import pickle
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only load caches that were produced by a trusted run of this project.
# The cached object is unpacked as a pair; the first element is discarded
# (presumably the training split -- TODO confirm) and the second is kept as df_test.
with open('cache/df.0.pickle', mode='rb') as h:
    _, df_test = pickle.load(h)
# hex_data is read further down through its 'id_name' and 'state_space' keys.
with open('cache/hex.pickle', mode='rb') as h:
    hex_data = pickle.load(h)

In [2]:
import numpy as np
from collections import Counter
# Ground-truth leaf label of every test sample (used as an index into 20 leaf slots).
labels = df_test['label']
# label_counts[l] == number of test samples whose label is l, for l in 0..19.
# Counted by index rather than taken from Counter(labels).values(): the latter
# depends on dict iteration order and silently drops labels that never occur,
# which would misalign the per-label division in get_accuracy.
# A label with zero occurrences now yields a 0 entry (per-label accuracy becomes
# nan for that slot) instead of shortening the sequence.
# assumes labels are non-negative integers below 20 -- TODO confirm against df_test
label_counts = np.bincount(np.asarray(labels, dtype=int), minlength=20).tolist()
def get_accuracy(Y, normalize=True):
    """
    Leaf accuracy of a prediction matrix, broken down by ground-truth label.

    Ground truth is read from the module-level ``labels``; sample ``i`` with
    label ``l`` counts as a hit when
      * ``Y`` is boolean (CRF output, leaves compete inside the state space):
        ``Y[i][l]`` is set;
      * ``Y`` is numerical (leaves compete explicitly): the arg-max over the
        first 20 columns of ``Y[i]`` equals ``l``.

    Args:
        Y: N * D matrix of prediction. Data type may be either numerical or boolean.
        normalize: if False, returns hit counts instead of accuracies.

    Returns:
        (by_label, overall): a length-20 array and a scalar. With
        ``normalize=True`` the array is divided element-wise by the
        module-level ``label_counts`` and the scalar by ``len(Y)``.
    """
    boolean_input = (Y.dtype == bool)
    hits = np.zeros(20, dtype=int)
    for idx in range(len(Y)):
        truth = labels[idx]
        row = Y[idx]
        if boolean_input:
            correct = row[truth]
        else:
            correct = np.argmax(row[:20]) == truth
        if correct:
            hits[truth] += 1
    total = hits.sum()
    if not normalize:
        return hits, total
    return hits.astype(float) / label_counts, float(total) / len(Y)

In [3]:
def get_opt_model(model_Y):
    """
    Pick the model whose predictions reach the highest overall accuracy.

    Args:
        model_Y: iterable of prediction matrices, one per candidate model,
            each accepted by ``get_accuracy``.

    Returns:
        (index, score): position of the best model in ``model_Y`` (first one
        on ties, as given by ``np.argmax``) and its overall accuracy.
    """
    overall_scores = []
    for Y in model_Y:
        overall = get_accuracy(Y)[1]  # [1] is the overall accuracy, [0] the per-label part
        overall_scores.append(overall)
    best = np.argmax(overall_scores)
    return best, overall_scores[best]

In [4]:
# Number of entries in hex_data['id_name'].
D = len(hex_data['id_name'])
# Limit the state space to states that switch on at least one of the first 20
# entries (the leaf nodes). Materialised as a list on purpose: to_crf iterates
# over state_space once per sample and indexes into it, which must not depend
# on what container type filter() happens to return.
state_space = list(filter(lambda state: state[:20].any(), hex_data['state_space']))
def to_crf(Y, pos_neg):
    """
    Decode each probability row into the best-scoring state of ``state_space``.

    A state ``s`` (boolean mask) is scored as the sum of ``log(y)`` over the
    entries selected by ``s``; with ``pos_neg`` the sum of ``log(1 - y)`` over
    the entries NOT selected by ``s`` is added as well. The state with the
    highest score (first one on ties) is returned for that row.

    Args:
        Y: iterable of per-sample score vectors, indexable by the boolean
            masks stored in the module-level ``state_space``.
        pos_neg: whether the unselected entries contribute to the score.

    Returns:
        Boolean array with one selected state per row of ``Y``.
    """
    def best_state(probs):
        if pos_neg:
            candidates = [
                np.log(probs[state]).sum()
                + np.log(1 - probs[np.logical_not(state)]).sum()
                for state in state_space
            ]
        else:
            candidates = [np.log(probs[state]).sum() for state in state_space]
        return state_space[np.argmax(candidates)]

    return np.array([best_state(row) for row in Y], dtype=bool)

In [8]:
from scipy.special import expit as sigmoid
# caffe
# Load the saved outputs and squash them through the sigmoid (to probability).
iter_Y = sigmoid(np.load('results/test_caffe.0.npy'))
# NOTE(review): the best entry is chosen by accuracy against the test labels,
# so raw_acc is a best-case figure rather than a held-out estimate.
opt_iter, raw_acc = get_opt_model(iter_Y)
# CRF decoding of the selected entry, first without and then with the
# negative (1 - p) terms; [1] keeps only the overall accuracy.
crf_acc, pncrf_acc = (
    get_accuracy(to_crf(iter_Y[opt_iter], pos_neg=use_neg))[1]
    for use_neg in (False, True)
)
print('iter={}, raw={}, crf={}, p&n_crf={}'.format(opt_iter, raw_acc, crf_acc, pncrf_acc))

iter=7, raw=0.724465558195, crf=0.725059382423, p&n_crf=0.722684085511


In [11]:
# svm
# Used as loaded, no sigmoid here (presumably already probabilities, going by
# the '.prob' in the file name -- TODO confirm).
kernel_Y = np.load('results/test_svm.0.prob.npy')
# NOTE(review): the best entry is chosen by accuracy against the test labels,
# so raw_acc is a best-case figure rather than a held-out estimate.
opt_kernel, raw_acc = get_opt_model(kernel_Y)
# CRF decoding of the selected entry, first without and then with the
# negative (1 - p) terms; [1] keeps only the overall accuracy.
crf_acc, pncrf_acc = (
    get_accuracy(to_crf(kernel_Y[opt_kernel], pos_neg=use_neg))[1]
    for use_neg in (False, True)
)
print('kernel={}, raw={}, crf={}, p&n_crf={}'.format(opt_kernel, raw_acc, crf_acc, pncrf_acc))

kernel=2, raw=0.766627078385, crf=0.764251781473, p&n_crf=0.763657957245
