In [1]:
import numpy as np
import pickle
from learnable_crf import LearnableCrf
from scipy.special import expit as sigmoid

In [16]:
# Tag embedded in the cache/result file names that this notebook loads.
r = 90

# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load cache files that were produced locally by this project.
with open('cache/df_train.{}.pickle'.format(r), mode='rb') as h:
    df = pickle.load(h)

# Positions of the training rows whose true label is contained in their
# pseudo label; only those rows are kept as training targets.
leaves = np.nonzero(
    [label in pseudo for label, pseudo in zip(df['label'], df['pseudo_label'])]
)[0]
Y_train = df['label'][leaves]

with open('cache/df_val_test.pickle', mode='rb') as h:
    df_val, df_test = pickle.load(h)
Y_val = df_val['label']
Y_test = df_test['label']

with open('cache/hex.pickle', mode='rb') as h:
    hex_data = pickle.load(h)
state_space = hex_data['state_space']
# Keep only the states that switch on at least one of the first 20 entries.
# A list comprehension (rather than filter()) guarantees a real list on both
# Python 2 and Python 3: later cells index state_space and iterate over it
# many times, which a one-shot filter iterator cannot support.
state_space = [state for state in state_space if state[:20].any()]

In [17]:
# Load the saved potentials for each split; the training file is keyed by `r`
# in the same way as the cached training frame.
iter_Phi_train = np.load('results/iter_Phi_train.{}.npy'.format(r))
# Keep only the columns that correspond to the retained training rows, so the
# second axis lines up with Y_train.
iter_Phi_train = iter_Phi_train[:, leaves]

iter_Phi_val = np.load('results/iter_Phi_val.{}.npy'.format(r))
iter_Phi_test = np.load('results/iter_Phi_test.{}.npy'.format(r))

In [4]:
def get_accuracy(Y_predict, Y_truth, lim_states=False):
    """Return the fraction of samples whose prediction matches the true label.

    Parameters
    ----------
    Y_predict : ndarray
        Either a boolean array, in which case sample ``i`` counts as correct
        when ``Y_predict[i, Y_truth[i]]`` is set, or a score array, in which
        case the predicted label is the arg-max along axis 1.
    Y_truth : array-like of int
        True label index for every sample.
    lim_states : bool, optional
        For score arrays only: take the arg-max over the first 20 columns
        instead of all columns. Ignored for boolean predictions.

    Returns
    -------
    float
        Number of correct samples divided by ``len(Y_predict)``; the input
        must therefore be non-empty.
    """
    total = len(Y_predict)
    if Y_predict.dtype == bool:
        # Look up, for each row, the flag stored at that row's true label.
        rows = np.arange(total)
        correct = Y_predict[rows, Y_truth]
    else:
        candidates = Y_predict[:, :20] if lim_states else Y_predict
        correct = candidates.argmax(axis=1) == Y_truth
    return float(np.count_nonzero(correct)) / total
def top3_accuracy(Y_predict, Y_truth, lim_states=False):
    """Return the fraction of samples whose true label is among the top three.

    Parameters
    ----------
    Y_predict : ndarray
        Either a boolean array holding, per sample, a stack of predicted
        states (a sample is a hit when any stacked state has the true label's
        column set), or a score array (a sample is a hit when the true label
        is one of the three highest-scoring columns).
    Y_truth : iterable of int
        True label index for every sample.
    lim_states : bool, optional
        For score arrays only: rank just the first 20 columns. Ignored for
        boolean predictions.

    Returns
    -------
    float
        Number of hits divided by ``len(Y_predict)``; the input must
        therefore be non-empty.
    """
    total = float(len(Y_predict))
    if Y_predict.dtype == bool:
        hits = [np.any(stack[:, label]) for stack, label in zip(Y_predict, Y_truth)]
    else:
        hits = []
        for row, label in zip(Y_predict, Y_truth):
            ranked = row[:20] if lim_states else row
            # argsort is ascending, so the last three indices are the top three.
            hits.append(label in ranked.argsort()[-3:])
    return np.count_nonzero(hits) / total

In [5]:
def _resolve_states(states):
    """Return the candidate states as an indexable list.

    Falls back to the notebook-level ``state_space`` when ``states`` is None.
    The result is materialised with ``list`` so that lazily produced states
    can be indexed and scanned once per sample. When relying on the fallback,
    ``state_space`` itself should already be a list: a one-shot iterator would
    be consumed by the first call.
    """
    if states is None:
        states = state_space
    return list(states)


def _top3_states(scores, states):
    """Stack the three best-scoring states, ordered third-best to best.

    Raises IndexError when fewer than three states are available. The top-1
    variant of this selection would be ``states[np.argmax(scores)]``.
    """
    order = np.argsort(scores)  # ascending; computed once per sample
    return np.vstack(tuple(states[order[i]] for i in range(-3, 0)))


def raw_crf(Phi, states=None):
    """Pick, for every sample, the three states with the largest summed potential.

    A state's score is the sum of the sample's potentials at the positions the
    state switches on.

    Parameters
    ----------
    Phi : iterable of 1-D ndarray
        One vector of potentials per sample.
    states : iterable of boolean 1-D ndarray, optional
        Candidate states used as masks into each potential vector. Defaults
        to the notebook-level ``state_space``.

    Returns
    -------
    ndarray of bool
        For each sample, the three selected states stacked third-best to best.
    """
    states = _resolve_states(states)

    def step(phi):
        scores = [phi[s].sum() for s in states]
        return _top3_states(scores, states)

    # List comprehensions instead of map(): np.argsort / np.array need real
    # sequences, which map() no longer returns on Python 3.
    return np.array([step(phi) for phi in Phi], dtype=bool)


def pn_crf(Phi, states=None):
    """Like ``raw_crf``, but also reward the positions a state leaves off.

    A state's score is the sum of ``phi`` over the positions it switches on
    plus the sum of ``1 - phi`` over the positions it leaves off.

    Parameters
    ----------
    Phi : iterable of 1-D ndarray
        One vector of potentials per sample.
    states : iterable of boolean 1-D ndarray, optional
        Candidate states used as masks into each potential vector. Defaults
        to the notebook-level ``state_space``.

    Returns
    -------
    ndarray of bool
        For each sample, the three selected states stacked third-best to best.
    """
    states = _resolve_states(states)

    def step(phi):
        complement = 1 - phi  # hoisted: identical for every candidate state
        scores = [
            phi[s].sum() + complement[np.logical_not(s)].sum()
            for s in states
        ]
        return _top3_states(scores, states)

    return np.array([step(phi) for phi in Phi], dtype=bool)

In [18]:
# Pass the loaded potentials through the logistic sigmoid (scipy's expit).
# NOTE(review): each name is rebound to its own transformed value, so running
# this cell a second time applies the sigmoid twice; re-run the loading cell
# first if this cell has to be executed again.
iter_Phi_train = sigmoid(iter_Phi_train)
iter_Phi_val = sigmoid(iter_Phi_val)
iter_Phi_test = sigmoid(iter_Phi_test)

In [None]:
# Top-3 accuracy of the pn_crf decoder on the validation labels, one value for
# each of the first ten slices of iter_Phi_val.
# lim_states=True has no effect here: pn_crf returns a boolean array, and
# top3_accuracy takes its boolean branch before it looks at lim_states.
[
    top3_accuracy(pn_crf(iter_Phi_val[i]), Y_val, lim_states=True)
    for i in range(10)
]

In [None]:
# Top-3 accuracy of the pn_crf decoder on the test labels, one value for each
# of the first ten slices of iter_Phi_test.
# lim_states=True has no effect here: pn_crf returns a boolean array, and
# top3_accuracy takes its boolean branch before it looks at lim_states.
[
    top3_accuracy(pn_crf(iter_Phi_test[i]), Y_test, lim_states=True)
    for i in range(10)
]

In [19]:
# Construct one LearnableCrf from each of the first ten slices of the training
# potentials, always paired with the same training labels.
lcrf = [
    LearnableCrf(iter_Phi_train[i], Y_train)
    for i in range(10)
]

In [20]:
# Top-3 accuracy on the validation labels: model i predicts from slice i of
# the validation potentials.
[
    top3_accuracy(lcrf[i].predict_top3(iter_Phi_val[i]), Y_val)
    for i in range(10)
]

[0.47387173396674587,
 0.5623515439429929,
 0.5819477434679335,
 0.5831353919239906,
 0.583729216152019,
 0.584916864608076,
 0.5855106888361045,
 0.584916864608076,
 0.584916864608076,
 0.584916864608076]

In [21]:
# Top-3 accuracy on the test labels: model i predicts from slice i of the
# test potentials.
[
    top3_accuracy(lcrf[i].predict_top3(iter_Phi_test[i]), Y_test)
    for i in range(10)
]

[0.4833729216152019,
 0.5754156769596199,
 0.5831353919239906,
 0.5932304038004751,
 0.5914489311163895,
 0.5914489311163895,
 0.5908551068883611,
 0.5914489311163895,
 0.5914489311163895,
 0.5914489311163895]

In [None]:
# def confusion_matrix(Y_predict, Y_truth):  # crf only
#     cm = np.zeros((20, 27), dtype=int)
#     count = np.zeros(20, dtype=int)
#     for i, y in enumerate(Y_predict):
#         cm[Y_truth[i], :] += y
#         count[Y_truth[i]] += 1
#     return cm.astype(float) / count[:, None]