In [1]:
import numpy as np
import pickle
from learnable_crf import LearnableCrf
from scipy.special import expit as sigmoid

In [2]:
# Tag interpolated into the file name of the cached training data.
r = 90

# NOTE(review): pickle.load can execute arbitrary code; these files are presumably
# caches written by this project -- confirm before pointing at foreign files.
with open('cache/df_train.{}.pickle'.format(r), 'rb') as h:
    df = pickle.load(h)
# Positions of the training rows whose label is contained in its pseudo_label entry.
leaves = np.flatnonzero([label in pseudo_label
                         for label, pseudo_label in zip(df['label'], df['pseudo_label'])])
Y_train = df['label'][leaves]

# Validation and test frames are stored together in a single pickle.
with open('cache/df_val_test.pickle', 'rb') as h:
    df_val, df_test = pickle.load(h)
Y_val, Y_test = df_val['label'], df_test['label']

# Only the 'state_space' entry of the hex data is used by this notebook section.
with open('cache/hex.pickle', 'rb') as h:
    hex_data = pickle.load(h)
state_space = hex_data['state_space']

In [3]:
# Potentials loaded from results/. The first axis is indexed per iteration by the
# cells below; the second axis of the training array is indexed by sample position.
iter_Phi_train = np.load('results/iter_Phi_train.{}.npy'.format(r))
iter_Phi_train = iter_Phi_train[:, leaves]  # same row subset that defines Y_train
iter_Phi_val, iter_Phi_test = (
    np.load('results/iter_Phi_val.{}.npy'.format(r)),
    np.load('results/iter_Phi_test.{}.npy'.format(r)),
)

In [4]:
def get_accuracy(Y_predict, Y_truth, lim_states=False):
    """Return the fraction of rows of ``Y_predict`` that agree with ``Y_truth``.

    Args:
        Y_predict: 2-D array with one row per sample. A boolean array is read as
            a membership mask: a row counts as correct when the entry in the
            column given by its truth label is True. Any other dtype is read as
            scores: a row counts as correct when its argmax equals the label.
        Y_truth: integer label for each row of ``Y_predict``.
        lim_states: for score input only, take the argmax over the first 20
            columns instead of all columns. Ignored for boolean input.

    Returns:
        Accuracy as a float.
    """
    n_samples = len(Y_predict)
    if Y_predict.dtype == bool:
        hits = Y_predict[np.arange(n_samples), Y_truth]
    else:
        scores = Y_predict[:, :20] if lim_states else Y_predict
        hits = scores.argmax(axis=1) == Y_truth
    return float(np.count_nonzero(hits)) / n_samples
def confusion_matrix(Y_predict, Y_truth, n_labels=20, n_states=27):  # crf only
    """Average the predicted state vectors of the samples sharing each true label.

    Args:
        Y_predict: iterable of per-sample state vectors of length ``n_states``
            (boolean or integer), e.g. the output of a CRF prediction.
        Y_truth: integer label in ``[0, n_labels)`` for each row of ``Y_predict``.
        n_labels: number of rows of the result (distinct ground-truth labels).
            Defaults to 20, the value that was previously hard-coded.
        n_states: length of each state vector. Defaults to 27, the value that
            was previously hard-coded.

    Returns:
        Float array of shape ``(n_labels, n_states)``. Entry ``[k, j]`` is the
        fraction of samples with true label ``k`` whose prediction has state
        ``j`` set. Rows of labels that never occur in ``Y_truth`` are NaN.
    """
    cm = np.zeros((n_labels, n_states), dtype=int)
    count = np.zeros(n_labels, dtype=int)
    for i, y in enumerate(Y_predict):
        cm[Y_truth[i], :] += y
        count[Y_truth[i]] += 1
    # Labels absent from Y_truth produce 0/0: keep the NaN, drop the warning noise.
    with np.errstate(invalid='ignore'):
        return cm.astype(float) / count[:, None]
def raw_crf(Phi, states=None):
    """Pick, for every sample, the candidate state with the highest total score.

    Args:
        Phi: iterable of per-sample score vectors.
        states: sequence of candidate states, each usable as an index into a
            score vector (e.g. boolean masks). Defaults to the notebook-level
            ``state_space``.

    Returns:
        Boolean array with one selected state per row of ``Phi``. Ties go to
        the earliest candidate in ``states``.
    """
    if states is None:
        states = state_space

    def step(phi):
        # Built as a list: on Python 3 a bare map() is a lazy iterator, which
        # np.argmax would treat as a single opaque object and always return 0.
        scores = [phi[s].sum() for s in states]
        return states[np.argmax(scores)]

    return np.array([step(phi) for phi in Phi], dtype=bool)

In [5]:
# Map the loaded potentials through the logistic sigmoid (scipy's expit).
# NOTE: the names are rebound to the transformed arrays, so running this cell a
# second time applies the sigmoid twice -- reload the arrays before re-running.
iter_Phi_train, iter_Phi_val, iter_Phi_test = map(
    sigmoid, (iter_Phi_train, iter_Phi_val, iter_Phi_test))

In [6]:
# One LearnableCrf per iteration (first 10), each built from that iteration's
# training potentials and the shared training labels.
lcrf = [
    LearnableCrf(iter_Phi_train[i], Y_train)
    for i in range(10)
]

In [7]:
# Validation accuracy of each per-iteration CRF on the same iteration's potentials.
[
    get_accuracy(lcrf[i].predict(iter_Phi_val[i]), Y_val)
    for i in range(10)
]

[0.2660332541567696,
 0.3016627078384798,
 0.31116389548693585,
 0.3147268408551069,
 0.3159144893111639,
 0.3159144893111639,
 0.3141330166270784,
 0.3147268408551069,
 0.3147268408551069,
 0.3147268408551069]

In [8]:
# Test accuracy of each per-iteration CRF on the same iteration's potentials.
[
    get_accuracy(lcrf[i].predict(iter_Phi_test[i]), Y_test)
    for i in range(10)
]

[0.25950118764845603,
 0.29275534441805223,
 0.3087885985748218,
 0.3171021377672209,
 0.31294536817102137,
 0.31294536817102137,
 0.31294536817102137,
 0.31294536817102137,
 0.31294536817102137,
 0.31294536817102137]