In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import time
import os
import sys
# Put the grandparent of the current working directory first on the import
# path (presumably the project root holding the packages imported in the
# next cell -- confirm against the repository layout).
working_dir = os.getcwd()
grandparent_dir = os.path.dirname(os.path.dirname(working_dir))
sys.path.insert(0, grandparent_dir)

# Wall-clock start; the elapsed time is printed near the end of the notebook.
t1 = time.time()
# os.environ['THEANO_FLAGS'] = "device=cuda1"

In [None]:
import shelve
from pprint import pprint
import numpy as np
import lasagne
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import matplotlib as mpl
import matplotlib.cm
from lproc import subset, rmap
from sltools.utils import gloss2seq, seq2gloss
from sltools.nn_utils import onehot, jaccard, compute_scores
from sltools.postproc import optimize_boundaries, filter_longshort

from experiments.hmmvsrnn_reco.a_data import durations, gloss_seqs, tmpdir, \
   train_subset, val_subset, test_subset, vocabulary
from experiments.hmmvsrnn_reco.utils import autoreload_feats, reload_best_hmm

In [None]:
# The EXPERIMENT_NAME environment variable selects the report to analyse;
# without it the hard-coded default below is used.
experiment_name = os.environ.get('EXPERIMENT_NAME', "hmm_transfer_skelinp")

# The shelf is kept open on purpose: later cells read from it and the recap
# cell writes the analysis back into it.
report = shelve.open(os.path.join(tmpdir, experiment_name))

# Read the metadata record once and unpack the fields used by the notebook.
# `experiment_name` is overwritten by the value stored in the report.
meta = report['meta']
model, modality, variant, date, notes, experiment_name = (
    meta[key]
    for key in ('model', 'modality', 'variant', 'date', 'notes', 'experiment_name'))
args = report['args']

# Reload dataset

In [None]:
feat_seqs = autoreload_feats(modality, **args['encoder_kwargs'])


def _select_split(indices):
    """Restrict the dataset to `indices` and build the matching targets.

    Returns a 4-tuple:
      * a list with one subset per entry of `feat_seqs`,
      * the subset of `gloss_seqs`,
      * the subset of `durations`,
      * the targets, i.e. `gloss2seq(g, d, 0)` mapped with `rmap` over the
        selected gloss sequences and durations.
    """
    split_feats = [subset(f, indices) for f in feat_seqs]
    split_glosses = subset(gloss_seqs, indices)
    split_durations = subset(durations, indices)
    split_targets = rmap(lambda g, d: gloss2seq(g, d, 0),
                         split_glosses, split_durations)
    return split_feats, split_glosses, split_durations, split_targets


(feat_seqs_train, gloss_seqs_train,
 durations_train, targets_train) = _select_split(train_subset)
(feat_seqs_val, gloss_seqs_val,
 durations_val, targets_val) = _select_split(val_subset)
(feat_seqs_test, gloss_seqs_test,
 durations_test, targets_test) = _select_split(test_subset)

# Training report and model

In [None]:
fig, ax = plt.subplots(figsize=(8, 4))

# Lay the loss curves of the successive 'epoch*' report entries end to end
# on one shared x axis.
# NOTE(review): the keys are ordered by plain string sorting; this is only
# chronological if the key format sorts that way (e.g. zero-padded) -- confirm.
epoch_keys = sorted(key for key in report.keys() if key.startswith('epoch'))

offset = 0
for key in epoch_keys:
    losses = report[key]['epoch_losses']
    segment_end = offset + len(losses)
    ax.plot(np.arange(offset, segment_end), losses, c='blue')
    # Mark the final loss of each entry with a red cross.
    ax.scatter([segment_end - 1], [losses[-1]],
               marker='x', c='red', alpha=.5)
    offset = segment_end

ax.set_yscale("log")
plt.show()

best_epoch, recognizer, previous_recognizer = reload_best_hmm(report)

# Performance

In [None]:
def _plot_confusion(ax, confusion, title):
    """Draw one row-normalised confusion matrix on `ax` with a log colour scale.

    Each row of `confusion` is divided by its sum. The colour scale is
    logarithmic between 0.001 and 1; masked/invalid cells (zeros under the
    log norm, NaN from empty rows) are painted with the colour of 0.001.

    Returns the image artist so the caller can attach a colorbar to it.
    """
    import copy

    # Work on a private copy: calling set_bad() on matplotlib.cm.viridis
    # itself would alter the shared colormap for every other figure.
    cmap = copy.copy(matplotlib.cm.viridis)
    cmap.set_bad(cmap(0.001))

    # Rows that sum to zero give 0/0 = NaN, which is rendered with the
    # "bad" colour above; only the numpy warning is silenced here.
    with np.errstate(divide='ignore', invalid='ignore'):
        normalised = confusion / confusion.sum(axis=1, keepdims=True)

    image = ax.imshow(normalised,
                      interpolation='none',
                      cmap=cmap,
                      norm=LogNorm(vmin=0.001, vmax=1))
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_title(title)
    return image


def plot_perfs(perf_report, chains_lengths):
    """Show the three confusion matrices of a score report in one figure.

    Layout (2 x 3 grid): top-left the 'confusion' matrix, bottom-left the
    'posterior_confusion' matrix, and on the right (2 x 2 cells) the
    'statewise_confusion' matrix together with the colorbar.

    Parameters
    ----------
    perf_report : mapping
        Must provide the 2-D arrays 'confusion', 'posterior_confusion' and
        'statewise_confusion', and the scalars 'jaccard', 'framewise',
        'posterior_jaccard', 'posterior_framewise' and 'statewise_framewise'
        that are printed in the panel titles.
    chains_lengths : sequence of numbers
        Its cumulative sums (minus 0.5) are used as tick/grid positions on
        the state-wise panel (presumably the number of states of each HMM
        chain -- confirm against the recognizer).
    """
    plt.figure(figsize=(14, 7))

    ax = plt.subplot2grid((2, 3), (0, 0))
    _plot_confusion(
        ax, perf_report['confusion'],
        "HMM Jaccard/Framewise : {:5.4f}/{:5.4f}".format(
            perf_report['jaccard'], perf_report['framewise']))

    ax = plt.subplot2grid((2, 3), (1, 0))
    _plot_confusion(
        ax, perf_report['posterior_confusion'],
        "Posterior Jaccard/Framewise : {:5.4f}/{:5.4f}".format(
            perf_report['posterior_jaccard'], perf_report['posterior_framewise']))

    ax = plt.subplot2grid((2, 3), (0, 1), colspan=2, rowspan=2)
    image = _plot_confusion(
        ax, perf_report['statewise_confusion'],
        "State-wise framewise: {:5.4f}".format(perf_report['statewise_framewise']))
    # Dotted grid lines at the cumulative chain lengths.
    chain_edges = np.cumsum(chains_lengths) - .5
    ax.set_xticks(chain_edges)
    ax.set_yticks(chain_edges)
    ax.grid(color='gray', linestyle='dotted')

    # Tie the colorbar explicitly to the state-wise image and its axes.
    plt.colorbar(image, ax=ax)

    plt.show()

In [None]:
# Same figure for the training and the validation scores of the best epoch.
for scores_key in ('train_scores', 'val_scores'):
    plot_perfs(report[best_epoch][scores_key], recognizer.chains_lengths)

# Error analysis

In [None]:
# Training split: rebuild the targets as a plain list, predict, and score.
targets_train = [gloss2seq(glosses_, duration_, 0)
                 for glosses_, duration_ in zip(gloss_seqs_train, durations_train)]
predictions_train = recognizer.predict(feat_seqs_train)
train_scores = compute_scores(predictions_train, targets_train, vocabulary)
ji_train, framewise_train, confusion_train = train_scores

# Validation split: same three steps.
targets_val = [gloss2seq(glosses_, duration_, 0)
               for glosses_, duration_ in zip(gloss_seqs_val, durations_val)]
predictions_val = recognizer.predict(feat_seqs_val)
val_scores = compute_scores(predictions_val, targets_val, vocabulary)
ji_val, framewise_val, confusion_val = val_scores

In [None]:
# Distribution of errors: one Jaccard score per validation sequence,
# computed between the one-hot encoded target and prediction.
scores = [
    jaccard(onehot(target_, vocabulary), onehot(prediction_, vocabulary))
    for target_, prediction_ in zip(targets_val, predictions_val)
]

# 40 bin edges between 0.5 and 1; scores below 0.5 fall outside the
# histogram range and are not counted.
ji_bin_edges = np.linspace(0.5, 1, 40)

fig, ax = plt.subplots()
ax.hist(scores, ji_bin_edges)
ax.set_title("Histogram of sequence-wise JI")
plt.show()

In [None]:
# Average, over validation sequences, of the number of distinct predicted
# labels that never occur in the target of the same sequence.
extra_label_counts = [
    len(set(prediction_).difference(set(target_)))
    for prediction_, target_ in zip(predictions_val, targets_val)
]
np.mean(extra_label_counts, axis=0)

In [None]:
# Confusion types, from the off-diagonal mass of each row of the validation
# confusion matrix.
# NOTE(review): the naming below assumes rows are true labels and index 0 is
# the non-gesture class -- confirm against compute_scores.
cum_err = np.sum(confusion_val, axis=1) - np.diag(confusion_val)

false_pos = cum_err[0]
false_neg = np.sum(confusion_val[1:, 0])
# Remaining errors: rows other than 0 that land in a wrong non-zero column.
mis_class = np.sum(cum_err[1:]) - false_neg

print("false pos: {}  false neg: {}, mis-class: {}".format(
    false_pos, false_neg, mis_class))

In [None]:
# Plot the posteriors of the _correct_ states in color and other states in gray

def preview_seq(proba, gloss, n_states=5):
    """Plot per-state posteriors of one sequence against its annotation.

    Inside each annotated gloss, the `n_states` columns belonging to its
    label are drawn in colour (one shade of the 'summer' colormap per state)
    and every other column as a dotted gray line. Outside the glosses, the
    last column is drawn in purple and all other columns in dotted gray.

    Parameters
    ----------
    proba : 2-D array, indexed [time step, state]
        Posterior values to plot. The last column is the one highlighted
        outside of glosses (presumably the non-gesture state -- confirm).
    gloss : iterable of (label, start, stop)
        Annotated segments in increasing time order. Columns
        ``(label - 1) * n_states`` to ``label * n_states - 1`` are taken as
        the states of `label`, so labels are expected to start at 1.
    n_states : int, optional
        Number of consecutive columns per label (default 5, the value that
        used to be hard-coded).

    The figure is created but not shown; the caller calls ``plt.show()``.
    """
    plt.figure(figsize=(15, 3))
    state_colors = plt.cm.summer(np.linspace(0, 1, n_states))
    n_columns = proba.shape[1]

    def _plot_outside_gloss(begin, end):
        # All columns but the last in dotted gray, the last one in purple.
        steps = np.arange(begin, end)
        for col in range(n_columns - 1):
            plt.plot(steps, proba[begin:end, col], ls=':', c="gray")
        plt.plot(steps, proba[begin:end, -1], c="purple")

    cursor = 0
    for lbl, start, stop in gloss:
        _plot_outside_gloss(cursor, start)

        first_state = (lbl - 1) * n_states
        steps = np.arange(start, stop)
        for col in range(n_columns):
            if first_state <= col < first_state + n_states:
                plt.plot(steps, proba[start:stop, col],
                         c=state_colors[col - first_state])
            else:
                plt.plot(steps, proba[start:stop, col], ls=":", c='gray')
        cursor = stop

    # Tail of the sequence after the last annotated gloss.
    _plot_outside_gloss(cursor, len(proba))

    
# Index of the validation sequence to inspect.
seq = 15

# One feature array per entry of feat_seqs_val, passed as separate arguments.
sequence_feats = [f[seq] for f in feat_seqs_val]
sequence_posteriors = recognizer.posterior.predict_proba(*sequence_feats)
preview_seq(sequence_posteriors, gloss_seqs_val[seq])

# Zoom on a fixed window of the time axis.
plt.gca().set_xlim((200, 900))
plt.show()

In [None]:
# correlate error with predicted gloss duration

bin_width = 5  # width of one duration bin, in time steps

# One entry per predicted segment with a non-zero label:
#   prediction_accuracy: steps of the segment whose target equals the label,
#   none_accuracy:       steps of the segment whose target is 0,
#   gloss_d:             length of the segment.
# seq2gloss is run once per sequence instead of once per list.
prediction_accuracy, none_accuracy, gloss_d = [], [], []
for prediction_, target_ in zip(predictions_val, targets_val):
    for label_, start_, stop_ in seq2gloss(prediction_):
        if label_ == 0:
            continue
        target_span_ = target_[start_:stop_]
        prediction_accuracy.append(np.sum(target_span_ == label_))
        none_accuracy.append(np.sum(target_span_ == 0))
        gloss_d.append(stop_ - start_)

# Bin k collects the segments whose duration lies in
# [k * bin_width, (k + 1) * bin_width). No segment at all -> no bin, which
# gives an empty plot instead of max() failing on an empty list.
n_bins = int(np.ceil(max(gloss_d) / bin_width + 0.0001)) if gloss_d else 0
scores_pred = np.zeros((n_bins,))
scores_none = np.zeros((n_bins,))
total_d = np.zeros((n_bins,))
for vp, vn, d in zip(prediction_accuracy, none_accuracy, gloss_d):
    idx = int(d / bin_width)
    scores_pred[idx] += vp
    scores_none[idx] += vn
    total_d[idx] += d

# Bins without any segment give 0/0 = NaN and draw no bar; only the numpy
# warning is silenced.
with np.errstate(divide='ignore', invalid='ignore'):
    ratio_pred = scores_pred / total_d
    ratio_none = scores_none / total_d

plt.figure(figsize=(9, 4))
ax = plt.gca()
bin_starts = bin_width * np.arange(n_bins)
# align='edge' makes each bar span exactly the durations of its bin; with
# the default centred alignment every bar sat bin_width / 2 too far left.
ax.bar(bin_starts, ratio_pred, width=bin_width, align='edge', alpha=.5)
ax.bar(bin_starts, ratio_none, width=bin_width, align='edge', alpha=.5)

plt.legend(["predicted class", "non-gesture class"])
plt.xlabel("subsequence duration (based on model prediction)")
plt.ylabel("accuracy")
plt.show()

# Score

In [None]:
# Optimize duration filter

# NOTE(review): the meaning of this tuple is defined by optimize_boundaries
# (presumably the range of durations to search) -- confirm there.
boundary_search = (30, 100, 301)
boundaries = optimize_boundaries(
    targets_val, predictions_val, vocabulary, boundary_search)

print("Optimal range: ", boundaries)
# Unfiltered validation Jaccard index, for comparison.
print("FYI the score without is: {:.4f}".format(ji_val))

In [None]:
# Validation score

# Apply the duration filter to every validation prediction, then score.
predictions_filtered_val = [
    filter_longshort(prediction_, boundaries, 0)
    for prediction_ in predictions_val
]
filtered_val_scores = compute_scores(
    predictions_filtered_val, targets_val, vocabulary)
ji_filtered_val, accuracy_filtered_val, confusion_filtered_val = filtered_val_scores

print("validation score: {:.4f}".format(ji_filtered_val))

In [None]:
# Test score

targets_test = [gloss2seq(glosses_, duration_, 0)
                for glosses_, duration_ in zip(gloss_seqs_test, durations_test)]
predictions_test = recognizer.predict(feat_seqs_test)

# Same duration filter as for the validation set (boundaries were tuned on
# the validation predictions), then score.
predictions_filtered_test = [
    filter_longshort(prediction_, boundaries, 0)
    for prediction_ in predictions_test
]
filtered_test_scores = compute_scores(
    predictions_filtered_test, targets_test, vocabulary)
ji_filtered_test, accuracy_filtered_test, confusion_filtered_test = filtered_test_scores

print("testing score: {:.4f}".format(ji_filtered_test))

In [None]:
# Recap:

# Columns are train / validation / test; "?" marks a value this notebook
# does not compute.
print("Accuracy:   {:.4f} / {:.4f} / ?".format(framewise_train, framewise_val))
print("JI:         {:.4f} / {:.4f} / ?".format(ji_train, ji_val))
print("Acc. filt.:      ? / {:.4f} / {:.4f}".format(accuracy_filtered_val, accuracy_filtered_test))
print("JI filt.:        ? / {:.4f} / {:.4f}".format(ji_filtered_val, ji_filtered_test))

# Every tuple is (train, validation, test), with None where the value is not
# computed in this notebook.
recap = {
    "experiment_name": experiment_name,
    "best_epoch": best_epoch,
    "accuracy": (framewise_train, framewise_val, None),
    "ji": (ji_train, ji_val, None),
    "confusion": (confusion_train, confusion_val, None),
    "accuracy_filtered": (None, accuracy_filtered_val, accuracy_filtered_test),
    "ji_filtered": (None, ji_filtered_val, ji_filtered_test),
    # The filtered test confusion matrix is computed by the test-score cell;
    # store it like the other filtered test metrics instead of dropping it.
    "confusion_filtered": (None, confusion_filtered_val, confusion_filtered_test),
}
report['analysis'] = recap
# The shelf stays open for the cells below, so flush the new entry to disk
# now rather than relying on a close() that never happens.
report.sync()

# Model

In [None]:
# Encoder settings stored with the experiment arguments.
encoder_kwargs = report['args']['encoder_kwargs']
print(encoder_kwargs)

In [None]:
from sklearn.manifold import TSNE
from sltools.tconv import TemporalConv

# Fixed seed so the t-SNE ordering, and therefore the figure, is the same
# on every run.
tsne_seed = 0
n_rows_kept = 300

# Use the last TemporalConv layer of the feature network, and fail with an
# explicit message when there is none.
temporal_convs = [
    layer
    for layer in lasagne.layers.get_all_layers(recognizer.posterior.l_feats)
    if isinstance(layer, TemporalConv)
]
if not temporal_convs:
    raise ValueError(
        "no TemporalConv layer found in recognizer.posterior.l_feats")
conv_layer = temporal_convs[-1]

# Swap the last two axes of the weights and flatten the first two, giving one
# row per (axis 0, axis 2) pair and one column per axis-1 entry.
# NOTE(review): assumes W is 3-D, presumably (filters, inputs, taps) -- confirm.
weights = conv_layer.W.eval()
rows = weights.transpose((0, 2, 1)).reshape((-1, weights.shape[1]))

# Keep the rows with the largest Euclidean norm.
row_norms = np.linalg.norm(rows, axis=1)
rows = rows[np.argsort(row_norms)][-n_rows_kept:]

# 1-D t-SNE embedding, used only to order similar rows next to each other.
# The estimator gets its own name so that `model`, read from the report
# metadata at the top of the notebook, is not overwritten.
tsne = TSNE(n_components=1, metric='euclidean', random_state=tsne_seed)
embedding = tsne.fit_transform(rows)[:, 0]
order = np.argsort(embedding)
W1 = rows[order]
Y = embedding[order]

plt.figure(figsize=(5, 20))
# Symmetric colour limits so that zero maps to the middle of 'bwr'.
weight_limit = np.abs(W1).max()
plt.imshow(W1, clim=(-weight_limit, weight_limit), cmap='bwr')
plt.gca().set_aspect("auto")
plt.axis([-1, W1.shape[1], -1, W1.shape[0]])
plt.show()

In [None]:
# Seconds elapsed since t1 was taken in the setup cell.
t2 = time.time()
elapsed_seconds = t2 - t1
print(elapsed_seconds)

In [None]:
# l = lasagne.layers.get_all_layers(recognizer.posterior.l_feats)[2]
# W = np.asarray(l.W.eval())

# nrows, ncols = int(np.ceil(np.sqrt(W.shape[0] + 1))), int(np.floor(np.sqrt(W.shape[0] + 1)))
# img = np.zeros((nrows * (W.shape[2] + 1), ncols * (W.shape[3] + 1)))
# for k in range(W.shape[0]):
#     i, j = k // ncols, k % ncols
#     y, x = i * (W.shape[2] + 1), j * (W.shape[3] + 1)
#     tmp = img[y:y + W.shape[2]]
#     img[y:y + W.shape[2], x:x + W.shape[3]] = W[k, 0]

# plt.imshow(img, clim=(-np.abs(W).max(), np.abs(W).max()), cmap='bwr')
# plt.colorbar()

In [None]:
# lbl, cnt = np.unique(
#     np.concatenate([gloss2seq(g_, len(r_), 0) for g_, r_ in zip(gloss_seqs_val, feat_seqs_val)]),
#     return_counts=True)

In [None]:
# plt.bar(x=lbl+.5, height=cnt, log=True)
# plt.gca().set_ylim((1, 3e5))

In [None]:
# plt.bar(x=lbl+.5, height=cnt, log=False)
# plt.gca().set_ylim((1, 2e5))
# plt.show()

In [None]:
# sum([len(gseq) for gseq in gloss_seqs_val])