In [None]:
# Notebook setup: enable the autoreload extension in mode 2 and render
# matplotlib figures inline in the notebook output.
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import os
import sys
# Append the parent of the current working directory to the import path.
# NOTE(review): presumably this is where the project packages imported in the
# following cells live - confirm.
sys.path.append(os.path.normpath(os.path.join(os.getcwd(), '..')))
# Select the compute device through THEANO_FLAGS. This is set here, before the
# cell that imports lasagne.
# NOTE(review): presumably Theano only reads this variable at import time - confirm.
os.environ['THEANO_FLAGS'] = "device=cuda1"

In [None]:
import shelve
import pickle as pkl
from pprint import pprint
import numpy as np
import lasagne
from sklearn.metrics import confusion_matrix
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import SymLogNorm
from matplotlib.mlab import bivariate_normal
import matplotlib.cm
from lproc import subset, rmap
from datasets.utils import gloss2seq, seq2gloss
from sltools.nn_utils import compute_scores, jaccard, onehot
from sltools.models.rnn import build_predict_fn
from sltools.postproc import optimize_boundaries, filter_longshort

# Reload dataset

In [None]:
# Number of output labels; handed to build_predict_fn when the RNN is rebuilt.
nlabels = 21

# --- Active configuration: skeleton features ---------------------------------
from experiments.ch14_skel.a_data import (
    durations, gloss_seqs, tmpdir,
    train_subset, val_subset, test_subset, vocabulary)
from experiments.ch14_skel.b_preprocess import feat_seqs
from experiments.ch14_skel.c_models import build_lstm
# Wrap each feature sequence in a 1-tuple, so that every item of feat_seqs is
# a tuple of arrays (the form iterated over when the RNN input shape is built).
feat_seqs = rmap(lambda seq: (seq,), feat_seqs)

# --- Alternative configuration: BGR features ----------------------------------
# from experiments.ch14_bgr.a_data import durations, gloss_seqs, tmpdir, \
#     train_subset, val_subset, test_subset, vocabulary
# from experiments.ch14_bgr.b_preprocess import feat_seqs
# feat_seqs = rmap(lambda x: (x,), feat_seqs)

# --- Alternative configuration: fusion ----------------------------------------
# from experiments.ch14_fusion.a_data import durations, gloss_seqs, tmpdir, \
#     train_subset, val_subset, test_subset, vocabulary
# from experiments.ch14_fusion.b_preprocess import feat_seqs


def _take_split(indices):
    """Return the (features, glosses, durations) views restricted to `indices`."""
    return (subset(feat_seqs, indices),
            subset(gloss_seqs, indices),
            subset(durations, indices))


feats_seqs_train, gloss_seqs_train, durations_train = _take_split(train_subset)
feats_seqs_val, gloss_seqs_val, durations_val = _take_split(val_subset)
feats_seqs_test, gloss_seqs_test, durations_test = _take_split(test_subset)

# Reload Models

In [None]:
# HMM
# hmm_report = shelve.open(os.path.join(tmpdir, "..", "..", "ch14_skel_hmm_alpha07", "cache", "hmm_report"))
hmm_report = shelve.open(os.path.join(tmpdir, "hmm_report"))

# Walk the training phases in numeric order (the shelf keys are stringified
# integers), concatenating the loss histories and remembering the phase with
# the highest validation Jaccard index.
all_batch_losses, all_epoch_losses = [], []
n_epochs = []
best_phase, best_score = 0, 0
for i in sorted(int(key) for key in hmm_report.keys()):
    r = hmm_report[str(i)]
    all_batch_losses.extend(r['batch_losses'])
    all_epoch_losses.extend(r['epoch_losses'])
    phase_score = r['val_report']['jaccard']
    if phase_score > best_score:
        best_phase, best_score = i, phase_score

hmm_phase_report = hmm_report[str(best_phase)]
hmm_recognizer = hmm_phase_report['model']

# Validation targets and raw HMM predictions.
labels = [gloss2seq(glosses, duration, 0)
          for glosses, duration in zip(gloss_seqs_val, durations_val)]
preds = hmm_recognizer.predict(feats_seqs_val)

# Boundaries later given to filter_longshort, tuned on the validation set.
hmm_boundaries = optimize_boundaries(labels, preds, vocabulary, (30, 150, 300))
print("Optimal range: ", hmm_boundaries)

# Mean Jaccard index without and with the filter_longshort post-processing.
raw_scores = [
    jaccard(onehot(target, vocabulary), onehot(pred, vocabulary))
    for target, pred in zip(labels, preds)]
filtered_scores = [
    jaccard(onehot(target, vocabulary),
            onehot(filter_longshort(pred, hmm_boundaries, 0), vocabulary))
    for target, pred in zip(labels, preds)]
ji_before = np.mean(raw_scores)
ji_after = np.mean(filtered_scores)
print("JI: {:.4f} -> {:.4f}".format(ji_before, ji_after))
# Score stored in the report for the selected phase, for comparison.
print(hmm_phase_report['val_report']['jaccard'])

In [None]:
# RNN
max_time = 128
batch_size = 16
rnn_report = shelve.open(os.path.join(tmpdir, "rnn_report"))

# Keep only the epochs that carry validation scores and take the last entry of
# the ascending (jaccard, epoch) ordering, i.e. the best-scoring epoch.
scored_epochs = sorted(
    (float(rnn_report[str(e)]['val_scores']['jaccard']), int(e))
    for e in rnn_report.keys()
    if 'val_scores' in rnn_report[str(e)].keys())
best_epoch = scored_epochs[-1][1]

rnn_epoch_report = rnn_report[str(best_epoch)]

# One shape per array of the first feature tuple, with the leading axis dropped.
input_shape = tuple(arr.shape[1:] for arr in feat_seqs[0])

model = build_lstm(*input_shape, batch_size=batch_size, max_time=max_time)

# Restore the parameter values saved for the selected epoch.
all_layers = lasagne.layers.get_all_layers(model['l_linout'])
params_path = os.path.join(tmpdir, "rnn_it{:04d}.pkl".format(best_epoch))
with open(params_path, 'rb') as f:
    params = pkl.load(f)
lasagne.layers.set_all_param_values(all_layers, params)

rnn_predict_fn = build_predict_fn(model, batch_size, max_time, nlabels, model['warmup'])

# Validation targets and raw RNN predictions (argmax over axis 1 of each output).
labels = [gloss2seq(glosses, duration, 0)
          for glosses, duration in zip(gloss_seqs_val, durations_val)]
rnn_inputs = rmap(lambda feats: feats[0], feats_seqs_val)
rnn_outputs = rnn_predict_fn(rnn_inputs)
preds = rmap(lambda scores: np.argmax(scores, axis=1), rnn_outputs)

# Boundaries later given to filter_longshort, tuned on the validation set.
rnn_boundaries = optimize_boundaries(labels, preds, vocabulary, (30, 150, 300))
print("Optimal range: ", rnn_boundaries)

# Mean Jaccard index without and with the filter_longshort post-processing.
raw_scores = [
    jaccard(onehot(target, vocabulary), onehot(pred, vocabulary))
    for target, pred in zip(labels, preds)]
filtered_scores = [
    jaccard(onehot(target, vocabulary),
            onehot(filter_longshort(pred, rnn_boundaries, 0), vocabulary))
    for target, pred in zip(labels, preds)]
ji_before = np.mean(raw_scores)
ji_after = np.mean(filtered_scores)
print("JI: {:.4f} -> {:.4f}".format(ji_before, ji_after))
# Score stored in the report for the selected epoch, for comparison.
print(rnn_epoch_report['val_scores']['jaccard'])

# Compare performances

In [None]:
# HMM: predict on the validation features, then post-process every sequence
# with the boundaries tuned for the HMM.
hmm_raw_preds = hmm_recognizer.predict(feats_seqs_val)
hmm_preds = [filter_longshort(pred, hmm_boundaries, 0) for pred in hmm_raw_preds]

# RNN: feed the first element of each feature tuple, take the argmax over
# axis 1 of each output, then post-process with the RNN boundaries.
rnn_inputs = rmap(lambda feats: feats[0], feats_seqs_val)
rnn_raw_preds = rmap(lambda scores: np.argmax(scores, axis=1),
                     rnn_predict_fn(rnn_inputs))
rnn_preds = [filter_longshort(pred, rnn_boundaries, 0) for pred in rnn_raw_preds]

In [None]:
# Validation targets rebuilt from the gloss annotations and durations.
targets_val = [gloss2seq(glosses, duration, 0)
               for glosses, duration in zip(gloss_seqs_val, durations_val)]

# Three scores per model; the third one of each (hc / rc) is the matrix used
# by the confusion comparison in the following cells.
(hj, hf, hc), (rj, rf, rc) = (
    compute_scores(model_preds, targets_val, vocabulary)
    for model_preds in (hmm_preds, rnn_preds))

In [None]:
# Difference between the HMM and RNN confusion matrices, both divided by the
# row totals of the HMM matrix (C_i in the figure title).
row_totals = hc.sum(axis=1, keepdims=True)
diff = hc / row_totals - rc / row_totals

# Names shown along the target axis, one per label index.
gloss_names = [
    '∅', 'vattene', 'vieniqui', 'perfetto', 'furbo', 'cheduepalle', 'chevuoi',
    'daccordo', 'seipazzo', 'combinato', 'freganiente', 'ok', 'cosatifarei',
    'basta', 'prendere', 'noncenepiu', 'fame', 'tantotempo', 'buonissimo',
    'messidaccordo', 'sonostufo']
class_ticks = np.arange(21)

plt.figure()
plt.imshow(diff, cmap='RdYlBu', clim=(-.07, .07))

plt.colorbar()
# The title is placed below the axes (y=-.3).
plt.title(r"$\dfrac{C_{ij}^{hmm} - C_{ij}^{rnn}}{C_{i}} \quad where \quad C_{ij} = \#\left(ŷ=j \, | \, y=i\right)$",
          y=-.3)
plt.xlabel("predictions")
plt.ylabel("targets")
plt.yticks(class_ticks)
plt.xticks(class_ticks)
axes = plt.gca()
axes.set_yticklabels(gloss_names)
axes.set_xticklabels([])  # keep the x ticks but hide their labels
plt.show()

In [None]:
# Summed diagonal of (hc - rc), relative to the total of all entries of hc.
diagonal_gap = np.diag(hc - rc)
np.sum(diagonal_gap) / hc.sum()

In [None]:
# HMM: share of off-diagonal counts within the sub-matrix that leaves out
# row 0 and column 0.
x = hc.copy()
np.fill_diagonal(x, 0)
x[1:, 1:].sum() / hc[1:, 1:].sum()

In [None]:
# RNN: share of off-diagonal counts within the sub-matrix that leaves out
# row 0 and column 0.
x = rc.copy()
np.fill_diagonal(x, 0)
x[1:, 1:].sum() / rc[1:, 1:].sum()

In [None]:
# Error types (column 0 of an error descriptor):
#   1: too soon  - right gloss, prediction starts at or before the target start
#   2: too late  - right gloss, prediction ends at or after the target stop
#   3: misclassification
#   4: false negative
#   5: false positive (if not too soon or too late)
#   6: other (listed for completeness; triage_errors never emits it)
#
# Error descriptor, one row per error, 9 columns:
#   (type, quantity,
#    target gloss, target start, target stop,
#    predicted gloss, predicted start, predicted stop,
#    index of the sequence)

acceptance_ratio = .5  # matching ratio over the target subsequence to accept detection

# Number of columns of an error descriptor (see above).
N_ERROR_FIELDS = 9


def triage_errors(preds, gloss_seqs, durations):
    """Classify the disagreements between predictions and annotations.

    Every predicted segment is compared with every target segment of the same
    sequence. Segments are the (gloss, start, stop) triples given by
    ``seq2gloss``; gloss 0 is the blank.

    Parameters
    ----------
    preds : sequence of arrays
        One predicted label sequence per recording. Each must support slicing
        and element-wise comparison with 0 (as numpy arrays do).
    gloss_seqs, durations : sequences
        Annotations and durations, passed pairwise to ``gloss2seq(g, d, 0)``
        to rebuild the target label sequence of each recording.

    Returns
    -------
    numpy.ndarray of shape (n_errors, 9)
        One row per error, laid out as the error descriptor documented above
        this function. The array is 2-D even when no error is found, so that
        column indexing (``errors[:, 0]``) always works.

    Notes
    -----
    * The "quantity" column holds: ``start - pstart`` for type 1,
      ``pstop - stop`` for type 2, the length of the intersection for types 3
      and 5, and the number of blank predictions inside the target for type 4.
    * Types 1 and 2 use non-strict comparisons, so a detection whose boundary
      coincides with the target boundary is recorded with quantity 0.
    * Reads the module-level ``acceptance_ratio`` at call time.
    """
    errors = []

    for i, (pseq, gseq, d) in enumerate(zip(preds, gloss_seqs, durations)):
        l = gloss2seq(gseq, d, 0)
        tgseq = seq2gloss(l)  # reintroduces segments for blanks
        pgseq = seq2gloss(pseq)

        for pg, pstart, pstop in pgseq:
            # Every error type derived from a predicted segment requires a
            # non-blank prediction, so blank segments can be skipped outright.
            if pg == 0:
                continue

            for tg, start, stop in tgseq:
                intersection = min(stop, pstop) - max(start, pstart)
                # The intersection spans more than the accepted share of the target...
                covers_target = intersection > (stop - start) * acceptance_ratio
                # ...or more than the accepted share of the prediction.
                covers_pred = intersection > (pstop - pstart) * acceptance_ratio

                # detected, but too soon
                if covers_target and pg == tg and pstart <= start:
                    errors.append((1, start - pstart, tg, start, stop, pg, pstart, pstop, i))

                # detected, but too late
                if covers_target and pg == tg and pstop >= stop:
                    errors.append((2, pstop - stop, tg, start, stop, pg, pstart, pstop, i))

                # misclassification
                if covers_target and pg != tg and tg != 0:
                    errors.append((3, intersection, tg, start, stop, pg, pstart, pstop, i))

                # false positive
                if tg == 0 and covers_pred:
                    errors.append((5, intersection, 0, start, stop, pg, pstart, pstop, i))

        for tg, start, stop in tgseq:
            # false negative: most of the target segment is predicted as blank
            if tg != 0 and np.mean(pseq[start:stop] == 0) > acceptance_ratio:
                errors.append((4, np.sum(pseq[start:stop] == 0), tg, start, stop, 0, -1, -1, i))

    # Force a 2-D result: np.array([]) would otherwise have shape (0,).
    return np.array(errors).reshape(-1, N_ERROR_FIELDS)
                
# Triage the validation errors of both models against the same annotations.
hmm_errors, rnn_errors = (
    triage_errors(model_preds, gloss_seqs_val, durations_val)
    for model_preds in (hmm_preds, rnn_preds))

In [None]:
def _error_quantities(errors, error_type):
    """Return the quantity column (index 1) of the rows whose type (index 0) is `error_type`.

    An empty array is returned when `errors` is not 2-D, which is what
    ``np.array([])`` looks like when no error at all was recorded.
    """
    errors = np.asarray(errors)
    if errors.ndim != 2:
        return np.empty(0)
    return errors[errors[:, 0] == error_type, 1]


def plot_error_panel(ax, errors_by_model, error_type, title, bins=None, n_edges=20):
    """Draw the overlaid per-model histograms of one error type on `ax`.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    errors_by_model : sequence of (label, errors) pairs, drawn in that order.
    error_type : value matched against column 0 of each `errors` array.
    title : title of the panel.
    bins : bin edges; when None, `n_edges` evenly spaced edges from 0 to the
        largest observed quantity are used. Quantities outside the edges are
        not counted (np.histogram behaviour).
    n_edges : number of edges of the data-driven binning.
    """
    quantities = [(label, _error_quantities(errors, error_type))
                  for label, errors in errors_by_model]

    if bins is None:
        # Guard against empty selections (max() of an empty array raises) and
        # against a degenerate all-zero range.
        upper = max([q.max() for _, q in quantities if q.size], default=1)
        bins = np.linspace(0, max(upper, 1), n_edges)

    for label, q in quantities:
        counts, _ = np.histogram(q, bins=bins)
        # Each bar spans exactly its bin: left-aligned on the lower edge and
        # as wide as the bin, rather than 0.8 wide and centred on the edge.
        ax.bar(bins[:-1], counts, width=np.diff(bins), align='edge', alpha=.5, label=label)
    ax.legend()
    ax.set_title(title)


# (error type, panel title, bin edges or None for data-driven edges)
ERROR_PANELS = [
    (1, "too soon", np.linspace(0, 10, 10)),
    (2, "too late", np.linspace(0, 10, 10)),
    (3, "misclassification", None),
    (4, "false negative", None),
    (5, "false positive", None),
]

fig = plt.figure(figsize=(12, 6))
for position, (error_type, title, bins) in enumerate(ERROR_PANELS, start=1):
    plot_error_panel(fig.add_subplot(2, 3, position),
                     (("hmm", hmm_errors), ("rnn", rnn_errors)),
                     error_type, title, bins)
plt.show()