In [None]:
# Notebook setup: reload edited modules automatically before each execution and
# render matplotlib figures inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
import sys
# Put the directory two levels above the current working directory first on the
# import path (presumably the repository root, so that the `experiments.*` imports
# of the next cell resolve -- confirm against the project layout).
sys.path.insert(0, os.path.dirname(os.path.dirname(os.getcwd())))

# Ask Theano to run on device "cuda0". This overwrites any THEANO_FLAGS value already
# present in the environment. It is set before the cell that imports lasagne,
# presumably because Theano reads the variable when it is first imported -- TODO confirm.
os.environ['THEANO_FLAGS'] = "device=cuda0"

In [None]:
import os
import re
import shelve
import numpy as np
import lasagne
import seqtools
import matplotlib.pyplot as plt
# Axes3D is never referenced by name in this notebook; on older matplotlib releases
# importing it is what registers the '3d' projection requested by the display cell,
# so do not remove it as "unused".
from mpl_toolkits.mplot3d import Axes3D

from experiments.siamese_triplet.a_data import \
    cachedir, durations, labels, recordings, \
    train_subset, val_subset
from experiments.siamese_triplet.b_preprocess import skel_feat_seqs
from experiments.siamese_triplet.c_model import skel_rnn, build_predict_fn
from experiments.siamese_triplet.common import sample_episode, \
    evaluate_knn, evaluate_matching

# Let numpy print up to 100 characters per line before wrapping arrays.
np.set_printoptions(linewidth=100)

In [None]:
# Data ----------------------------------------------------------------------------------

# Feature sequences are kept in a list with one entry per feature collection (the model
# cell iterates over this list to obtain one input shape per entry); here there is a
# single collection, the skeleton features restricted to each subset.
feat_seqs_train = [seqtools.gather(skel_feat_seqs, train_subset)]
labels_train = labels[train_subset].astype(np.int32)
recordings_train = recordings[train_subset]
durations_train = durations[train_subset].astype(np.int32)

feat_seqs_val = [seqtools.gather(skel_feat_seqs, val_subset)]
labels_val = labels[val_subset].astype(np.int32)
recordings_val = recordings[val_subset]
durations_val = durations[val_subset].astype(np.int32)

# Remove the notebook-level names of the unsplit data, presumably so that later cells
# cannot use them by mistake. NOTE: this makes the cell single-shot -- running it a
# second time requires re-running the import cell first.
del recordings, labels, durations, skel_feat_seqs

# The report is only ever read in this notebook: open it read-only so that a missing or
# mistyped path fails right here instead of silently creating an empty shelf (which
# would only surface later as KeyError: 'meta'), and so that the stored results cannot
# be modified by accident.
report = shelve.open(os.path.join(cachedir, "rnn_report"), flag='r')

In [None]:
# Model ---------------------------------------------------------------------------------

# Hyper-parameters stored in the report under the 'meta' entry. The entry is fetched
# once: every access to a shelf key unpickles the stored value again.
meta = report['meta']
modality = meta['modality']
max_time = meta['max_time']
batch_size = meta['batch_size']
encoder_kwargs = meta['encoder_kwargs']

# Rebuild the network with the stored hyper-parameters. One positional shape argument is
# passed per feature collection, taken from `f[0][0]` of each collection.
model_dict = skel_rnn(
    *tuple(f[0][0].shape for f in feat_seqs_train),
    batch_size=batch_size, max_time=max_time,
    encoder_kwargs=encoder_kwargs)

l_linout = model_dict['l_linout']
l_in = model_dict['l_in']
l_duration = model_dict['l_duration']

# Restore the parameters of the latest saved iteration. Iteration entries are the keys
# made of digits only; the pattern is anchored at the end so that a key which merely
# starts with digits is ignored instead of crashing int(). The original key string is
# kept (rather than str(int(key))) so that the lookup below always uses a key that
# really exists in the shelf.
iteration_keys = [k for k in report.keys() if re.match(r'[0-9]+$', k)]
if not iteration_keys:
    raise KeyError("no saved iteration found in the report")
last_iteration = max(iteration_keys, key=int)
# Uncomment to evaluate a specific iteration instead of the latest one:
# last_iteration = '900'
lasagne.layers.set_all_param_values(l_linout, report[last_iteration]['params'])

In [None]:
# Evaluation routines -------------------------------------------------------------------

# Build the prediction function from the batch size and maximum duration stored in the
# report, then embed the training and validation sequences.
predict_fn = build_predict_fn(
    model_dict,
    report['meta']['batch_size'],
    report['meta']['max_time'],
)

embeddings_train = predict_fn(feat_seqs_train, durations_train)
embeddings_val = predict_fn(feat_seqs_val, durations_val)

def evaluate(embeddings_, labels_, recordings_,
             shots_grid_, voca_size_grid_,
             classifier_, n_episodes_, neighbours_=None):
    """Estimate few-shot accuracy over a grid of (shots, vocabulary size) settings.

    For every grid point, `n_episodes_` episodes are drawn with `sample_episode`, each
    episode is scored with the selected classifier, and the accuracy is the fraction
    of returned ranks equal to 0, pooled over all episodes of that grid point.

    :param embeddings_: array of embeddings, indexed by the subsets of each episode
    :param labels_: array of labels aligned with `embeddings_`
    :param recordings_: per-sample recording identifiers, forwarded to `sample_episode`
    :param shots_grid_: sequence of shot counts to evaluate
    :param voca_size_grid_: sequence of vocabulary sizes to evaluate
    :param classifier_: "kernel" (uses `evaluate_matching`) or "knn" (uses `evaluate_knn`)
    :param n_episodes_: number of episodes sampled per grid point
    :param neighbours_: `k` passed to `evaluate_knn`; when left to None with the "knn"
        classifier, the notebook-level variable `neighbourgs` is used, as before
    :return: float array of shape (len(shots_grid_), len(voca_size_grid_)); a grid point
        for which no rank was collected (e.g. `n_episodes_` == 0) is NaN
    :raises ValueError: if `classifier_` is neither "kernel" nor "knn"
    """
    # Fail fast: reject a bad classifier name before any episode is sampled, and even
    # when one of the grids is empty.
    if classifier_ not in ("kernel", "knn"):
        raise ValueError("unsupported classifier type")

    if classifier_ == "knn" and neighbours_ is None:
        # Backward compatibility with callers relying on the notebook global.
        neighbours_ = neighbourgs

    vocabulary = np.unique(labels_)
    results = np.empty((len(shots_grid_), len(voca_size_grid_)))

    for i, shots in enumerate(shots_grid_):
        for j, voca_size in enumerate(voca_size_grid_):
            ranks = []
            for _ in range(n_episodes_):
                # The third value returned by sample_episode is not needed here.
                ep_train_subset, ep_test_subset, _unused = sample_episode(
                    labels_, recordings_, vocabulary, voca_size, shots)

                if classifier_ == "kernel":
                    ep_ranks = evaluate_matching(
                        embeddings_[ep_train_subset],
                        embeddings_[ep_test_subset],
                        labels_[ep_train_subset],
                        labels_[ep_test_subset],
                        voca_size, shots)
                else:  # "knn", guaranteed by the validation above
                    ep_ranks = evaluate_knn(
                        embeddings_[ep_train_subset],
                        embeddings_[ep_test_subset],
                        labels_[ep_train_subset],
                        labels_[ep_test_subset],
                        k=neighbours_)

                ranks.extend(ep_ranks)

            # A prediction is counted as correct when its rank is 0. Without any rank the
            # accuracy is undefined: store NaN explicitly instead of averaging an empty
            # array (same value, without numpy's "mean of empty slice" warning).
            results[i, j] = np.mean(np.array(ranks) == 0) if ranks else np.nan

    return results

In [None]:
# Evaluate model ------------------------------------------------------------------------

# Settings of the evaluation; `neighbourgs` is only read by evaluate() for the "knn"
# classifier.
classifier = "kernel"
neighbourgs = 1
shots_grid = np.arange(1, 6)            # 1, 2, 3, 4, 5
voca_size_grid = np.arange(5, 31, 5)    # 5, 10, 15, 20, 25, 30
n_episodes = 1000

# Same grid and settings for both subsets; training set first, then validation set.
grid_args = (shots_grid, voca_size_grid, classifier, n_episodes)
result_train = evaluate(
    embeddings_train, labels_train, recordings_train, *grid_args)
result_val = evaluate(
    embeddings_val, labels_val, recordings_val, *grid_args)

In [None]:
# Display -------------------------------------------------------------------------------

# Coordinates of every (shots, vocabulary size) grid point, shaped like the result
# arrays: shots vary along the first axis, vocabulary sizes along the second one.
# Computed once and shared by all the surface/scatter calls below.
shots_mesh, voca_mesh = np.meshgrid(shots_grid, voca_size_grid, indexing='ij')

fig = plt.figure(figsize=(8, 7))
ax = fig.add_subplot(111, projection='3d')

# Dotted vertical segments joining the training and validation accuracy of each grid point
for i, shots in enumerate(shots_grid):
    for j, voca_size in enumerate(voca_size_grid):
        ax.plot(
            [shots, shots],
            [voca_size, voca_size],
            [result_train[i, j], result_val[i, j]],
            alpha=.5, c='k', linestyle=':')

# Training accuracy: faint markers, plus one accuracy-vs-shots curve per vocabulary size
# drawn on the y=0 plane
ax.scatter(
    shots_mesh, voca_mesh, result_train,
    c=np.ravel(result_train),
    alpha=.5)
for j in range(len(voca_size_grid)):
    ax.plot(shots_grid, result_train[:, j], zs=0, zdir='y', c='black', alpha=0.1)

# Validation accuracy: wireframe and opaque markers, plus the same curves on the y=0
# plane (drawn darker than the training ones)
ax.plot_wireframe(
    shots_mesh, voca_mesh, result_val,
    alpha=1.)
ax.scatter(
    shots_mesh, voca_mesh, result_val,
    c=np.ravel(result_val),
    alpha=1.)
for j in range(len(voca_size_grid)):
    ax.plot(shots_grid, result_val[:, j], zs=0, zdir='y', c='black', alpha=0.3)

ax.set_xticks(shots_grid)
ax.set_yticks(voca_size_grid)
ax.set_xlabel("shots")
ax.set_ylabel("vocabulary")
ax.set_zlabel("accuracy")
ax.set_title("Accuracy: training (faint markers) vs. validation (wireframe)")

# Alternative viewpoint:
# ax.view_init(15, 290)
ax.view_init(20, 140)

# plt.show() instead of fig.show(): with the inline backend selected by
# `%matplotlib inline`, Figure.show() cannot display anything and only emits a
# "non-GUI backend" warning.
plt.show()