In [None]:
# Reload imported modules automatically so edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

import os
import sys
# Make the directory two levels above the current working directory importable;
# the `experiments` / `sltools` imports below presumably resolve from there — TODO confirm.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.getcwd())))

# Set before theano is imported (next cell) so the flag is read at import time.
os.environ['THEANO_FLAGS'] = "device=cpu"

In [None]:
import shelve
from bisect import bisect
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lproc import rmap, subset, chunk_load
import matplotlib.pyplot as plt

# Widen numpy's printed line width to 100 characters for array display.
np.set_printoptions(linewidth=100)

In [None]:
from experiments.siamese_triplet.a_data import tmpdir

# Load dataset

In [None]:
from sltools.nn_utils import adjust_length

from experiments.siamese_triplet.a_data import \
    durations, labels, transformations, \
    train_subset, val_subset
from experiments.siamese_triplet.b_preprocess import skel_feat_seqs

# Length (in time steps) passed to `adjust_length` for every sequence.
max_time = 128


def _to_fixed_length(seq):
    """Apply `adjust_length` to one sequence with the notebook-wide `max_time`."""
    return adjust_length(seq, max_time)


skel_feat_seqs = rmap(_to_fixed_length, skel_feat_seqs)

# --- training partition ---
# Features are kept in a list: one entry per input modality (only one here).
feats_seqs_train = [subset(skel_feat_seqs, train_subset)]
transformations_train = subset(transformations, train_subset)
labels_train, durations_train = (
    arr[train_subset].astype(np.int32) for arr in (labels, durations))

# --- validation partition ---
feats_seqs_val = [subset(skel_feat_seqs, val_subset)]
transformations_val = subset(transformations, val_subset)
labels_val, durations_val = (
    arr[val_subset].astype(np.int32) for arr in (labels, durations))

# Build model

In [None]:
from experiments.siamese_triplet.c_model import skel_rnn

batch_size = 16

# Pre-allocated arrays handed to `chunk_load` during training: one float32
# array per feature modality, followed by one int32 vector for the durations.
# Each holds 4 * batch_size entries.
n_slots = 4 * batch_size
buffers = []
for seqs in feats_seqs_train:
    frame_shape = seqs[0].shape[1:]
    buffers.append(np.zeros((n_slots, max_time) + frame_shape, dtype=np.float32))
buffers.append(np.zeros((n_slots,), dtype=np.int32))

In [None]:
# Build the network from the shape of the first sequence of the dataset.
first_seq_shape = skel_feat_seqs[0][0].shape
model = skel_rnn(first_seq_shape, batch_size, max_time)

# Pull out the layers the rest of the notebook refers to.
l_linout, l_in, l_duration = (
    model[key] for key in ('l_linout', 'l_in', 'l_duration'))

In [None]:
# Persistent per-epoch training report (losses and parameters), stored under tmpdir.
# NOTE(review): the shelf is never closed in this notebook; call report.close()
# once the evaluation cells are done so pending writes are flushed.
report = shelve.open(os.path.join(tmpdir, "rnn_report"))

# Run training iterations

In [None]:
from sltools.models.siamese import triplet_loss

# The learning rate is a symbolic input, so it is supplied on every call.
l_rate_var = T.scalar('l_rate')

# Training-mode forward pass (deterministic=False).
linout = lasagne.layers.get_output(l_linout, deterministic=False)

# The batch is read as interleaved triplets: rows 0, 3, 6, ... / 1, 4, 7, ... /
# 2, 5, 8, ... are the first, second and third members respectively.
left_out, middle_out, right_out = (linout[offset::3] for offset in range(3))
loss = triplet_loss(left_out, middle_out, right_out).sum()

params = lasagne.layers.get_all_params(l_linout, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=l_rate_var)

# Inputs, in call order: one per feature modality, then durations, then learning rate.
input_vars = [layer.input_var for layer in l_in]
input_vars += [l_duration.input_var, l_rate_var]
update_fn = theano.function(input_vars, outputs=loss, updates=updates)

In [None]:
def sample_triplets(vocabulary, labels, n, test=None):
    """Draw ``n`` random index triplets ``(left, middle, right)``.

    ``left`` cycles through the samples in order (``i % len(labels)``),
    ``middle`` is drawn uniformly among samples sharing the label of ``left``
    (it may be ``left`` itself) and ``right`` uniformly among samples with a
    different label.

    Parameters
    ----------
    vocabulary : iterable
        Label values to index; must contain every value found in ``labels``.
    labels : np.ndarray
        1-D array with the label of each sample.
    n : int
        Number of triplets to generate. May be smaller or larger than
        ``len(labels)``.
    test : callable, optional
        ``test(left, middle, right) -> bool``. ``middle`` and ``right`` are
        redrawn until it returns true. There is no retry limit, so a predicate
        that can never be satisfied for some ``left`` loops forever.

    Returns
    -------
    np.ndarray
        Array of shape ``(n, 3)`` and dtype ``uint64``.
    """
    accept = test or (lambda *_: True)
    where_labels = {l: np.where(labels == l)[0] for l in vocabulary}
    where_not_labels = {l: np.where(labels != l)[0] for l in vocabulary}

    n_samples = len(labels)
    triplets = np.empty((n, 3), dtype=np.uint64)

    # Iterate over the requested number of triplets, not over len(labels):
    # `triplets` has exactly n rows.
    for i in range(n):
        left = i % n_samples
        same = where_labels[labels[left]]
        other = where_not_labels[labels[left]]

        middle = np.random.choice(same)
        right = np.random.choice(other)
        while not accept(left, middle, right):
            middle = np.random.choice(same)
            right = np.random.choice(other)

        triplets[i] = [left, middle, right]

    return triplets

In [None]:
# Start from an empty report: results of earlier runs are discarded.
report.clear()

l_rate = 1e-4
running_loss = 0

train_vocabulary = sorted(set(labels_train))


def _different_transformation(i, j, k):
    """Accept a triplet only if its first two members differ in transformation[0]."""
    return transformations_train[i][0] != transformations_train[j][0]


for e in range(10):
    batch_losses = []

    # One triplet per training sample, redrawn at every epoch.
    triplets = np.array(sample_triplets(
        train_vocabulary, labels_train, len(labels_train),
        test=_different_transformation))

    # Flatten row by row so that three consecutive items form one triplet,
    # matching the stride-3 slicing used when the loss was built.
    flat_idx = np.concatenate(triplets)
    inputs = [subset(f, flat_idx) for f in feats_seqs_train]
    inputs.append(durations_train[flat_idx])
    # NOTE(review): batch_size (16) is not a multiple of 3 — confirm how
    # chunk_load / skel_rnn size a batch so triplets are not split across batches.
    minibatches = chunk_load(inputs, buffers, bloc_size=batch_size, drop_last=True)

    for i, (x, d) in enumerate(minibatches):
        batch_loss = float(update_fn(x, d, l_rate))
        batch_losses.append(batch_loss)
        # Exponential moving average of the batch loss (carried across epochs).
        running_loss = .98 * running_loss + .02 * batch_loss
        if i % 30 == 0:
            print("\rloss: {}".format(running_loss), end="", flush=True)

    # Snapshot of this epoch: raw losses, smoothed loss and all parameter values.
    report[str(e)] = {
        'batch_losses': batch_losses,
        'epoch_loss': running_loss,
        'params': lasagne.layers.get_all_param_values(l_linout)
    }

In [None]:
# A shelf gives no ordering guarantee on its keys/values, so read the epochs
# back explicitly in numeric order before concatenating their batch losses.
epoch_keys = sorted(report.keys(), key=int)
batch_losses = np.concatenate([report[k]['batch_losses'] for k in epoch_keys])

# Smoothed curve: mean and standard deviation over a window of up to 80
# batches (40 on each side), evaluated every 20 batches.
half_window, stride = 40, 20
x = np.arange(0, len(batch_losses), stride)
windows = [batch_losses[max(0, i - half_window):i + half_window] for i in x]
y = np.array([np.mean(w) for w in windows])
err = np.array([np.std(w) for w in windows])

plt.figure(figsize=(12, 3))
plt.plot(x, y)
plt.fill_between(x, y - err, y + err, alpha=.3)
plt.xlabel("minibatch")
plt.ylabel("batch loss")
plt.show()

In [None]:
# Epochs currently stored in the report (keys are the epoch numbers as strings).
list(report.keys())

# Evaluate performance

In [None]:
from sltools.models.siamese import build_predict_fn

# Epoch whose saved parameters are evaluated (9 is the last of the 10 training epochs).
iteration = 9
saved_epoch = report[str(iteration)]
lasagne.layers.set_all_param_values(l_linout, saved_epoch['params'])

predict_fn = build_predict_fn(model, batch_size)

In [None]:
# Run the prediction function on both partitions; inputs are the feature
# sequences followed by the durations, in that order.
train_vects, val_vects = (
    predict_fn(feats + [durs])
    for feats, durs in ((feats_seqs_train, durations_train),
                        (feats_seqs_val, durations_val)))

In [None]:
from bisect import bisect_left
from scipy.spatial.distance import cdist, pdist, squareform


def experiment(negvects, posvects):
    """Rank the nearest same-class example among the negatives, for 1 to N-1 shots.

    For each ``i`` in ``1 .. len(posvects) - 1``, ``posvects[i]`` is the query
    and ``posvects[:i]`` are the ``i`` same-class examples available. The rank
    is the number of negative vectors strictly closer to the query than its
    nearest same-class example (0 means the nearest neighbour is a positive).

    Parameters
    ----------
    negvects : np.ndarray
        2-D array, one negative vector per row.
    posvects : np.ndarray
        2-D array, one same-class vector per row.

    Returns
    -------
    np.ndarray
        Float array of shape ``(len(posvects) - 1, 3)`` whose columns are
        ``(i, distance to nearest positive, rank)``. Shape ``(0, 3)`` when
        fewer than two positive vectors are given.
    """
    if len(posvects) < 2:
        return np.empty((0, 3))

    negdists = cdist(posvects, negvects)
    posdists = squareform(pdist(posvects))

    results = []
    for i in range(1, len(posvects)):
        posd = np.min(posdists[i, :i])
        # negdists[i] is in arbitrary order, so a binary search on it is not
        # valid; count the closer negatives directly instead.
        rank = np.count_nonzero(negdists[i] < posd)
        results.append((i, posd, rank))

    return np.array(results, dtype=float)


def compute_perfs(vects, labels, ntests, k, n):
    """Repeat the nearest-neighbour ranking experiment on random class subsets.

    Each trial draws ``k`` distinct classes at random. The last one is the
    positive class, the other ``k - 1`` supply the negatives. Up to ``n``
    random samples are taken per class and passed to ``experiment``.

    Parameters
    ----------
    vects : np.ndarray
        2-D array of embedding vectors, one per sample.
    labels : np.ndarray
        1-D array with the class of each row of ``vects``.
    ntests : int
        Number of random trials.
    k : int
        Number of classes per trial (positive class included).
    n : int
        Maximum number of samples drawn per class.

    Returns
    -------
    np.ndarray
        Array of shape ``(m, 4)`` with columns
        ``(positive label, i, distance to nearest positive, rank)``, the last
        three being the columns returned by ``experiment``.
    """
    vocabulary = np.sort(np.unique(labels))
    idx_where = {l: np.where(labels == l)[0] for l in vocabulary}

    results = []
    for _ in range(ntests):
        voca_subset = np.random.permutation(vocabulary)[:k]
        pos_label = voca_subset[-1]

        neg_where = np.concatenate([
            idx_where[l][np.random.permutation(len(idx_where[l]))[:n]]
            for l in voca_subset[:-1]])
        negvects = vects[neg_where]

        pos_where = idx_where[pos_label][
            np.random.permutation(len(idx_where[pos_label]))[:n]]
        posvects = vects[pos_where]

        # A class with a single sample has no same-class neighbour to rank.
        if len(posvects) < 2:
            continue

        ranks = experiment(negvects, posvects)
        # Size the label column from the rows actually returned: a class with
        # fewer than n samples yields fewer than n - 1 rows.
        label_col = np.full((len(ranks), 1), pos_label)
        results.extend(np.concatenate([label_col, ranks], axis=1))

    return np.array(results).reshape(-1, 4)

# Same protocol on both partitions: 1000 trials, 5 classes per trial,
# at most 40 samples per class.
perf_args = dict(ntests=1000, k=5, n=40)

# Only the first fifth of the training set is evaluated.
n_train_eval = len(labels_train) // 5
results_train = compute_perfs(
    train_vects[:n_train_eval], labels_train[:n_train_eval], **perf_args)

results_val = compute_perfs(val_vects, labels_val, **perf_args)

In [None]:
# Mean rank (column 3) for each number of shots (column 1), training set.
train_shots, train_ranks = results_train[:, 1], results_train[:, 3]
[np.mean(train_ranks[train_shots == shot]) for shot in range(1, 15)]

In [None]:
# Mean rank (column 3) for each number of shots (column 1), validation set.
val_shots, val_ranks = results_val[:, 1], results_val[:, 3]
[np.mean(val_ranks[val_shots == shot]) for shot in range(1, 15)]

In [None]:
# Rank distribution on the training set for 1 to 5 shots.
# Ranks >= cutoff are pooled into a single last bar.
cutoff = 16
fig, axs = plt.subplots(nrows=5, ncols=1, figsize=(5, 12))
for i, ax in zip(range(1, 6), axs):
    v = results_train[results_train[:, 1] == i, 3]
    # Clip before counting so the histogram always has cutoff + 1 bins, even
    # when no rank reaches the cutoff.
    h = np.bincount(np.minimum(v, cutoff).astype(int), minlength=cutoff + 1)
    # max(..., 1) avoids a division by zero when there is no result for this shot count.
    ax.bar(np.arange(cutoff + 1), h / max(h.sum(), 1))
    ax.set_ylim((0, 1))
    ax.set_title("{}-shot".format(i))
    ax.set_xlabel("rank")
    ticks = list(range(0, cutoff, 2))
    ax.set_xticks(ticks + [cutoff])
    # The last bar includes rank == cutoff, hence ">=".
    ax.set_xticklabels(ticks + [">={}".format(cutoff)])

plt.tight_layout()
plt.show()

In [None]:
# Rank distribution on the validation set for 1 to 5 shots.
# Ranks >= cutoff are pooled into a single last bar.
cutoff = 12
fig, axs = plt.subplots(nrows=5, ncols=1, figsize=(5, 12))
for i, ax in zip(range(1, 6), axs):
    v = results_val[results_val[:, 1] == i, 3]
    # Clip before counting so the histogram always has cutoff + 1 bins, even
    # when no rank reaches the cutoff.
    h = np.bincount(np.minimum(v, cutoff).astype(int), minlength=cutoff + 1)
    # max(..., 1) avoids a division by zero when there is no result for this shot count.
    ax.bar(np.arange(cutoff + 1), h / max(h.sum(), 1))
    ax.set_ylim((0, 1))
    ax.set_title("{}-shot".format(i))
    ax.set_xlabel("rank")
    ticks = list(range(0, cutoff, 2))
    ax.set_xticks(ticks + [cutoff])
    ax.set_xticklabels(ticks + [">={}".format(cutoff)])

plt.tight_layout()
plt.show()

In [None]:
# Distribution of column 2 of the validation results: the distance from each
# query to its nearest same-class example.
plt.hist(results_val[:, 2], bins=30)