In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys
# Prepend the grandparent of the current working directory to the module
# search path (presumably the project root, so that the `experiments`
# package imported below resolves -- confirm against the repo layout).
sys.path.insert(0, os.path.dirname(os.path.dirname(os.getcwd())))

# Ask Theano for the "cuda0" device through its environment flags.
# NOTE(review): presumably this has to run before theano is first imported.
os.environ['THEANO_FLAGS'] = "device=cuda0"

In [None]:
import shelve
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lproc import rmap, subset, chunk_load
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve
from sltools.utils import pad_seq, dichotomy

from experiments.siamese_oneshot.a_data import tmpdir, \
    durations, labels, transformations, \
    train_subset, val_subset
from experiments.siamese_oneshot.b_preprocess import feat_seqs
from experiments.siamese_oneshot.c_model import build_model

# Let NumPy use up to 100 characters per line when printing arrays.
np.set_printoptions(linewidth=100)

# Load dataset

In [None]:
# Target length handed to pad_seq for every feature sequence.
max_time = 128


def _pad_to_max_time(seq):
    """Run pad_seq on one sequence with the notebook-wide max_time."""
    return pad_seq(seq, max_time)


feat_seqs = rmap(_pad_to_max_time, feat_seqs)

# Train / validation views, grouped by kind of data rather than by split.
feat_seqs_train, feat_seqs_val = (
    subset(feat_seqs, idx) for idx in (train_subset, val_subset))
transformations_train, transformations_val = (
    subset(transformations, idx) for idx in (train_subset, val_subset))
labels_train, labels_val = (
    labels[idx].astype(np.int32) for idx in (train_subset, val_subset))
durations_train, durations_val = (
    durations[idx] for idx in (train_subset, val_subset))

# Shape of the first element of the first padded sequence.
input_shape = feat_seqs[0][0].shape

# Build model

In [None]:
def cosine(x1, x2):
    """Return one minus the row-wise cosine similarity of ``x1`` and ``x2``.

    Both arguments must support elementwise ``*``, ``.sum(axis=1)`` and
    ``.norm(2, axis=1)``; the notebook passes Theano expressions. A small
    constant is added to each norm before dividing, so rows of zeros do not
    cause a division by zero.
    """
    eps = 0.0001
    dot = (x1 * x2).sum(axis=1)
    norm1 = x1.norm(2, axis=1) + eps
    norm2 = x2.norm(2, axis=1) + eps
    return 1 - dot / norm1 / norm2

In [None]:
# Number of sample pairs processed per call to the compiled functions.
batch_size = 8

# Preallocated arrays passed to chunk_load, one per loaded field, each with
# room for 4 * batch_size pairs:
#   - the two feature sequences of each pair (float32),
#   - the two durations of each pair (int32),
#   - the positive/negative flag of each pair.
# The builtin `bool` replaces the `np.bool` alias, which NumPy deprecated in
# 1.20 and removed in 1.24; the resulting dtype is the same.
buffers = [
    np.zeros((4 * batch_size, 2, max_time) + input_shape, dtype=np.float32),
    np.zeros((4 * batch_size, 2), dtype=np.int32),
    np.zeros((4 * batch_size,), dtype=bool)]

In [None]:
# The network processes 2 * batch_size sequences per call: both members of
# every pair in a batch. build_model returns a mapping of named layers.
model = build_model(input_shape, 2 * batch_size, max_time)
l_in, l_durations, l_linout = (
    model[name] for name in ('l_in', 'l_duration', 'l_linout'))

In [None]:
# Inference graph (deterministic=True). Consecutive rows of the network
# output belong to the same pair, so even rows are compared with odd rows.
# The pair score is the same cosine distance the training loss optimises;
# the threshold search and the histograms in this notebook assume scores on
# that scale, which the Euclidean norm of the raw difference did not give.
linout = lasagne.layers.get_output(l_linout, deterministic=True)
dists = cosine(linout[0::2], linout[1::2])
predict_fn = theano.function([l_in.input_var, l_durations.input_var], outputs=dists)

In [None]:
# Symbolic inputs: one target per pair and the learning rate given to Adam.
targets_var = T.vector('targets')
l_rate_var = T.scalar('l_rate')

# Training graph (deterministic=False); even rows are paired with odd rows.
linout = lasagne.layers.get_output(l_linout, deterministic=False)
dists = cosine(linout[0::2], linout[1::2])

# Two-margin loss: pairs whose target exceeds .1 are penalised for any
# distance above .25, all other pairs for any distance below .75.
is_positive = targets_var > .1
pull_term = .5 * T.maximum(0, dists - .25) ** 2
push_term = .5 * T.maximum(0, .75 - dists) ** 2
loss = T.switch(is_positive, pull_term, push_term).sum()

params = lasagne.layers.get_all_params(l_linout, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=l_rate_var)

# One call runs a single Adam step and returns the summed batch loss.
update_inputs = [l_in.input_var, l_durations.input_var, targets_var, l_rate_var]
update_fn = theano.function(update_inputs, outputs=loss, updates=updates)

# Run training iterations

In [None]:
def compute_perfs(feat_seqs_, durations_, pairs_, tgts_, thres_=None):
    """Score sample pairs with ``predict_fn`` and measure error rates.

    Relies on the notebook globals ``batch_size``, ``buffers``, ``max_time``,
    ``input_shape`` and ``predict_fn``.

    Parameters
    ----------
    feat_seqs_ : indexable collection of feature sequences.
    durations_ : indexable collection of durations, aligned with
        ``feat_seqs_``.
    pairs_ : sequence of ``(i, j)`` index pairs into the two collections.
    tgts_ : per-pair targets; values above .5 mark positive pairs.
    thres_ : decision threshold on the predicted distance. When ``None`` it
        is searched with ``dichotomy`` over [0, 1] using an objective that
        is zero where the false negative and false positive rates are equal.

    Returns
    -------
    tuple
        ``(all_targets, all_preds, thres_, fpr, fnr)``. Only whole batches
        are scored, so the arrays hold
        ``len(tgts_) - len(tgts_) % batch_size`` entries.
    """
    data_pairs = rmap(lambda p: np.stack([feat_seqs_[p[0]], feat_seqs_[p[1]]]), pairs_)
    duration_pairs = np.array([[durations_[p0], durations_[p1]] for p0, p1 in pairs_])

    # The loader is created with drop_last=True, so size the outputs for
    # whole batches only.
    n_scored = len(tgts_) - len(tgts_) % batch_size
    all_preds = np.empty((n_scored,))
    all_targets = np.empty((n_scored,))

    # Use the targets given as argument; the previous code read the global
    # `targets`, which only worked when the caller's global happened to match.
    loader = chunk_load([data_pairs, duration_pairs, tgts_], buffers, batch_size, drop_last=True)

    i = 0
    for x, d, y in loader:
        # Flatten (batch, 2, ...) pairs into 2 * batch_size consecutive rows.
        all_preds[i:i + batch_size] = predict_fn(
            x.reshape((2 * batch_size, max_time) + input_shape),
            d.reshape((2 * batch_size,)))
        all_targets[i:i + batch_size] = y
        i += batch_size

    if thres_ is None:
        # Objective is 1 - fnr(t) / fpr(t), i.e. zero when both rates match.
        # NOTE(review): presumably dichotomy bisects for that root -- confirm
        # against sltools.utils.
        thres_ = dichotomy(
            lambda t: 1 - np.mean(all_preds[all_targets > .5] > t)
                      / np.mean(all_preds[all_targets < .5] < t),
            0, 1, it=20)

    # A positive pair is missed when its distance exceeds the threshold; a
    # negative pair is falsely accepted when its distance is below it.
    fnr = np.mean(all_preds[all_targets > .5] > thres_)
    fpr = np.mean(all_preds[all_targets < .5] < thres_)

    return all_targets, all_preds, thres_, fpr, fnr


def sample_pairs(vocabulary, labels, n, positive_ratio=0.6, test=None):
    """Draw ``n`` random index pairs labelled as same-class or not.

    Parameters
    ----------
    vocabulary : iterable of label values; every value found in ``labels``
        must be listed, otherwise the lookup for that label raises KeyError.
    labels : array of per-sample labels.
    n : number of pairs to draw. First members come from a random
        permutation of ``range(n)`` taken modulo ``len(labels)``.
    positive_ratio : probability that a pair is drawn as positive
        (second member has the same label as the first).
    test : optional predicate ``test(i, j)``. It is applied to positive
        pairs only: the second member is redrawn until it returns True, so
        the loop never ends if no candidate can pass. Negative pairs are
        not filtered.

    Returns
    -------
    pairs : ``(n, 2)`` uint64 array of sample indices.
    positive : ``(n,)`` boolean array, True for same-label pairs.
    """
    test = test or (lambda *_: True)
    where_labels = {l: np.where(labels == l)[0] for l in vocabulary}
    where_not_labels = {l: np.where(labels != l)[0] for l in vocabulary}

    pairs = np.empty((n, 2), dtype=np.uint64)
    # Builtin bool instead of the np.bool alias (deprecated in NumPy 1.20,
    # removed in 1.24); the array dtype is unchanged.
    positive = np.empty((n,), dtype=bool)

    for k, i in enumerate(np.random.permutation(n) % len(labels)):
        l = labels[i]
        p = np.random.random() < positive_ratio
        if p:
            j = np.random.choice(where_labels[l])
            while not test(i, j):
                j = np.random.choice(where_labels[l])
        else:
            j = np.random.choice(where_not_labels[l])

        pairs[k] = i, j
        positive[k] = p

    return pairs, positive

In [None]:
l_rate = 3e-4

# Per-epoch results are stored in a shelve database under tmpdir. The handle
# stays open after the loop because the plotting cell reads from `report`.
report = shelve.open(os.path.join(tmpdir, "rnn_report"))
running_loss = 0

for e in range(0, 30):
    # Fresh training pairs every epoch. Positive pairs are only accepted when
    # the first transformation entry of the two samples differs.
    pairs, targets = sample_pairs(
        np.unique(labels), labels_train, len(train_subset), .5,
        lambda i, j: transformations_train[j][0] != transformations_train[i][0])
    data_pairs = rmap(lambda p: np.stack([feat_seqs_train[p[0]], feat_seqs_train[p[1]]]), pairs)
    durations_pairs = rmap(lambda p: np.array([durations_train[p[0]], durations_train[p[1]]]), pairs)
    loader = chunk_load([data_pairs, durations_pairs, targets], buffers, batch_size, drop_last=True)

    batch_losses = []
    for (x, d, y) in loader:
        # Flatten (batch, 2, ...) pairs into 2 * batch_size consecutive rows.
        batch_loss = update_fn(
            x.reshape((2 * batch_size, max_time) + input_shape),
            d.reshape((2 * batch_size,)),
            y, l_rate)
        batch_losses.append(batch_loss)
        # Exponential moving average of the batch loss, kept across epochs.
        running_loss = .98 * running_loss + .02 * batch_loss
        if len(batch_losses) % 30 == 0:
            print("\rloss: {}".format(running_loss), end="", flush=True)

    # Report
    print("\repoch {:3d} loss : {}".format(e, running_loss))

    # Training pairs: the threshold is searched on these pairs (thres_=None).
    all_targets, all_preds, thres, fpr_train, fnr_train = compute_perfs(
        feat_seqs_train, durations_train, pairs, targets, None)
    print("         thres : ", thres)
    print("   fpr/fnr (t) : {:.2f}/{:.2f}".format(fpr_train, fnr_train))

    # Validation pairs are scored with the threshold found on training pairs.
    # compute_perfs already returns the rates at that threshold, so they are
    # not recomputed here.
    pairs, targets = sample_pairs(np.unique(labels), labels_val, len(val_subset), .5,
                                  lambda i, j: transformations_val[j][0] != transformations_val[i][0])
    all_targets, all_preds, thres, fpr_val, fnr_val = compute_perfs(
        feat_seqs_val, durations_val, pairs, targets, thres)
    print("   fpr/fnr (v) : {:.2f}/{:.2f}".format(fpr_val, fnr_val))

    report[str(e)] = {
        'batch_losses': batch_losses,
        'epoch_loss': running_loss,
        'score_train': (fpr_train, fnr_train),
        'score_val': (fpr_val, fnr_val),
        'params': lasagne.layers.get_all_param_values(l_linout)
    }
    # Flush the epoch to disk so an interrupted run keeps what it finished.
    report.sync()

In [None]:
# Error rates per epoch, read back from the shelve report. Each record is
# loaded once and only its two score tuples are kept, because the records
# also carry the full parameter values.
epoch_scores = [
    (record['score_train'], record['score_val'])
    for record in (report[str(i)] for i in range(30))]

# (position in epoch_scores entry, index in the (fpr, fnr) tuple, style, colour, label)
curves = [
    (0, 0, ':', 'red', 'fpr_train'),
    (0, 1, ':', 'blue', 'fnr_train'),  # was mislabelled 'fpr_train'
    (1, 0, '-', 'red', 'fpr_val'),
    (1, 1, '-', 'blue', 'fnr_val'),
]

plt.figure(figsize=(12, 4))
for split, rate, linestyle, color, label in curves:
    plt.plot(np.arange(30), [scores[split][rate] for scores in epoch_scores],
             linestyle=linestyle, color=color, label=label)
plt.legend()
plt.show()

# Preview results

In [None]:
def preview_perfs(all_targets, all_preds, thres_):
    """Print error rates at ``thres_`` and plot score histograms and a ROC curve.

    ``all_targets`` holds per-pair targets and ``all_preds`` the predicted
    distances. Left panel: normalised histograms over [0, 1] of the scores
    with target 1 (red) and target 0 (blue), with a vertical line at the
    threshold. Right panel: the output of ``roc_curve`` computed on the raw
    distances, drawn with its ``tpr`` on the x axis and its ``fpr`` on the
    y axis.
    """
    negative_scores = all_preds[all_targets < .5]
    positive_scores = all_preds[all_targets > .5]
    print("fpr = ", np.mean(negative_scores < thres_))
    print("fnr = ", np.mean(positive_scores > thres_))
    print("thres = ", thres_)

    plt.figure(figsize=(12, 3))

    # Left panel: score distributions, positives drawn first.
    plt.subplot(1, 2, 1)
    edges = np.linspace(0, 1, 40)
    for mask, colour in ((all_targets == 1, 'red'), (all_targets == 0, 'blue')):
        counts, _ = np.histogram(all_preds[mask], bins=edges)
        plt.bar(edges[:-1], counts / counts.sum(), width=.025, color=colour, alpha=.5)
    plt.plot([thres_, thres_], [0, .2])

    # Right panel: ROC-style curve.
    plt.subplot(1, 2, 2)
    roc_fpr, roc_tpr, _ = roc_curve(all_targets > .5, all_preds)
    plt.plot(roc_tpr, roc_fpr)
    plt.gca().set_aspect('equal')

    plt.show()


# Training pairs: three times as many pairs as training samples; the
# threshold is searched on these pairs (thres_=None).
pairs, targets = sample_pairs(
    np.unique(labels), labels_train, len(train_subset) * 3, .5,
    lambda i, j: transformations_train[j][0] != transformations_train[i][0])
all_targets, all_preds, thres, fpr, fnr = compute_perfs(
    feat_seqs_train, durations_train, pairs, targets, None)
preview_perfs(all_targets, all_preds, thres)
print("  fpr / fnr (t) : {:.2f}/{:.2f}".format(fpr, fnr))

# Validation pairs, scored with the threshold found on the training pairs.
pairs, targets = sample_pairs(
    np.unique(labels), labels_val, len(val_subset) * 3, .5,
    lambda i, j: transformations_val[j][0] != transformations_val[i][0])
all_targets, all_preds, thres, fpr, fnr = compute_perfs(
    feat_seqs_val, durations_val, pairs, targets, thres)
preview_perfs(all_targets, all_preds, thres)
# Labelled (v): these are validation rates, previously printed as (t).
print("  fpr / fnr (v) : {:.2f}/{:.2f}".format(fpr, fnr))

# Preview training data

In [None]:
from sklearn.metrics import confusion_matrix


def _differs_in_first_transformation(i, j):
    """True when training samples i and j differ in their first transformation entry."""
    return transformations_train[j][0] != transformations_train[i][0]


# Sample training pairs the same way the training loop does, then count how
# often each label of the first member is paired with each label of the second.
pairs, tgts = sample_pairs(
    vocabulary=np.unique(labels),
    labels=labels_train,
    n=len(train_subset) * 3,
    positive_ratio=.5,
    test=_differs_in_first_transformation)
first_member_labels = labels_train[pairs[:, 0]]
second_member_labels = labels_train[pairs[:, 1]]
cnf = confusion_matrix(first_member_labels, second_member_labels)
print(cnf)