# Sampling 2D pose DKF trained on (any) 2D pose database

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The next few variables are important for choosing *which* dataset/model should be used.

In [None]:
# Name of the dataset/model pair to sample from; it must match one of the
# branches below, otherwise a ValueError is raised.
DS = 'mpii-ca2'

# Each branch defines:
#   DS_DIR      - experiment directory for the dataset (appended to sys.path
#                 at the bottom of this cell)
#   PFX         - checkpoint directory inside DS_DIR
#   CONFIG_PATH - pickled model configuration (passed as `-params` later on)
#   WEIGHT_PATH - saved model weights (passed as `-reload` later on)
#   EXTRA_ARGS  - extra command-line flags appended to sys.argv later on
if DS == 'ikeadb':
    DS_DIR = './expt-ikeadb/'
    PFX = DS_DIR + 'chkpt-ikeadb/'
    CONFIG_PATH = PFX + 'DKF_lr-8_0000e-04-vm-L-inf-structured-dh-50-ds-10-nl-relu-bs-20-ep-2000-rs-600-ttype-simple_gated-etype-mlp-previnp-False-ar-1_0000e+01-rv-5_0000e-02-nade-False-nt-5000-cond-True-ikeadb-acts-config.pkl'
    WEIGHT_PATH = PFX + 'DKF_lr-8_0000e-04-vm-L-inf-structured-dh-50-ds-10-nl-relu-bs-20-ep-2000-rs-600-ttype-simple_gated-etype-mlp-previnp-False-ar-1_0000e+01-rv-5_0000e-02-nade-False-nt-5000-cond-True-ikeadb-acts-EP425-params.npz'
    # the only configuration here that passes `-cond`
    EXTRA_ARGS = '-vm L -cond -infm structured -ds 10 -dh 50 -uid past-only'.split()
elif DS == 'penn':
    DS_DIR = './expt-penn-action/'
    PFX = DS_DIR + 'chkpt-penn/'
    CONFIG_PATH = PFX + 'DKF_lr-8_0000e-04-vm-L-inf-structured-dh-50-ds-10-nl-relu-bs-20-ep-2000-rs-600-ttype-simple_gated-etype-mlp-previnp-False-ar-1_0000e+01-rv-5_0000e-02-nade-False-nt-5000-cond-False-penn-acts-config.pkl'
    WEIGHT_PATH = PFX + 'DKF_lr-8_0000e-04-vm-L-inf-structured-dh-50-ds-10-nl-relu-bs-20-ep-2000-rs-600-ttype-simple_gated-etype-mlp-previnp-False-ar-1_0000e+01-rv-5_0000e-02-nade-False-nt-5000-cond-False-penn-acts-EP1975-params.npz'
    EXTRA_ARGS = '-vm L -infm structured -ds 10 -dh 50 -uid penn-acts'.split()
elif DS == 'mpii-ca2':
    DS_DIR = './expt-mpii-ca2/'
    PFX = DS_DIR + 'chkpt-mpii-ca2/'
    CONFIG_PATH = PFX + 'DKF_lr-8_0000e-04-vm-L-inf-structured-dh-50-ds-10-nl-relu-bs-20-ep-2000-rs-600-ttype-simple_gated-etype-mlp-previnp-False-ar-1_0000e+01-rv-5_0000e-02-nade-False-nt-5000-cond-False-mpii-ca2-config.pkl'
    WEIGHT_PATH = PFX + 'DKF_lr-8_0000e-04-vm-L-inf-structured-dh-50-ds-10-nl-relu-bs-20-ep-2000-rs-600-ttype-simple_gated-etype-mlp-previnp-False-ar-1_0000e+01-rv-5_0000e-02-nade-False-nt-5000-cond-False-mpii-ca2-EP975-params.npz'
    EXTRA_ARGS = '-vm L -infm structured -ds 10 -dh 50 -uid mpii-ca2'.split()
else:
    raise ValueError('Unknown dataset %s' % DS)

# Make modules living in the experiment directory importable
# (presumably `load`, `addpaths`, `parse_args_dkf`, ... -- confirm).
import sys
sys.path.append(DS_DIR)

In [None]:
import re
import os
import json
import addpaths
from load import loadDataset
import p2d_loader
import os
import numpy as np
from scipy.signal import convolve2d
import h5py
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report

In [None]:
# Swap whatever arguments the kernel was started with for the flags of the
# selected experiment, followed by the checkpoint to reload and its config.
# Slice assignment keeps the same sys.argv list object, mutated in place.
# NOTE(review): presumably consumed by parse_args_dkf when it is imported
# further down -- confirm.
sys.argv[1:] = EXTRA_ARGS + ['-reload', WEIGHT_PATH, '-params', CONFIG_PATH]

In [None]:
# Load the dataset with DS_DIR as the working directory (presumably because
# loadDataset() resolves its files relative to the cwd -- confirm).  The
# try/finally guarantees the original working directory is restored even if
# loading raises.
old_dir = os.getcwd()
try:
    os.chdir(DS_DIR)
    dataset = loadDataset()
finally:
    os.chdir(old_dir)

In [None]:
# Grab the training split; the bare expression on the last line makes the
# notebook display its shape.
train_poses = dataset['train']
train_poses.shape

In [None]:
from parse_args_dkf import params
from utils.misc import removeIfExists,createIfAbsent,mapPrint,saveHDF5,displayTime
from stinfmodel_fast.dkf import DKF
import stinfmodel_fast.learning as DKF_learn
import stinfmodel_fast.evaluate as DKF_evaluate
from theano import config

In [None]:
# Datasets that ship action labels get a name -> one-hot (boolean vector)
# lookup table; every dataset provides the skeleton's parent array.
if 'p2d_action_names' in dataset:
    act_names = dataset['p2d_action_names']
    hot_vec_size = len(act_names)
    for idx, name in enumerate(act_names):
        print('ID% 3i (action% 3i/%i): %s' % (idx, idx+1, hot_vec_size, name))
    # one boolean vector per action, True only at the action's own index
    one_hot_acts = {
        label: (np.arange(hot_vec_size) == position)
        for position, label in enumerate(act_names)
    }
parents = dataset['p2d_parents']
print('Parents array: %s' % parents)

In [None]:
# Whether the loaded model is action-conditional; drives which of the
# sampling cells below actually run.
use_cond = bool(params.get('use_cond', False))
params['savedir']+=PFX

# Add dataset and NADE parameters to "params" which will become part of the
# model
for k in ['dim_observations','data_type']:
    params[k] = dataset[k]
mapPrint('Options: ',params)
if params['use_nade']:
    params['data_type']='real_nade'

# Remove from params
# NOTE(review): './NOSUCHFILE' looks like a placeholder; the call is kept
# because removeIfExists is defined outside this notebook -- confirm it can go.
removeIfExists('./NOSUCHFILE')
reloadFile = params.pop('reloadFile')
pfile=params.pop('paramFile')
# paramFile is set inside the BaseClass in theanomodels
# to point to the pickle file containing params
assert os.path.exists(pfile),pfile+' not found. Need paramfile'
# Single-argument print(...) behaves the same on Python 2 and 3 and matches
# the rest of the notebook; the doubled spaces reproduce the output of the
# old comma-separated print statements exactly.
print('Reloading trained model from :  %s' % reloadFile)
print('Assuming  %s  corresponds to model' % pfile)
dkf  = DKF(params, paramFile = pfile, reloadFile = reloadFile)

In [None]:
def smooth_seq(seq):
    """Smooth one sequence or a batch of sequences.

    A 2-D array is passed straight to ``p2d_loader.gauss_filter`` with
    ``sigma=1.0``.  A 3-D array is treated as a stack of 2-D sequences along
    its first axis, each of which is filtered independently; the result has
    the same shape and dtype as the input.

    Raises AssertionError (carrying the offending shape) for any other rank.
    """
    rank = seq.ndim
    assert rank in {2, 3}, seq.shape
    if rank == 2:
        # single sequence: filter the whole thing in one go
        return p2d_loader.gauss_filter(seq, sigma=1.0)
    smoothed = np.zeros_like(seq)
    for row, single_seq in enumerate(seq):
        smoothed[row] = p2d_loader.gauss_filter(single_seq, sigma=1.0)
    return smoothed

## Non-action-conditional modelling

In [None]:
def scrape_any(poses, count, sigma, mu, parents):
    """Pick up to ``count`` random entries of ``poses`` and reconstruct them.

    Entries are chosen without replacement along the first axis, rescaled
    with ``* sigma + mu`` and handed to ``p2d_loader.reconstruct_poses``
    together with ``parents``.  If ``poses`` holds fewer than ``count``
    entries, all of them are returned (in random order).
    """
    shuffled = np.random.permutation(len(poses))
    picked = poses[shuffled[:count]]
    rescaled = picked * sigma + mu
    return p2d_loader.reconstruct_poses(rescaled, parents)

In [None]:
# Unconditional sampling: draw sequences from the DKF, grab an equal number
# of random training/validation sequences for comparison, and store them all
# in a single .npz archive under DS_DIR/generated/.
if not use_cond:
    # No need to do conditional nonsense!
    num_seqs = 32
    seq_length = 256
    dest_dir = DS_DIR + 'generated/'
    try:
        os.makedirs(dest_dir)
    except OSError:
        # any OSError is ignored here (typically "directory already exists")
        pass
    
    oodles_of_samples = dkf.sample(nsamples=num_seqs, T=seq_length)
    sample_X, sample_Z = oodles_of_samples
    # reshape the dataset statistics so they broadcast over the first two axes
    mu = dataset['p2d_mean'].reshape((1, 1, -1))
    sigma = dataset['p2d_std'].reshape((1, 1, -1))
    # rescale the samples with the dataset std/mean, then rebuild the poses
    real_X = p2d_loader.reconstruct_poses(sample_X * sigma + mu, parents)
        
    # Scrape some training poses, too
    train_poses = scrape_any(
        dataset['train'],
        num_seqs,
        sigma,
        mu,
        parents)
    val_poses = scrape_any(
        dataset['valid'],
        num_seqs,
        sigma,
        mu,
        parents)
        
    # smooth_seq wants one flat feature axis per time step, so collapse the
    # trailing axes, smooth, then restore a (..., 2, -1) layout
    smooth_sampled_times = smooth_seq(
        real_X.reshape(real_X.shape[:2] + (-1,))
    ).reshape(real_X.shape[:2] + (2, -1))
    dest_fn = os.path.join(dest_dir, 'generated.npz')
    print('Saving ' + dest_fn)
    np.savez(
        dest_fn, poses_gen=real_X,
        poses_smooth=smooth_sampled_times,
        poses_train=train_poses,
        poses_val=val_poses,
        parents=parents)

## Action-conditional modelling

In [None]:
def scrape_by_act(poses, actions, one_hot_rep, count, sigma, mu, parents):
    """Pick up to ``count`` random sequences that contain a given action.

    Args:
        poses: pose sequences, indexed as N*T*... (sequence, time, features).
        actions: per-frame one-hot action labels, N*T*A.
        one_hot_rep: one-hot vector (any shape with a single non-unit axis)
            identifying the action of interest.
        count: maximum number of sequences to return.
        sigma, mu: scale and offset applied (``* sigma + mu``) before
            reconstruction.
        parents: passed through to ``p2d_loader.reconstruct_poses``.

    Returns:
        Whatever ``p2d_loader.reconstruct_poses`` returns for the selected,
        rescaled sequences.  Fewer than ``count`` sequences are returned
        (with a printed notice) if the action is rare.

    Raises:
        AssertionError: if ``one_hot_rep`` is not a non-empty vector, if
            ``actions`` is not 3-D, or if ``actions`` and ``poses`` disagree
            on their first two axes.
    """
    one_hot_rep = np.asarray(one_hot_rep)
    # A vector has at most one axis longer than 1, i.e. its longest axis
    # accounts for every element.  (Comparing prod(shape) with size, as this
    # check used to, is true for *every* array and so verified nothing.)
    assert one_hot_rep.ndim >= 1 and one_hot_rep.size > 0 \
        and max(one_hot_rep.shape) == one_hot_rep.size, \
        "one-hot rep must be a vector"
    act_num = np.argmax(one_hot_rep.flatten())
    assert actions.ndim == 3, actions.shape
    act_nums = np.argmax(actions, axis=2)
    assert act_nums.shape == poses.shape[:2], \
        "mismatched action shape %s and pose shape %s" \
            % (actions.shape, poses.shape)
    # try to find sequences that feature part of the action
    has_act, = np.nonzero((act_num == act_nums).any(axis=1))
    indices = has_act[np.random.permutation(len(has_act))][:count]
    if len(indices) < count:
        print('Only found %d instances of action with ID %d'
              % (len(indices), act_num))
    out_data = poses[indices]
    return p2d_loader.reconstruct_poses(out_data * sigma + mu, parents)

def sanitise_name(name):
    """Turn an arbitrary label into a lowercase, filename-friendly slug.

    Every run of characters other than ``a-z``, ``0-9``, ``_`` and ``-`` is
    collapsed into a single dash, and dashes at either end are dropped.
    """
    lowered = name.lower()
    dashed = re.sub(r'[^a-z0-9_-]+', '-', lowered)
    return dashed.strip('-')

In [None]:
# Action-conditional sampling.  Two kinds of archives are written to
# DS_DIR/generated-wacts/:
#   act-<name>.npz          - samples conditioned on one action throughout,
#                             plus real train/val sequences featuring it
#   trans-<a>-to-<b>.npz    - samples whose conditioning action switches
#                             from <a> to <b> half-way through
if use_cond:
    seqs_per_act = 9
    seq_length = 256
    dest_dir = DS_DIR + 'generated-wacts/'
    try:
        os.makedirs(dest_dir)
    except OSError:
        # any OSError is ignored here (typically "directory already exists")
        pass

    # dataset statistics, shaped to broadcast over the first two axes;
    # they do not change between actions, so compute them once
    mu = dataset['p2d_mean'].reshape((1, 1, -1))
    sigma = dataset['p2d_std'].reshape((1, 1, -1))

    # start by generating some sequences for each action type
    for act_name, one_hot_rep in one_hot_acts.items():
        print('Working on action %s' % act_name)
        # same action at every time step
        U = np.stack([one_hot_rep] * seq_length, axis=0)
        oodles_of_samples = dkf.sample(nsamples=seqs_per_act, T=seq_length, U=U)
        sample_X, sample_Z = oodles_of_samples
        real_X = p2d_loader.reconstruct_poses(sample_X * sigma + mu, parents)

        # Scrape some training poses, too
        train_poses = scrape_by_act(
            dataset['train'],
            dataset['train_cond_vals'],
            one_hot_rep,
            seqs_per_act,
            sigma,
            mu,
            parents)
        val_poses = scrape_by_act(
            dataset['valid'],
            dataset['val_cond_vals'],
            one_hot_rep,
            seqs_per_act,
            sigma,
            mu,
            parents)

        # Collapse the trailing axes for smoothing, then restore a
        # (..., 2, -1) layout.  Using -1 (as the unconditional cell does)
        # instead of a hard-coded 16 / (2, 8) keeps this working for
        # skeletons that do not have exactly eight joints.
        smooth_sampled_times = smooth_seq(
            real_X.reshape(real_X.shape[:2] + (-1,))
        ).reshape(real_X.shape[:2] + (2, -1))
        actn = sanitise_name(act_name)
        dest_pfx = os.path.join(dest_dir, 'act-%s' % actn)
        dest_fn = dest_pfx + '.npz'
        print('Saving ' + dest_fn)
        np.savez(
            dest_fn, poses_gen=real_X,
            poses_smooth=smooth_sampled_times,
            poses_train=train_poses,
            poses_val=val_poses,
            parents=parents
        )

    # now choose random pairs of (distinct) actions and simulate
    # a transition at half-way point
    num_pairs = 10
    nacts = len(act_names)
    # There are nacts * (nacts - 1) ordered pairs of distinct actions; draw
    # distinct pair indices and decode each one as (first, second), where
    # "second" indexes the nacts - 1 actions that remain once "first" is
    # removed.  The previous `% nacts` / `// nacts` decoding could return the
    # same action twice and never used the last action as the target.
    chosen_idxs = np.random.permutation(nacts * (nacts-1))[:num_pairs]
    act_pairs = []
    for idxp in chosen_idxs:
        first, second = divmod(int(idxp), nacts - 1)
        if second >= first:
            # skip over the slot occupied by `first`
            second += 1
        act_pairs.append((act_names[first], act_names[second]))
    for act1, act2 in act_pairs:
        print('Computing sequence for action %s -> %s' % (act1, act2))

        len1 = seq_length // 2
        len2 = seq_length - len1
        rep1 = one_hot_acts[act1]
        rep2 = one_hot_acts[act2]
        # first half conditioned on act1, second half on act2
        U = np.stack([rep1] * len1 + [rep2] * len2, axis=0)
        oodles_of_samples = dkf.sample(nsamples=seqs_per_act, T=seq_length, U=U)
        sample_X, sample_Z = oodles_of_samples
        real_X = p2d_loader.reconstruct_poses(sample_X * sigma + mu, parents)

        smooth_sampled_times = smooth_seq(
            real_X.reshape(real_X.shape[:2] + (-1,))
        ).reshape(real_X.shape[:2] + (2, -1))
        act1n = sanitise_name(act1)
        act2n = sanitise_name(act2)
        dest_pfx = os.path.join(
            dest_dir,
            'trans-%s-to-%s' % (act1n, act2n))
        dest_fn = dest_pfx + '.npz'
        print('Saving ' + dest_fn)
        np.savez(
            dest_fn,
            poses_trans=real_X,
            poses_trans_smooth=smooth_sampled_times,
            parents=parents)

# Action classification

The next section of the notebook will be dedicated to action classification using the pose DKF. Roughly, the setup is this:
    
1. Extract a sequence of $k$ poses, $p_{t:t+k-1}$, associated with a given action. I will try to make sure that $k \geq 10$ (or that $k$ is otherwise suitably long).
2. Run the inference network over $p_{t:t+k-1}$ to obtain $z_{t:t+k-1}$.
3. Pass a pooled representation of the latent vectors into an SVM for classification (e.g. mean of values).

Some fine points:

- The current model is trained to require actions as input at each step of the encoder *and* decoder. Given that actions must be supplied to the network at each time step, it's not clear what an action classifier on top of the latents actually achieves. Here are some possible ways of dealing with that:
   1. Ignore it. See if the SVM can recover the given action labels from latents alone. Doing so isn't terribly impressive, but *failing* to do so would be strong evidence that this is a poor approach to action classification (**what I'm doing right now**; I'll do something more intelligent for Penn and MPII).
   2. Give the network random actions so as not to bias it. Unfortunately, the network isn't actually trained on random actions, so this could make the results meaningless.
   3. Train a new model where the decoder is not action-conditional. The encoder could be action-conditional or non-action-conditional; again, it's unclear whether this will improve or harm classification accuracy when using latents.
- The whole pipeline is likely to be sensitive to the latent pooling method used. Will have to experiment with picking the last latent, as well as mean/sum/max pooling.

In [None]:
def seq_latents(feat_sequence, true_action=None):
    """Run the DKF inference network over one sequence and return sampled latents.

    Args:
        feat_sequence: 2-D array, (T, D) -- one feature vector per time step.
        true_action: index of the action to condition on.  Required when the
            loaded model is action-conditional (``use_cond``), ignored
            otherwise.

    Returns:
        The first (and only) batch entry of the ``z`` array returned by
        ``DKF_evaluate.infer``.  These are samples, so repeated calls on the
        same input may differ.

    Raises:
        AssertionError: if the input is not 2-D, or if the model is
            conditional and no action was supplied.
    """
    # must be (T, D)
    assert feat_sequence.ndim == 2, feat_sequence.shape
    num_steps = len(feat_sequence)
    cond_vals = None
    if use_cond:
        assert true_action is not None, \
            'need true action because latents are action-conditional'
        # one-hot action label for every time step, batch of one
        cond_vals = np.zeros((1, num_steps, len(act_names)), dtype='float32')
        cond_vals[0, np.arange(num_steps), true_action] = 1
    mask = np.ones((1, num_steps,))
    # the zs are just samples
    # maybe I should get more of them? just a matter of calling .infer twice
    # Add the batch axis and cast once.  The cast array used to be computed
    # and then dropped, with the uncast input handed to infer() instead.
    feat_in = feat_sequence[np.newaxis, ...].astype('float32')
    z, mu, logcov = DKF_evaluate.infer(dkf, feat_in, mask, cond_vals=cond_vals)
    # TODO: what if I condition on cat[mu, logcov] instead? Noise from z computation
    # *might* act as a regulariser, but it might also just be noise :P
    return z[0]
    
def to_latent_ds(ds):
    """Encode (pose sequence, action ID) pairs as a latent classification set.

    Each sequence is pushed through ``seq_latents`` and summarised by its
    final latent vector.  Returns ``(X, Y)``: a 2-D matrix with one latent
    row per kept sequence and the matching 1-D vector of action IDs.  On
    IkeaDB, sequences labelled with action 0 ("unlabelled") are skipped.
    """
    # action 0 on IkeaDB just means "unlabelled"
    # XXX: how the hell is this continuing to happen?! I'm still getting action 0.
    skip_unlabelled = DS == 'ikeadb'
    features = []
    labels = []
    for feat_seq, true_act in ds:
        if skip_unlabelled and true_act == 0:
            continue
        progress = len(labels) + 1
        if progress == 1 or progress % 250 == 0:
            print('Working on sequence %d' % progress)
        # pool by taking the last latent (mean pooling is the obvious
        # alternative to try)
        features.append(seq_latents(feat_seq, true_act)[-1])
        labels.append(true_act)

    X = np.stack(features)
    Y = np.array(labels)
    assert X.ndim == 2, X.shape
    assert Y.ndim == 1, Y.shape
    assert X.shape[0] ==  Y.shape[0], (X.shape, Y.shape)

    return X, Y

def balance_aclass_ds(aclass_ds, act_names):
    """Down-sample over-represented action classes.

    The target support is the size of the smallest class that actually
    occurs in ``aclass_ds``.  Classes at or below the target are kept whole;
    larger classes are randomly trimmed (via ``np.random``) to exactly the
    target.  Classes listed in ``act_names`` but absent from the data are
    ignored -- previously a single empty class drove the target to zero and
    silently discarded the entire dataset.

    Args:
        aclass_ds: sequence of ``(item, class_index)`` pairs.
        act_names: class names; only its length is used.

    Returns:
        A list with the retained pairs, in their original relative order.

    Raises:
        IndexError: if a class index falls outside ``range(len(act_names))``.
    """
    num_classes = len(act_names)
    labels = np.array([class_num for _, class_num in aclass_ds], dtype=int)
    if labels.size == 0:
        return []
    if labels.min() < 0 or labels.max() >= num_classes:
        raise IndexError('class index out of range for %d classes'
                         % num_classes)

    support = np.bincount(labels, minlength=num_classes)
    # smallest class that is actually present
    support_target = int(support[support > 0].min())

    to_keep = np.zeros(len(aclass_ds), dtype=bool)
    for class_num in range(num_classes):
        class_inds, = np.nonzero(labels == class_num)
        if len(class_inds) > support_target:
            # drop all but support_target randomly chosen members
            perm = np.random.permutation(len(class_inds))[:support_target]
            class_inds = class_inds[perm]
        to_keep[class_inds] = True
    return [aclass_ds[choose_ind] for choose_ind in np.nonzero(to_keep)[0]]

def merge_actions(aclass_ds, merge_map, class_names=None):
    """Relabel a classification dataset after merging or dropping classes.

    Args:
        aclass_ds: iterable of ``(poses, class_index)`` pairs, where the
            index refers to ``class_names``.
        merge_map: maps an original class name to its new name, or to
            ``None`` to drop the class.  Names missing from the map keep
            their own name.  The mapping is not modified.
        class_names: the original class names, in index order.  Defaults to
            the notebook-level ``act_names``.

    Returns:
        ``(new_class_names, new_aclass_ds)``: the sorted list of surviving
        class names, and the dataset with dropped classes removed and the
        remaining indices pointing into ``new_class_names``.
    """
    if class_names is None:
        class_names = act_names
    # work on a copy so the caller's dict is not filled with identity entries
    full_map = dict(merge_map)
    for class_name in class_names:
        full_map.setdefault(class_name, class_name)
    new_class_names = sorted({
        class_name for class_name in full_map.values()
        if class_name is not None
    })
    name_to_num = {
        class_name: num for num, class_name in enumerate(new_class_names)
    }
    # old class index -> new class index (None means "drop")
    new_class_nums = []
    for class_name in class_names:
        new_name = full_map[class_name]
        new_class_nums.append(
            None if new_name is None else name_to_num[new_name])
    new_aclass_ds = []
    for poses, action in aclass_ds:
        new_action = new_class_nums[action]
        if new_action is None:
            continue
        new_aclass_ds.append((poses, new_action))
    return new_class_names, new_aclass_ds

The next cell is slooooow. Could probably speed it up with batching, but IMO not worth the effort at the moment.

In [None]:
# Fetch the action-classification splits.  By default the classifier's
# target names are the dataset's own action names.
if 'p2d_action_names' in dataset:
    train_aclass_ds = dataset['train_aclass_ds']
    val_aclass_ds = dataset['val_aclass_ds']
    aclass_target_names = act_names
    if DS == 'ikeadb':
        # Collapse the per-leg variants into one "attach leg" and one
        # "detach leg" class, and drop 'n/a' entirely (None means "drop").
        merge_map = {
            'attach leg 1': 'attach leg',
            'attach leg 2': 'attach leg',
            'attach leg 3': 'attach leg',
            'attach leg 4': 'attach leg',
            'detach leg 1': 'detach leg',
            'detach leg 2': 'detach leg',
            'detach leg 3': 'detach leg',
            'detach leg 4': 'detach leg',
            'n/a': None
        }
        # both calls yield the same class-name list; keep the second one
        _, train_aclass_ds \
            = merge_actions(train_aclass_ds, merge_map)
        aclass_target_names, val_aclass_ds \
            = merge_actions(val_aclass_ds, merge_map)

In [None]:
# Balance each split's class supports, then encode every remaining sequence
# as a latent vector.  The statements are order-sensitive: balancing draws
# from np.random.
if 'p2d_action_names' in dataset:
    train_aclass_ds_bal = balance_aclass_ds(train_aclass_ds, aclass_target_names)
    train_act_X, train_act_Y = to_latent_ds(train_aclass_ds_bal)
    val_aclass_ds_bal = balance_aclass_ds(val_aclass_ds, aclass_target_names)
    val_act_X, val_act_Y = to_latent_ds(val_aclass_ds_bal)

In [None]:
# Report the sizes of the latent feature matrices and label vectors.
if 'p2d_action_names' in dataset:
    print('Shapes:')
    shape_report = [
        ('train_act_X: {}', train_act_X),
        ('train_act_Y: {}', train_act_Y),
        ('val_act_X: {}', val_act_X),
        ('val_act_Y: {}', val_act_Y),
    ]
    for template, array in shape_report:
        print(template.format(array.shape))

In [None]:
# Fit a linear SVM on the training latents and print per-class
# precision/recall/F1 for both the training and the validation split.
if 'p2d_action_names' in dataset:
    model = LinearSVC(C=1)
    print('Fitting to training set')
    model.fit(train_act_X, train_act_Y)
    # training-set report: shows how well the classifier fits the data it saw
    print('Evaluating SVC on training set')
    train_out_Y = model.predict(train_act_X)
    print(classification_report(train_act_Y, train_out_Y,
                                target_names=aclass_target_names))

    # validation-set report: the number that actually matters
    print('Evaluating SVC on validation set')
    val_out_Y = model.predict(val_act_X)
    print(classification_report(val_act_Y, val_out_Y,
                                target_names=aclass_target_names))

# Inspecting predictions

How well is the model actually learning to predict pose sequences? The next section will try to find out by taking short pose sequences from the training set (any action), encoding them, then coming up with several predictions for their continuations (both smoothed and unsmoothed). Eventually, I will superimpose the predictions on the original frames corresponding to the poses for comparison with the ground truth. Obviously, this will require that I output the ground truth as well.

In [None]:
def make_completion(poses):
    """Encode the first half of a sequence and let the DKF continue it.

    The first ``T_pre = len(poses) // 2`` frames are encoded with
    ``seq_latents`` and decoded again with ``dkf.emission_fxn``; those
    decoded frames open both returned sequences.  The remaining frames are
    produced in two ways:

    * prior completion: repeatedly apply the transition function, sample a
      latent, and decode it;
    * posterior completion: same single step, but after each frame the
      latent is replaced by one re-inferred from the completion so far.

    Args:
        poses: array whose first axis is time; only usable with a
            non-conditional model, since ``seq_latents`` is called without
            an action.

    Returns:
        ``(prior_completion, posterior_completion, T_pre)`` where both
        completions have the shape and dtype of ``poses``.
    """
    T = len(poses)
    T_pre = T // 2

    prior_completion = np.zeros_like(poses)
    posterior_completion = np.zeros_like(poses)

    pre_latents = seq_latents(poses[:T_pre])
    prior_completion[:T_pre] \
        = posterior_completion[:T_pre] \
        = dkf.emission_fxn(pre_latents[None, ...])

    z = pre_latents[-1]
    assert z.ndim == 1
    # z needs to be 3D (nsamples, time, stochdim)
    z_prior = z_posterior = z.reshape((1, 1, -1))

    for t in range(T_pre, T):
        # completion based on prior only
        mu_prior, logcov_prior = dkf.transition_fxn(z_prior)
        z_prior = DKF_evaluate.sampleGaussian(mu_prior, logcov_prior).astype(config.floatX)
        e_prior = dkf.emission_fxn(z_prior)
        assert e_prior.ndim == 3 and e_prior.shape[:2] == (1, 1)
        prior_completion[t] = e_prior[0][0]

        # posterior completion (TODO: use stateful model to make this faster)
        mu_posterior, logcov_posterior = dkf.transition_fxn(z_posterior)
        # sample z_t from prior distribution to gt x_t
        z_posterior = DKF_evaluate.sampleGaussian(mu_posterior, logcov_posterior).astype(config.floatX)
        e_posterior = dkf.emission_fxn(z_posterior)
        # check the posterior emission itself (this used to re-check e_prior.ndim)
        assert e_posterior.ndim == 3 and e_posterior.shape[:2] == (1, 1)
        posterior_completion[t] = e_posterior[0][0]
        # here's the trick: we replace our prior z_t with a posterior
        # z_t now that we have x
        # NOTE(review): the slice [:t] stops before the frame written just
        # above, so the re-inferred latent is for step t-1 -- confirm whether
        # [:t+1] / [None, t:] was intended.
        z_posterior = seq_latents(posterior_completion[:t])[None, t-1:]
        assert z_posterior.ndim == 3
        assert z_posterior.shape[:2] == (1, 1)

    return prior_completion, posterior_completion, T_pre

def process_completions(completions, to_select=32):
    """Build JSON-serialisable completion records for a random subset.

    Up to ``to_select`` entries of ``completions`` are drawn at random.  Each
    entry is expected to provide ``poses``, ``mask``, ``vid_name``,
    ``start``, ``stop`` and ``skip``.  For every drawn entry the poses are
    completed with ``make_completion`` and a dict holding the reconstructed
    true/prior/posterior poses, frame indices, crossover time and boolean
    mask is produced.

    Relies on the notebook-level ``sigma``, ``mu`` and ``parents``.
    """
    def recon(data):
        # need to undo mean subtraction and parent-relative stuff
        # to reconstruct poses
        # TODO: what if this is Penn or something and I'm not using
        # parent-relative parameterisation?
        return p2d_loader.reconstruct_poses(data * sigma + mu, parents).tolist()

    picked = np.random.permutation(len(completions))[:to_select]

    records = []
    for block in np.take(completions, picked):
        poses = block['poses']
        mask = block['mask']
        prior, posterior, crossover = make_completion(poses)
        frame_range = range(block['start'], block['stop'], block['skip'])
        records.append({
            'vid_name': block['vid_name'],
            # I think these indices will be zero-based
            # (but datasets are not)
            'frame_inds': list(frame_range),
            'true_poses': recon(poses),
            'prior_poses': recon(prior),
            'posterior_poses': recon(posterior),
            # time at which we go from using emission function
            # on posterior latents to chaining prior evaluation
            # + using posterior on that
            'crossover_time': crossover,
            'mask': (mask != 0).tolist()
        })

    return records

def save_completions(completions, save_subdir):
    """Write each completion record to ``DS_DIR/save_subdir/NNNN.json``.

    Files are numbered from 0000 in iteration order and pretty-printed with
    a two-space indent.  The directory is created if needed; any OSError
    from creating it is ignored.
    """
    target_dir = os.path.join(DS_DIR, save_subdir)
    try:
        os.makedirs(target_dir)
    except OSError:
        pass
    for number, record in enumerate(completions):
        out_path = os.path.join(target_dir, '%04d.json' % number)
        serialised = json.dumps(record, indent=2)
        with open(out_path, 'w') as handle:
            handle.write(serialised)

In [None]:
# Produce and save completions for both splits.  Only runs for datasets that
# provide completion blocks, and only for the non-conditional model because
# make_completion() encodes sequences without an action label.
if 'p2d_train_completions' in dataset and not use_cond:
    # TODO: figure out how to do this with action-conditional model
    print('Working on train completions')
    train_comp = process_completions(dataset['p2d_train_completions'])
    save_completions(train_comp, 'completions/train')
    
    print('Working on val completions')
    val_comp = process_completions(dataset['p2d_val_completions'])
    save_completions(val_comp, 'completions/val')