# Task Learning

1. [Segmentation](#segmentation)

Compare different approaches to building a subgoal model.

2. [Prediction](#prediction)

Seek an online computation of posterior $p(z_i | s_i, a_i, z_{-i})$
    

## Segmentation <a class="anchor" id="segmentation"></a>

Segmentation uses Bayesian inference to discover latent subgoals in a demonstration set. The three methods we will explore are:
- BNIRL - Michini
- MBNIRL - Modified BNIRL (à la Mitch)
- DPMIRL - Maske, still some concerns here

Start by prepping the dataset and importing the `learning` module.

In [1]:
%matplotlib notebook

In [2]:
import learning
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import trajectories
# import seaborn as sns

In [3]:
# 'mitch-exp' is not one of matplotlib's bundled styles — presumably a user-defined
# style sheet; TODO confirm it is installed wherever this notebook is run.
matplotlib.style.use('mitch-exp')

In [4]:
ls trial_data

manual_mitch_1102_1814.csv


In [5]:
# Recorded trial to analyse (the file listed in the previous cell).
path = 'trial_data/' + 'manual_mitch_1102_1814.csv'

# compute_rate is a project helper. From how its results are used below, pos[0] is
# plotted as the time axis, pos[1:] supplies the state columns, and rate_sm holds
# one row of rates per actuator — TODO confirm against learning.compute_rate.
pos, rate_sm = learning.compute_rate(path)
# Replace NaNs in the rates with zeros so later thresholding and plotting never see NaN.
rate_sm = np.nan_to_num(rate_sm)

In [6]:
# Rate of the first actuator (labelled "Boom" in the title) against time. The time
# axis drops its last sample, presumably because the rate series is one shorter.
boom_fig = plt.figure()
boom_ax = boom_fig.add_subplot(111)
boom_ax.plot(pos[0][:-1], rate_sm[0])
boom_ax.set_title('Boom Velocity vs. Time')
boom_ax.set_xlabel('Time (s)')
boom_fig.tight_layout()

<IPython.core.display.Javascript object>

___

In [7]:
# Threshold handed to get_action_primitives for each actuator row: the first three
# rows share 0.2, the fourth uses 0.05.
thresholds = [0.2, 0.2, 0.2, 0.05]

# Start from a copy of the rates so the label array has the same shape, then
# overwrite each actuator's row with its action-primitive labels.
# (An earlier experiment used learning.k_means_action_primitives with per-actuator
# eta values [1, 1, 0.1, 3] instead of fixed thresholds.)
labels = np.copy(rate_sm)
for i, actuator_rate in enumerate(rate_sm):
    labels[i] = learning.get_action_primitives(actuator_rate, thresholds[i])

In [8]:
# One stacked panel per actuator: its rate over time together with the
# action-primitive labels assigned to it.
plt.figure(figsize=(8, 14))
time_axis = pos[0][:-1]

for actuator in range(4):
    plt.subplot(4, 1, actuator + 1)
    # cluster_plot_new is a project helper; presumably it draws into the current subplot.
    learning.cluster_plot_new(time_axis, rate_sm[actuator], labels[actuator])

plt.tight_layout()

<IPython.core.display.Javascript object>

In [9]:
# Rows become time steps; each row holds one label per actuator.
labels_t = labels.transpose()
# Every distinct row (combination of per-actuator labels) is one action primitive.
label_set = set(tuple(label) for label in labels_t)
# Format inside the call: applying % to the result of print(...) only works with
# the Python 2 print statement and raises TypeError on Python 3.
print('There are %i unique action primitives.' % len(label_set))

There are 34 unique action primitives.


Assign numerical labels for each action primitive.

In [10]:
# Freeze the set into a tuple so each primitive gets an integer id (its position).
# tuple(label_set) iterates the set once and yields the same order as indexing
# list(label_set) element by element, without rebuilding the list each time.
action_primitives = tuple(label_set)
action_primitives

((2.0, 3.0, 2.0, 2.0),
 (2.0, 2.0, 1.0, 1.0),
 (2.0, 3.0, 3.0, 2.0),
 (2.0, 2.0, 1.0, 2.0),
 (2.0, 3.0, 3.0, 1.0),
 (2.0, 1.0, 3.0, 2.0),
 (2.0, 3.0, 2.0, 1.0),
 (2.0, 1.0, 1.0, 3.0),
 (1.0, 1.0, 2.0, 2.0),
 (2.0, 2.0, 3.0, 2.0),
 (2.0, 2.0, 2.0, 2.0),
 (2.0, 1.0, 2.0, 2.0),
 (2.0, 2.0, 2.0, 1.0),
 (1.0, 2.0, 2.0, 1.0),
 (2.0, 2.0, 1.0, 3.0),
 (3.0, 2.0, 1.0, 3.0),
 (2.0, 1.0, 1.0, 2.0),
 (1.0, 1.0, 2.0, 1.0),
 (2.0, 2.0, 3.0, 3.0),
 (3.0, 2.0, 2.0, 1.0),
 (2.0, 1.0, 2.0, 3.0),
 (3.0, 2.0, 2.0, 2.0),
 (1.0, 2.0, 2.0, 2.0),
 (3.0, 2.0, 3.0, 2.0),
 (1.0, 2.0, 3.0, 2.0),
 (2.0, 3.0, 1.0, 2.0),
 (3.0, 2.0, 1.0, 2.0),
 (3.0, 2.0, 1.0, 1.0),
 (1.0, 1.0, 2.0, 3.0),
 (2.0, 2.0, 3.0, 1.0),
 (1.0, 3.0, 2.0, 2.0),
 (3.0, 2.0, 2.0, 3.0),
 (2.0, 2.0, 2.0, 3.0),
 (1.0, 2.0, 2.0, 3.0))

In [11]:
# Id of the primitive whose four labels are all 2. A later cell subtracts 2 from the
# labels to obtain a direction, so label 2 corresponds to zero displacement.
action_primitives.index((2, 2, 2, 2))

10

___

### Action Primitive Visualization

Visualize the action primitive categories over the course of the demonstration.

In [12]:
# Translate every time step's label tuple into its integer primitive id. The ids
# are stored as floats, as a zero-initialised numpy array would hold them.
action_class = np.array(
    [action_primitives.index(tuple(primitive)) for primitive in labels_t],
    dtype=float
)

In [13]:
# Primitive id over time. The figure is labelled so it stands on its own, and the
# trailing semicolon keeps the Line2D/Text repr out of the cell output.
plt.figure()
plt.plot(pos[0][:-1], action_class)
plt.title('Action Primitive Class vs. Time')
plt.xlabel('Time (s)')
plt.ylabel('Action primitive id');

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x11944be50>]

___

## Consolidate

Consolidate action primitive segments.

In [14]:
labels_t

array([[ 2.,  2.,  2.,  2.],
       [ 2.,  2.,  2.,  2.],
       [ 2.,  2.,  2.,  2.],
       ..., 
       [ 2.,  2.,  1.,  2.],
       [ 2.,  2.,  1.,  2.],
       [ 2.,  2.,  1.,  2.]])

In [15]:
from itertools import groupby

In [16]:
# Run-length condense the label sequence: keep only the first time step of every
# run of consecutive identical action primitives.
indexes = []        # time-step index at which each run starts
action_temp = []    # the action primitive held throughout that run

for _, run in groupby(enumerate(labels_t.tolist()), key=lambda pair: pair[1]):
    start, primitive = next(run)
    indexes.append(start)
    action_temp.append(primitive)

action_condense = np.array(action_temp)

len(indexes)

161

In [17]:
# One row per time step; pos[0] is used as the time axis elsewhere, so it is skipped here.
states = np.array(pos[1:]).transpose()
# Keep only the states at the start of each action-primitive run. Integer-array
# indexing selects (and copies) the rows in one step instead of a Python loop.
states_condense = states[indexes]
states_condense.shape

(161, 4)

___

# 1.a. BNIRL 

Bayesian nonparametric inverse reinforcement learning, Michini *et al.*

In [201]:
# Sanity check: the condensed states and condensed actions are passed to the sampler
# together, so the two arrays must have identical shapes.
assert states_condense.shape == action_condense.shape

In [202]:
# Sampler setup: every observation starts in partition 0.
# NOTE(review): the sampler is presumably stochastic and no random seed is set in
# this notebook, so results may differ between runs — confirm.
iters = 500
partitions = [0] * len(states_condense)
partition_dist = np.zeros((iters, len(states_condense)))  # sampled labels, one row per sweep
size = np.zeros(iters)                                     # distinct partitions per sweep (convergence trace)

# Run BNIRL for `iters` sweeps, recording the partition labels after each one.
for sweep in range(iters):
    partitions = learning.bnirl_sampling_3(states_condense, partitions, action_condense,
                                           verbose=False, eta=0.0000001)
    partition_dist[sweep] = np.asarray(partitions)
    size[sweep] = len(set(partitions))
    

In [203]:
# Convergence trace: number of distinct partitions after each sampling iteration.
# Labelled so the figure stands alone; the semicolon suppresses the repr output.
plt.figure()
plt.plot(size)
plt.title('Number of BNIRL Partitions per Iteration')
plt.xlabel('Iteration')
plt.ylabel('Number of partitions');

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x14d2aec50>]

In [205]:
partition_dist[50:].shape

(450, 161)

In [206]:
from scipy import stats

In [207]:
# Discard 'brn' iterations for burn-in
brn = 50

# One mode per condensed observation (one per column of partition_dist). Sizing this
# by len(states) instead would leave trailing zeros that always inject a spurious
# 0 into mode_set.
modes = np.zeros(partition_dist.shape[1])

for i, sample in enumerate(partition_dist[brn:].T):
    mode_value, _ = stats.mode(sample)
    # stats.mode returns a length-1 array on older SciPy and a scalar on newer
    # releases; ravel()[0] handles both.
    modes[i] = np.ravel(mode_value)[0]

mode_set = set(int(mode) for mode in modes)
mode_set

{0, 73, 148, 149}

# Visualize BNIRL Subgoals

Compute forward kinematics and visualize subgoals.

In [22]:
from mpl_toolkits.mplot3d import Axes3D

In [23]:
states_condense.T[0].shape

(161,)

In [24]:
def orient_plot(ax):
    """Apply the notebook's standard axis limits and camera angle to a 3D axes.

    ax is expected to provide set_xlim3d / set_ylim3d / set_zlim3d and view_init
    (i.e. an mplot3d axes). Returns None; the axes is modified in place.
    """
    x_range, y_range, z_range = [-20, 80], [0, 80], [0, 50]
    ax.set_xlim3d(x_range)
    ax.set_ylim3d(y_range)
    ax.set_zlim3d(z_range)
    # Fixed viewpoint so every figure in the notebook is directly comparable.
    ax.view_init(elev=14, azim=-142)

In [117]:
# Wrap the scalar forward-kinematics routine so it accepts whole columns at once.
forward_kin_vec = np.vectorize(trajectories.forward_kin_v)

# State columns are handed over in the order 3, 0, 1, 2.
cols = states_condense.T
states_xyz = np.array(forward_kin_vec(trajectories.exc, cols[3], cols[0], cols[1], cols[2]))

# The kinematics base frame does not sit at ground level: adding 17.1 to the Z row
# moves the frame down to the ground. states_xyz itself is left unbiased.
states_xyz_bias = states_xyz.copy()
states_xyz_bias[2] = states_xyz[2] + 17.1

states_xyz_bias.shape

(3, 161)

## Visualize all states in observation set

In [26]:
fig = plt.figure()
# Figure.gca(projection=...) was deprecated in matplotlib 3.4 and later removed;
# add_subplot creates the same 3D axes on old and new releases (old releases need
# the Axes3D import above to register the projection).
ax = fig.add_subplot(111, projection='3d')

ax.set_title('State Observations in Workspace')

# Split the (3, N) array into its X, Y and Z rows for scatter.
ax.scatter(*np.split(states_xyz_bias, 3), zdir='z')
orient_plot(ax)

<IPython.core.display.Javascript object>

___

In [213]:
states_condense.shape, action_condense.shape

((161, 4), (161, 4))

In [214]:
# Project each state one step along its action: subtracting 2 centres the labels
# on zero (1/2/3 -> -1/0/+1), and each column is scaled by its own step size.
action_direction = action_condense - 2
step_size = np.array([0.1, 0.1, 0.1, 0.002])
states_project = states_condense + action_direction * step_size

# Same column order (3, 0, 1, 2) and the same 17.1 Z offset as for the observed states.
projected = states_project.T
states_project_xyz_bias = np.array(forward_kin_vec(trajectories.exc, projected[3],
                                                   projected[0], projected[1],
                                                   projected[2]))
states_project_xyz_bias[2] += 17.1
states_project_xyz_bias.shape

(3, 161)

In [215]:
fig = plt.figure()
# Figure.gca(projection=...) was deprecated in matplotlib 3.4 and later removed;
# add_subplot creates the same 3D axes on old and new releases.
ax = fig.add_subplot(111, projection='3d')
ax.set_title('State-Action Observations in Workspace')

# Arrows start at the observed states and point towards the projected states.
origins = np.split(states_xyz_bias, 3)
directions = np.split(states_project_xyz_bias - states_xyz_bias, 3)
args = origins + directions

ax.scatter(*origins, zdir='z')
ax.quiver(*args, length=6, normalize=True, color='b', linewidth=0.6)
orient_plot(ax)

<IPython.core.display.Javascript object>

___

## Visualize Subgoals

In [216]:
# Extract subgoals in XYZ from states in XYZ
# NOTE SUBGOALS ARE ALSO BIASED TO GND FRAME
# mode_set holds indices into the condensed observations. states_xyz_bias is
# (3, N), so transposing gives one XYZ row per observation, and list(mode_set)
# selects the rows of the subgoal observations.
subgoals_xyz = states_xyz_bias.T[list(mode_set)]
subgoals_xyz

array([[ 50.91466506,   0.        ,  26.6847627 ],
       [  4.33687385,  44.93739436,  12.66352989],
       [ -2.14192048,  34.04485118,  13.52399596],
       [ -2.14542056,  34.10048323,  20.16500008]])

In [218]:
fig = plt.figure()
# Figure.gca(projection=...) was deprecated in matplotlib 3.4 and later removed;
# add_subplot creates the same 3D axes on old and new releases.
ax = fig.add_subplot(111, projection='3d')

# All labels are set through the axes object, matching set_zlabel (pyplot has no zlabel).
ax.set_title('BNIRL Subgoal Locations in End-Effector Workspace', family='serif', fontsize=14)
ax.set_xlabel('X Position (cm)', family='serif', labelpad=10, fontsize=14)
ax.set_ylabel('Y Position (cm)', family='serif', labelpad=10, fontsize=14)
ax.set_zlabel('Z Position (cm)', family='serif', labelpad=10, fontsize=14)

# Draw the excavator at each subgoal state, then mark the subgoal positions.
for j in mode_set:
    trajectories.draw_exc(ax, states_condense[j], lw=2)

ax.scatter(*np.split(subgoals_xyz.T, 3), s=100)

orient_plot(ax)
plt.tight_layout()
plt.savefig('subgoal_BNIRL.png',dpi=600)

<IPython.core.display.Javascript object>

# Animate demonstration with subgoals

Test the draw_exc() func by animating all states. See exc-render.ipynb for examples of draw_exc(). 

In [219]:
from jupyter_extras import log_progress
from matplotlib.animation import FuncAnimation

In [220]:
states.shape

(1712, 4)

In [221]:
fig = plt.figure()
# Figure.gca(projection=...) was deprecated in matplotlib 3.4 and later removed;
# add_subplot creates the same 3D axes on old and new releases.
ax = fig.add_subplot(111, projection='3d')


def update(i):
    """Redraw frame i: the subgoal markers plus the excavator at state i*10."""
    # Clear the axes that is drawn on below rather than whichever axes pyplot
    # currently considers active (another figure may have been created since).
    ax.cla()
    ax.set_title('Demonstration with BNIRL Subgoals', family='serif', fontsize=14)
    ax.scatter(*np.split(subgoals_xyz.T, 3), s=100)
    trajectories.draw_exc(ax, states[i*10])


# Every 10th state becomes one frame. Floor division keeps the frame count an
# integer on Python 3 as well (plain / yields a float there and range() rejects it).
anim = FuncAnimation(fig, update,
                     frames=range(0, len(states) // 10),
                     interval=100)
# anim.save('BNIRL_subgoal_states.gif', dpi=80, writer='imagemagick')

<IPython.core.display.Javascript object>

___

# Animate cluster formation

Show how the clusters evolve over the iterations of Gibbs sampling.

In [225]:
fig = plt.figure()
# Figure.gca(projection=...) was deprecated in matplotlib 3.4 and later removed;
# add_subplot creates the same 3D axes on old and new releases.
ax = fig.add_subplot(111, projection='3d')


def update(i):
    """Redraw frame i: per-observation modes over samples 20 .. 20+i, sized by support."""
    # Clear the axes that is drawn on below, not the pyplot-current one.
    ax.cla()
    # NOTE(review): the first 20 samples are skipped here, while the mode
    # computation earlier uses a burn-in of 50 — confirm which is intended.
    modes, _ = stats.mode(partition_dist[20:21+i])
    # Older SciPy returns shape (1, n_obs), newer releases (n_obs,); flatten so
    # both layouts are handled.
    modes = np.ravel(modes)
    mode_set = set(int(mode) for mode in modes)

    for mode in mode_set:
        # Marker area = number of observations whose mode is this partition.
        c = np.count_nonzero(modes == mode)
        ax.scatter(*np.split(states_xyz_bias.T[mode], 3), zdir='z', s=c)
    orient_plot(ax)
    ax.set_title('BNIRL Subgoal Posterior')


anim = FuncAnimation(fig, update, frames=np.arange(0, 100), interval=200)
# anim.save('BNIRL_clustering.gif', dpi=80, writer='imagemagick')

<IPython.core.display.Javascript object>

# Observation i Posterior 

Illustrate the posterior distribution for observation $O_i$, along with the state-action pair $(s_i, a_i)$.

In [229]:
fig = plt.figure()
# Figure.gca(projection=...) was deprecated in matplotlib 3.4 and later removed;
# add_subplot creates the same 3D axes on old and new releases.
ax = fig.add_subplot(111, projection='3d')
obs = 25  # index of the condensed observation being illustrated

# Arrow from the observed state towards its action-projected state.
obs_xyz = states_xyz_bias[:, obs]
args = np.split(obs_xyz, 3) + np.split(states_project_xyz_bias[:, obs] - obs_xyz, 3)


def update(i):
    """Redraw frame i: partition samples 20 .. 20+i drawn for observation `obs`."""
    # Clear the axes that is drawn on below, not the pyplot-current one.
    ax.cla()
    samples_i = partition_dist[20:21+i, obs]
    partition_set = set(int(a) for a in samples_i)

    ax.scatter(*np.split(states_xyz_bias[:, obs], 3), zdir='z')
    ax.quiver(*args, length=6, normalize=True, color='b', linewidth=0.6)
    for partition in partition_set:
        # Marker area = how often this partition was sampled for the observation.
        c = np.count_nonzero(samples_i == partition)
        ax.scatter(*np.split(states_xyz_bias.T[partition], 3), zdir='z', s=c)
    orient_plot(ax)
    ax.set_title('BNIRL Subgoal Posterior')


anim = FuncAnimation(fig, update, frames=np.arange(0, 80), interval=200)
# anim.save('BNIRL_clustering2.gif', dpi=80, writer='imagemagick')

<IPython.core.display.Javascript object>

# Excavator Reaching for Subgoals

In [153]:
mode_set

{0, 16, 75, 101, 148, 149}

In [160]:
[states_xyz[:, i] for i in mode_set]
# states_xyz.shape

[array([ 50.91466506,   0.        ,  13.2847627 ]),
 array([ 35.94563085,  16.018123  ,   6.67204146]),
 array([ 11.52608972,  43.1288437 ,  12.1086712 ]),
 array([  8.2381042 ,  44.61176463,  10.37976812]),
 array([ -2.14192048,  34.04485118,   0.12399596]),
 array([ -2.14542056,  34.10048323,   6.76500008])]

In [161]:
subgoals_xyz

[(50.914665062317795, 0.0, 13.284762696079607),
 (50.756293397712, 0.0, 13.178975347995323),
 (11.052831196100144, 59.8543417265516, -9.118924765334455),
 (13.234900206880187, 62.668569441798944, 9.142381963510978),
 (8.223271880146408, 57.779719829496294, 19.914465539294415),
 (10.967010444885522, 59.38959703993757, -9.273969603304366)]

In [None]:
# NOTE(review): unfinished, never-executed cell — it opens an empty figure and then
# evaluates a one-element list wrapping the full `states` array. Complete or remove.
plt.figure()

[states]

___

# DPMIRL

Maske *et al.*: a Dirichlet-process means clustering approach, using the DP-means algorithm from [Kulis and Jordan's ICML 2012 paper](http://icml.cc/2012/papers/291.pdf).

In [58]:
# Pick up edits to learning.py without restarting the kernel. reload() is a builtin
# only on Python 2; on Python 3 it lives in importlib, so import it when available
# and fall back to the builtin otherwise.
try:
    from importlib import reload
except ImportError:
    pass
reload(learning)

<module 'learning' from 'learning.py'>

In [77]:
from sklearn.preprocessing import normalize

In [79]:
# sklearn's normalize with default arguments rescales each row (sample) to unit L2
# norm. NOTE(review): that is per-observation scaling, not per-actuator feature
# scaling — confirm this is the intended preprocessing for clustering.
states_condense_normal = normalize(states_condense)
states_condense_normal.shape

(161, 4)

In [88]:
states_xyz.shape

(3, 161)

## Cluster in the Actuator Space

Cluster in the normalized actuator space and show subgoals and convergence.

In [160]:
# DP-means on the normalised actuator-space states; all observations start in cluster 0.
iters = 20
partition_DPk = [0] * len(states_condense)
mse = np.zeros(iters)   # error reported by compute_means after each pass
sg_means = []           # cluster means after each pass

for step in range(iters):
    partition_DPk = learning.dp_kmeans(states_condense_normal, partition_DPk, lamb=0.4)
    # Means (and the error) are computed from the un-normalised states.
    step_mse, step_means = learning.compute_means(partition_DPk, states_condense)
    mse[step] = step_mse
    sg_means.append(step_means)

In [161]:
fig = plt.figure()
# Figure.gca(projection=...) was deprecated in matplotlib 3.4 and later removed;
# add_subplot creates the same 3D axes on old and new releases.
ax = fig.add_subplot(111, projection='3d')

ax.set_title('%i clusters after %i iterations' % (len(set(partition_DPk)), iters))

# Observations coloured by cluster, plus the final cluster means mapped through the
# forward kinematics (column order 3, 0, 1, 2, as elsewhere in the notebook).
final_means = sg_means[-1]
ax.scatter(*np.split(states_xyz_bias, 3), c=partition_DPk, zdir='z')
ax.scatter(*forward_kin_vec(trajectories.exc, final_means[:, 3],
                            final_means[:, 0], final_means[:, 1],
                            final_means[:, 2], bias=17.1), marker='+')
orient_plot(ax)

<IPython.core.display.Javascript object>

In [162]:
# Error reported by compute_means after each DP-means pass. Axis labels added so
# the figure stands alone; the trailing semicolon suppresses the Text repr.
fig = plt.figure()
plt.plot(mse)
plt.title('Mean Squared Error at i\'th iteration')
plt.xlabel('Iteration')
plt.ylabel('MSE');

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x127707ad0>

## Clustering the End-Effector Space

Results in the actuator space are not very good, so we will cluster in the end-effector space instead by transforming the states first.

In [121]:
# End-effector XYZ for every condensed state, using the same column order
# (3, 0, 1, 2) as the earlier forward-kinematics calls. bias=17.1 is forwarded to
# trajectories.forward_kin_v — presumably the same 17.1 ground-frame Z offset that
# was added manually earlier; TODO confirm.
states_condense_xyz = np.array(forward_kin_vec(trajectories.exc, states_condense[:, 3],
                                     states_condense[:, 0], states_condense[:, 1],
                                     states_condense[:, 2], bias=17.1))

In [122]:
states_condense_xyz.shape

(3, 161)

In [135]:
# DP-means on the end-effector (XYZ) coordinates; all observations start in cluster 0.
iters = 20
partition_DPk = [0] * len(states_condense)
mse = np.zeros(iters)   # error reported by compute_means after each pass
sg_means = []           # cluster means after each pass

for step in range(iters):
    partition_DPk = learning.dp_kmeans(states_condense_xyz.T, partition_DPk, lamb=25.0)
    # NOTE(review): clustering runs on XYZ, but means and error are computed from
    # the actuator-space states — confirm this mix is intentional.
    step_mse, step_means = learning.compute_means(partition_DPk, states_condense)
    mse[step] = step_mse
    sg_means.append(step_means)

In [143]:
fig = plt.figure()
# Figure.gca(projection=...) was deprecated in matplotlib 3.4 and later removed;
# add_subplot creates the same 3D axes on old and new releases.
ax = fig.add_subplot(111, projection='3d')

ax.set_title('%i clusters after %i iterations' % (len(set(partition_DPk)), iters))

# Observations coloured by cluster, plus the final cluster means mapped through the
# forward kinematics and coloured by the set of cluster ids.
final_means = sg_means[-1]
ax.scatter(*np.split(states_xyz_bias, 3), c=partition_DPk, zdir='z')
ax.scatter(*forward_kin_vec(trajectories.exc, final_means[:, 3],
                            final_means[:, 0], final_means[:, 1],
                            final_means[:, 2], bias=17.1),
           c=list(set(partition_DPk)), marker='x', s=100)
orient_plot(ax)

<IPython.core.display.Javascript object>

In [155]:
# MSE (left axis) and number of clusters (right axis) per DP-means iteration.
fig, ax = plt.subplots()

ln1 = ax.plot(mse, label='MSE')
ax2 = ax.twinx()
cluster_counts = [len(means) for means in sg_means]
ln2 = ax2.plot(cluster_counts, label='No. of Clusters', c='b')

# Issued after twinx(), so pyplot applies the title to whichever axes is current here.
plt.title('MSE and No. of Clusters at i\'th iteration')

# Merge the line handles from both axes into one legend drawn on the left axes.
lns = ln1 + ln2
labs = [line.get_label() for line in lns]
ax.legend(lns, labs, loc=0)

# Grid lines come from the right-hand (cluster count) axis only.
ax2.grid()

ax.set_xlabel("Iteration")
ax.set_ylabel('MSE')
ax2.set_ylabel('Clusters')
plt.tight_layout()

<IPython.core.display.Javascript object>

Not much information there....