In [None]:
from __future__ import division
import pickle
import os
import types
from copy import deepcopy as copy
import logging
import time

import gym
from gym import spaces
import numpy as np
from pyglet.window import key as pygkey
import tensorflow as tf

In [None]:
from matplotlib import pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
%matplotlib inline

In [None]:
import matplotlib as mpl
# Global figure defaults: save at 300 dpi and render all text through LaTeX.
# NOTE(review): usetex=True needs a LaTeX toolchain on the machine running the notebook.
mpl.rc('savefig', dpi=300)
mpl.rc('text', usetex=True)

In [None]:
logger = logging.getLogger()
assert len(logger.handlers) == 1
handler = logger.handlers[0]
handler.setLevel(logging.WARNING)

In [None]:
# Per-configuration simulation settings. newton_conf['fps'] is written to
# env.unwrapped.fps when an environment is built; aristotle_conf is forwarded
# as keyword arguments to the environment's sim_step in the 'aristotle' cell.
newton_conf = {'fps': 40}
aristotle_conf = {'fps': 60}

In [None]:
# Which configuration this run uses ('newton' or 'aristotle'); it selects the
# data directory and which make_lander_env cell is meant to be executed.
conf_choice = 'newton'

In [None]:
# Directory holding the pickles, checkpoints and figures for the chosen configuration.
data_dir = os.path.join('data', '5.1-lander-%s' % conf_choice)

In [None]:
# Single TF1 session shared by the inverse dynamics model and the checkpoint restore below.
sess = tf.Session()

Create the environments and the pilot policies.

In [None]:
throttle_mag = 0.75
def disc_to_cont(action):
  """Map a discrete action index in [0, 6) to a continuous [main, steering] command.

  Indices 0-2 set the main-engine component to -throttle_mag and 3-5 set it to
  +throttle_mag; `action % 3` selects steering -throttle_mag, 0 or +throttle_mag.

  Args:
    action: integer action index, or a NumPy array (including subclasses),
      which is treated as already continuous and returned unchanged.

  Returns:
    np.ndarray of shape (2,) holding [main, steering], or `action` itself when
    it is an array.

  Raises:
    ValueError: if a non-array action lies outside [0, 6).
  """
  if isinstance(action, np.ndarray):
    return action
  # Reject negative indices too; previously they were silently mapped to a command.
  if not 0 <= action < 6:
    raise ValueError('discrete action must be in [0, 6), got %r' % (action,))
  # main engine
  if action < 3:
    m = -throttle_mag
  else:
    m = throttle_mag
  # steering
  if action % 3 == 0:
    s = -throttle_mag
  elif action % 3 == 1:
    s = 0
  else:
    s = throttle_mag
  return np.array([m, s])

In [None]:
# Number of discrete actions (size of the Discrete action space and of the
# inverse dynamics model's logits) and width of the observation placeholders.
n_act_dim = 6
n_obs_dim = 9

In [None]:
# Default episode length bound used by run_ep.
max_ep_len = 1000

In [None]:
# Integer goal identifiers 1..9; each one is a separate training task.
train_goals = np.arange(1, 10, 1).astype(int)
n_train_tasks = train_goals.size

In [None]:
# Only define the unassisted factory when the 'newton' configuration is selected.
# A conditional definition (instead of a top-level assert) lets the notebook run
# top to bottom without raising, and never shadows the other configuration's factory.
if conf_choice == 'newton':
  def make_lander_env(goal=None):
    """Build a LunarLanderContinuous-v2 env that accepts discrete actions.

    The unwrapped env's `_step` is replaced by a wrapper that converts the
    discrete action with `disc_to_cont` before calling the original `_step`.

    Args:
      goal: value stored on `env.unwrapped.goal` (how the env consumes it is
        not visible in this notebook).

    Returns:
      The patched gym environment, with its fps set to newton_conf['fps'].
    """
    env = gym.make('LunarLanderContinuous-v2')
    env.unwrapped.goal = goal
    env.action_space = spaces.Discrete(n_act_dim)
    # Keep a handle on the original step so the wrapper can delegate to it.
    env.unwrapped._step_orig = env.unwrapped._step
    def _step(self, action):
      return self._step_orig(disc_to_cont(action))
    env.unwrapped._step = types.MethodType(_step, env.unwrapped)
    env.unwrapped.fps = newton_conf['fps']
    return env

In [None]:
def build_mlp(
    input_placeholder,
    output_size,
    scope,
    n_layers=1,
    size=256,
    activation=tf.nn.relu,
    output_activation=None,
    reuse=False
  ):
  """Stack dense layers on top of `input_placeholder` inside variable scope `scope`.

  Builds `n_layers` hidden layers of width `size` using `activation`, followed
  by one output layer of width `output_size` using `output_activation`.

  Returns:
    The output tensor of the final dense layer.
  """
  with tf.variable_scope(scope, reuse=reuse):
    hidden = input_placeholder
    for _layer in range(n_layers):
      hidden = tf.layers.dense(hidden, size, activation=activation)
    return tf.layers.dense(hidden, output_size, activation=output_activation)

In [None]:
class NNInvDynamicsModel():
  """Neural inverse dynamics classifier.

  Given a normalized observation and the normalized change to the next
  observation, predicts which of the `n_act_dim` discrete actions was taken.
  """

  def __init__(self,
      n_layers,
      size,
      activation,
      normalization,
      batch_size,
      iterations,
      learning_rate,
      sess,
      invdyn_scope
    ):
    """Build the graph (placeholders, MLP, loss, Adam update) and initialize variables.

    Args:
      n_layers, size, activation: hidden-layer configuration passed to build_mlp.
      normalization: tuple (mean_obs, std_obs, mean_deltas, std_deltas).
      batch_size: minibatch size used by `fit`.
      iterations: multiplier on len(data) / batch_size giving the number of
        gradient steps in `fit`.
      learning_rate: Adam learning rate.
      sess: TF1 session used for initialization, training and prediction.
      invdyn_scope: variable scope name for this model.
    """
    self.scope = invdyn_scope
    with tf.variable_scope(self.scope, reuse=None):
      self.obs_t_ph = tf.placeholder(tf.float32, [None, n_obs_dim])
      self.obs_delta_t_ph = tf.placeholder(tf.float32, [None, n_obs_dim])
      self.act_t_ph = tf.placeholder(tf.int32, [None])
      obs_cat_delta_t = tf.concat([self.obs_t_ph, self.obs_delta_t_ph], axis=1)
      # build_mlp opens `invdyn_scope` again inside this scope, so the variable
      # names are nested twice. Left as-is: presumably existing checkpoints
      # were saved with these names (TODO confirm before changing).
      self.act_logits = build_mlp(
        obs_cat_delta_t, n_act_dim, invdyn_scope, n_layers=n_layers, size=size,
        activation=activation
      )
      self.act_preds = tf.argmax(self.act_logits, axis=1)
      self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.act_t_ph,
        logits=self.act_logits,
      ))

      self.update_op = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)

      # NOTE: this runs the initializer for every global variable in the graph,
      # not only the ones created by this model.
      tf.global_variables_initializer().run(session=sess)

    self.sess = sess
    self.iterations = iterations
    self.batch_size = batch_size
    self.normalization = normalization

  def fit(self, data):
    """Train the classifier on transitions.

    Args:
      data: tuple (obs, actions, rewards, next_obs, dones); only obs, actions
        and next_obs are used. They must support integer-array indexing.

    Prints the step index, the total step count and the loss at every step.
    """
    obs, actions, _rewards, next_obs, _dones = data
    mean_obs, std_obs, mean_deltas, std_deltas = self.normalization
    normed_obs = normalize(obs, mean_obs, std_obs)
    deltas = next_obs - obs
    normed_deltas = normalize(deltas, mean_deltas, std_deltas)

    n_examples = len(obs)
    def sample_batch(size):
      # Sample without replacement. The previous implementation called
      # `random.sample`, but `random` is never imported in this notebook.
      idxes = np.random.choice(n_examples, size, replace=False)
      return normed_obs[idxes], actions[idxes], normed_deltas[idxes]

    n_iters = self.iterations * len(obs) // self.batch_size
    with tf.variable_scope(self.scope, reuse=None):
      for i in range(n_iters):
        batch_obs_t, batch_act_t, batch_obs_delta = sample_batch(self.batch_size)
        feed_dict = {
          self.obs_t_ph: batch_obs_t,
          self.act_t_ph: batch_act_t,
          self.obs_delta_t_ph: batch_obs_delta
        }
        [loss, _] = self.sess.run([self.loss, self.update_op], feed_dict=feed_dict)
        print('%d %d %f' % (i, n_iters, loss))

  def predict(self, states, next_states):
    """Return the argmax action index for each (state, next_state) pair.

    Both inputs are batches; they are normalized with the stored statistics
    before being fed to the network.
    """
    mean_obs, std_obs, mean_deltas, std_deltas = self.normalization
    normed_states = normalize(states, mean_obs, std_obs)
    normed_deltas = normalize(next_states - states, mean_deltas, std_deltas)
    with tf.variable_scope(self.scope, reuse=None):
      feed_dict = {
        self.obs_t_ph: normed_states,
        self.obs_delta_t_ph: normed_deltas
      }
      return self.sess.run(self.act_preds, feed_dict=feed_dict)

In [None]:
def normalize(data, mean, std, eps=1e-9):
  """Standardize `data`: subtract `mean` and divide by `std + eps`.

  `eps` keeps the division finite when `std` is zero.
  """
  centered = data - mean
  scale = std + eps
  return centered / scale

def unnormalize(data, mean, std, eps=1e-9):
  """Invert `normalize`: multiply by `std + eps`, then add `mean` back."""
  scale = std + eps
  return data * scale + mean

In [None]:
# Hyperparameters passed to NNInvDynamicsModel below.
# NOTE(review): they presumably have to match the settings the restored
# checkpoint was trained with (at least the layer sizes) — confirm.
n_layers = 2
layer_size = 64
activation = tf.nn.relu
learning_rate = 1e-4
batch_size = 64
iterations = 10

In [None]:
# Normalization statistics for the inverse dynamics model; NNInvDynamicsModel
# unpacks them as (mean_obs, std_obs, mean_deltas, std_deltas).
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
with open(os.path.join(data_dir, 'invdyn_normalization.pkl'), 'rb') as f:
  normalization = pickle.load(f)

In [None]:
# Variable scope name of the saved inverse dynamics model; used both to build
# the model and to select which variables to restore.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
with open(os.path.join(data_dir, 'invdyn_scope.pkl'), 'rb') as f:
  invdyn_scope = pickle.load(f)

In [None]:
# Build the inverse dynamics graph; its weights are overwritten by the
# checkpoint restore a few cells below. The 'aristotle' environment uses this
# model to translate intended state changes into actions.
true_invdyn_model = NNInvDynamicsModel(
  n_layers=n_layers,
  size=layer_size,
  activation=activation,
  normalization=normalization,
  batch_size=batch_size,
  iterations=iterations,
  learning_rate=learning_rate,
  sess=sess,
  invdyn_scope=invdyn_scope
)

In [None]:
# Checkpoint path of the trained inverse dynamics model.
invdyn_path = os.path.join(data_dir, 'invdyn.tf')

In [None]:
def load_tf_vars(sess, scope, path):
  """Restore from `path` every global variable whose name starts with `scope + '/'`."""
  prefix = scope + '/'
  scoped_vars = []
  for var in tf.global_variables():
    if var.name.startswith(prefix):
      scoped_vars.append(var)
  tf.train.Saver(scoped_vars).restore(sess, path)

In [None]:
# Replace the freshly initialized inverse dynamics weights with the saved ones.
load_tf_vars(sess, invdyn_scope, invdyn_path)

In [None]:
# Only define the assisted factory when the 'aristotle' configuration is selected.
# A conditional definition (instead of a top-level assert) lets the notebook run
# top to bottom without raising, and never shadows the other configuration's factory.
if conf_choice == 'aristotle':
  def make_lander_env(goal=None):
    """Build a LunarLanderContinuous-v2 env whose discrete actions are re-interpreted.

    For a discrete action the patched `_step`:
      1. simulates the action with `self.sim_step(..., **aristotle_conf)` to get
         the state the pilot intends to reach,
      2. asks `true_invdyn_model` which action produces that state change from
         `self.curr_obs`,
      3. executes that action with the original `_step` and reports it in
         `info['intended_action']`.
    Continuous (non-discrete) actions are passed straight to the original `_step`.

    NOTE(review): `curr_obs` and `sim_step` are expected on the unwrapped env;
    they are not defined in this notebook — presumably from a customized
    LunarLander implementation.

    Args:
      goal: value stored on `env.unwrapped.goal`.

    Returns:
      The patched gym environment, with its fps set to newton_conf['fps'].
    """
    env = gym.make('LunarLanderContinuous-v2')
    env.action_space = spaces.Discrete(n_act_dim)
    # Keep a handle on the original step so the wrapper can delegate to it.
    env.unwrapped._step_orig = env.unwrapped._step
    def _step(self, action):
      # Accept any Python / NumPy integer width, not only int and np.int64.
      is_scalar = isinstance(action, (int, np.integer))
      if not (is_scalar or len(action) == 1):
        return self._step_orig(action)
      if not is_scalar:
        # Unwrap 1-element arrays and lists alike.
        action = action[0]

      if self.curr_obs is not None:
        intended_state = self.sim_step(disc_to_cont(action), **aristotle_conf)[0]
        intended_action = true_invdyn_model.predict(
          np.array([self.curr_obs]), np.array([intended_state]))[0]
      else:
        # No current observation yet: execute the action unchanged.
        intended_action = action

      obs, r, done, info = self._step_orig(disc_to_cont(intended_action))
      info['intended_action'] = intended_action
      return obs, r, done, info
    env.unwrapped._step = types.MethodType(_step, env.unwrapped)
    env.unwrapped.fps = newton_conf['fps']
    env.unwrapped.goal = goal
    return env

In [None]:
def run_ep(policy, env, max_ep_len=max_ep_len, render=False, task_idx=None):
  """Run one episode of `policy` in `env` and record every transition.

  The shared keyboard state (`human_agent_action`, `human_agent_active`) is
  reset before the episode starts.

  Args:
    policy: callable mapping an observation to an action.
    env: gym-style environment (`reset`, `step`, `render`).
    max_ep_len: bound on the episode length.
    render: if True, render the env after every step.
    task_idx: task label stored with every transition.

  Returns:
    List of tuples
    (prev_obs, action, reward, obs, float(done), task_idx, intended_action),
    where intended_action is info['intended_action'] when the env reports one
    and `action` otherwise.
  """
  global human_agent_action
  global human_agent_active
  human_agent_action = init_human_action()
  human_agent_active = False
  obs = env.reset()
  done = False
  prev_obs = obs
  rollout = []
  # NOTE(review): this allows up to max_ep_len + 1 steps — confirm intended.
  for _ in range(max_ep_len+1):
    if done:
      break
    action = policy(obs)
    obs, r, done, info = env.step(action)
    rollout.append((prev_obs, action, r, obs, float(done), task_idx, info.get('intended_action', action)))
    prev_obs = obs
    if render:
      env.render()
  return rollout

In [None]:
def init_human_action():
  """Return a fresh [main-engine, steering] pair set to [0, 1], the values the key-release handler restores."""
  return [0, 1]

In [None]:
# Shared keyboard state: human_agent_action is a [main-engine, steering] pair
# mutated by the key handlers; human_agent_active records whether the last
# handled key event was a press (True) or a release (False).
human_agent_action = init_human_action()
human_agent_active = False

# pyglet key codes of the arrow keys handled by key_press / key_release.
LEFT = pygkey.LEFT
RIGHT = pygkey.RIGHT
UP = pygkey.UP
DOWN = pygkey.DOWN

def key_press(key, mod):
  """Key-press handler: update the shared action for an arrow key and mark the pilot active.

  LEFT / RIGHT set the steering slot to 0 / 2, UP / DOWN set the main-engine
  slot to 1 / 0. Any other key is ignored. `mod` is unused.
  """
  global human_agent_action
  global human_agent_active
  pressed = int(key)
  # key code -> (slot in human_agent_action, value stored there)
  bindings = {LEFT: (1, 0), RIGHT: (1, 2), UP: (0, 1), DOWN: (0, 0)}
  if pressed in bindings:
    slot, value = bindings[pressed]
    human_agent_action[slot] = value
    human_agent_active = True

def key_release(key, mod):
  """Key-release handler: reset the slot of the released arrow key and mark the pilot inactive.

  Releasing LEFT or RIGHT sets the steering slot to 1; releasing UP or DOWN
  sets the main-engine slot to 0. Any other key is ignored. `mod` is unused.
  """
  global human_agent_action
  global human_agent_active
  released = int(key)
  if released in (LEFT, RIGHT):
    human_agent_action[1] = 1
  elif released in (UP, DOWN):
    human_agent_action[0] = 0
  else:
    return
  human_agent_active = False

def encode_human_action(action):
  """Pack a [main-engine, steering] pair into one discrete index: main * 3 + steering."""
  main_engine, steering = action[0], action[1]
  return main_engine * 3 + steering

In [None]:
def human_pilot_policy(obs):
  """Policy driven by the keyboard: ignores `obs` and returns the encoded current key state."""
  current_keys = human_agent_action
  return encode_human_action(current_keys)

In [None]:
# Identifier of the pilot being recorded; used in the name of the saved rollout file.
pilot_id = 'andreea'

In [None]:
# Number of demonstration episodes to collect for each training task (one each).
n_demo_eps_per_task = [1] * n_train_tasks

In [None]:
# Environment used for recording; its goal is reassigned per task in the collection loop.
env = make_lander_env(goal=train_goals[0])

In [None]:
# Render once before touching the viewer (presumably the first render() call
# creates it — confirm), then route the window's key events to the handlers above.
env.render()
env.unwrapped.viewer.window.on_key_press = key_press
env.unwrapped.viewer.window.on_key_release = key_release

In [None]:
# One list of recorded episodes per training task.
demo_rollouts = [[] for _ in range(n_train_tasks)]

In [None]:
# Index of the task the collection loop starts with.
train_task_idx = 0

In [None]:
# Interactive data collection: cycle through the tasks round-robin until every
# task has its requested number of episodes. Tasks that already have enough
# episodes are skipped. The 2 s sleeps pause before the first episode and after
# each recorded one.
time.sleep(2)
while any(len(task_rollouts) < n_demo_eps_of_task for task_rollouts, n_demo_eps_of_task \
          in zip(demo_rollouts, n_demo_eps_per_task)):
  if len(demo_rollouts[train_task_idx]) < n_demo_eps_per_task[train_task_idx]:
    env.unwrapped.goal = train_goals[train_task_idx]
    demo_rollouts[train_task_idx].append(
      run_ep(human_pilot_policy, env, render=True, task_idx=train_task_idx))
    time.sleep(2)
  train_task_idx = (train_task_idx + 1) % n_train_tasks

In [None]:
# Sanity check: (fewest episodes recorded for any task, total episodes recorded).
min(len(task_rollouts) for task_rollouts in demo_rollouts), sum(len(task_rollouts) for task_rollouts in demo_rollouts)

In [None]:
# Close the environment now that recording is finished.
env.close()

In [None]:
# Persist this pilot's rollouts (a list of episode lists, one per task) for the chosen configuration.
with open(os.path.join(data_dir, '%s_pilot_policy_demo_rollouts.pkl' % pilot_id), 'wb') as f:
  pickle.dump(demo_rollouts, f, pickle.HIGHEST_PROTOCOL)

In [None]:
# Identifiers of the study participants; one rollout file per identifier is
# loaded below from each data directory.
pilot_ids = [
  'spike',
  'jet',
  'faye',
  'vicious',
  'ed',
  'ein',
  'julia',
  'punch',
  'judy',
  'lin',
  'grencia',
  'laughingbull'
]

In [None]:
# Merge every pilot's rollouts into one list per task and save the result.
# NOTE: this rebinds `demo_rollouts` and `pilot_id` from the recording cells above.
demo_rollouts = [[] for _ in range(n_train_tasks)]

for pilot_id in pilot_ids:
  with open(os.path.join(data_dir, '%s_pilot_policy_demo_rollouts.pkl' % pilot_id), 'rb') as f:
    pilot_demo_rollouts = pickle.load(f)
    # Each file holds one list of episodes per task, in task order.
    for task_idx, task_rollouts in enumerate(pilot_demo_rollouts):
      demo_rollouts[task_idx].extend(task_rollouts)

with open(os.path.join(data_dir, 'human_pilot_policy_demo_rollouts.pkl'), 'wb') as f:
  pickle.dump(demo_rollouts, f, pickle.HIGHEST_PROTOCOL)

In [None]:
# Data directories of the two configurations, independent of conf_choice.
newton_data_dir = os.path.join('data', '5.1-lander-newton')
aristotle_data_dir = os.path.join('data', '5.1-lander-aristotle')

In [None]:
# pilot id -> flat list of that pilot's episodes, one dict per configuration.
newton_rollouts_of_pilot = {k: [] for k in pilot_ids}
aristotle_rollouts_of_pilot = {k: [] for k in pilot_ids}

In [None]:
# Load every pilot's rollouts under both configurations.
# sum(..., []) flattens the per-task lists into a single list of episodes.
# NOTE: re-running this cell without re-running the previous one appends duplicates.
for pilot_id in pilot_ids:
  with open(os.path.join(newton_data_dir, '%s_pilot_policy_demo_rollouts.pkl' % pilot_id), 'rb') as f:
    newton_rollouts_of_pilot[pilot_id].extend(sum(pickle.load(f), []))
  with open(os.path.join(aristotle_data_dir, '%s_pilot_policy_demo_rollouts.pkl' % pilot_id), 'rb') as f:
    aristotle_rollouts_of_pilot[pilot_id].extend(sum(pickle.load(f), []))

In [None]:
# Per-pilot outcome statistics. An episode counts as a success when the reward
# of its last transition (x[-1][2]) equals 100 and as a failure when it equals -100.
# Each entry is a 12-tuple of (mean, standard error, n) triples in the order:
#   newton success, aristotle success, newton failure, aristotle failure.
stats_of_pilot = {}
for pilot_id in pilot_ids:
  newton_rollouts = newton_rollouts_of_pilot[pilot_id]
  aristotle_rollouts = aristotle_rollouts_of_pilot[pilot_id]
  newton_succ = [1 if x[-1][2] == 100 else 0 for x in newton_rollouts]
  aristotle_succ = [1 if x[-1][2] == 100 else 0 for x in aristotle_rollouts]
  newton_fail = [1 if x[-1][2] == -100 else 0 for x in newton_rollouts]
  aristotle_fail = [1 if x[-1][2] == -100 else 0 for x in aristotle_rollouts]
  stats_of_pilot[pilot_id] = (
    np.mean(newton_succ), np.std(newton_succ) / np.sqrt(len(newton_succ)), len(newton_succ),
    np.mean(aristotle_succ), np.std(aristotle_succ) / np.sqrt(len(aristotle_succ)), len(aristotle_succ),
    np.mean(newton_fail), np.std(newton_fail) / np.sqrt(len(newton_fail)), len(newton_fail),
    np.mean(aristotle_fail), np.std(aristotle_fail) / np.sqrt(len(aristotle_fail)), len(aristotle_fail)
  )

In [None]:
# Rows for the hypothesis-test CSV: two rows per pilot,
# [pilot_id, assistance flag, success rate, crash rate], where flag 0 uses the
# newton statistics and flag 1 the aristotle statistics.
out = []
for pilot_id in pilot_ids:
  s = stats_of_pilot[pilot_id]
  # Indices 0/6 are the newton success/failure means, 3/9 the aristotle ones.
  ctrl_succ = s[0]
  ctrl_fail = s[6]
  treat_succ = s[3]
  treat_fail = s[9]
  out.append([pilot_id, 0, ctrl_succ, ctrl_fail])
  out.append([pilot_id, 1, treat_succ, treat_fail])

In [None]:
# Write the rows as CSV with a header line; the file ends without a trailing newline.
with open(os.path.join(data_dir, 'lander_hyp_test.csv'), 'w') as f:
  f.write('userid,assistance,successrate,crashrate\n')
  f.write('\n'.join([','.join([str(z) for z in x]) for x in out]))

In [None]:
# Per-pilot success / crash rates as parallel lists (ordered like pilot_ids),
# for the scatter plots below. "solo_pi" holds the newton rates, "comb" the
# aristotle rates.
solo_pi_succs = []
solo_pi_crashes = []
comb_succs = []
comb_crashes = []
for pilot_id in pilot_ids:
  s = stats_of_pilot[pilot_id]
  # Indices 0/6 are the newton success/failure means, 3/9 the aristotle ones.
  ctrl_succ = s[0]
  ctrl_fail = s[6]
  treat_succ = s[3]
  treat_fail = s[9]
  solo_pi_succs.append(ctrl_succ)
  solo_pi_crashes.append(ctrl_fail)
  comb_succs.append(treat_succ)
  comb_crashes.append(treat_fail)

In [None]:
# Pilots ranked by newton-configuration success rate, ascending.
sorted(zip(pilot_ids, solo_pi_succs), key=lambda x: x[1])

In [None]:
# Pilots ranked by aristotle-configuration success rate, ascending.
sorted(zip(pilot_ids, comb_succs), key=lambda x: x[1])

In [None]:
# Larger font for all figures produced from here on.
mpl.rcParams.update({'font.size': 20})

In [None]:
# One point per pilot and condition: crash rate (x) against success rate (y).
# Axis limits are padded slightly beyond [0, 1] so markers at the borders stay
# visible. The figure is saved into data_dir.
plt.xlabel('Crash Rate')
plt.ylabel('Success Rate')
plt.title(r'Lunar Lander User Study (%d users)' % len(solo_pi_crashes))
plt.scatter(
  solo_pi_crashes, solo_pi_succs, label='Unassisted', 
  color='gray', s=100, marker='o')
plt.scatter(
  comb_crashes, comb_succs, label='Assisted', 
  color='orange', s=100, marker='^')
plt.legend(loc='upper right')
plt.ylim([-0.05, 1.05])
plt.xlim([-0.05, 1.05])
plt.savefig(os.path.join(data_dir, 'lander-user-study-fig.pdf'), bbox_inches='tight')
plt.show()

In [None]:
# Paired comparison of success rates, one point per pilot. Points above the
# dashed y = x line have a higher assisted than unassisted success rate.
plt.xlabel('Unassisted Success Rate')
plt.ylabel('Assisted Success Rate')
plt.title('Lunar Lander User Study (%d users)' % len(solo_pi_succs))
plt.plot([-0.05, 1.05], [-0.05, 1.05], linestyle='--', color='gray')
plt.scatter(solo_pi_succs, comb_succs, color='orange', linewidth=0, s=100)
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.savefig(os.path.join(data_dir, 'lander-user-study-succ.pdf'), bbox_inches='tight')
plt.show()

In [None]:
# Paired comparison of crash rates, one point per pilot. Points below the
# dashed y = x line have a lower assisted than unassisted crash rate.
plt.xlabel('Unassisted Crash Rate')
plt.ylabel('Assisted Crash Rate')
plt.title('Lunar Lander User Study (%d users)' % len(solo_pi_succs))
plt.plot([-0.05, 1.05], [-0.05, 1.05], linestyle='--', color='gray')
plt.scatter(solo_pi_crashes, comb_crashes, color='orange', linewidth=0, s=100)
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.savefig(os.path.join(data_dir, 'lander-user-study-crash.pdf'), bbox_inches='tight')
plt.show()

In [None]:
# Survey responses per pilot, one integer per question in raw_qs order; they
# are written to the CSV below with assistance flag 0.
# NOTE(review): values range from 1 to 7, presumably a 7-point scale — confirm.
# The key order here differs from survey_after.
survey_before = {
  'ed': [7, 5, 1, 2, 2, 3, 7, 5, 4, 3],
  'lin': [3, 1, 1, 1, 7, 1, 7, 2, 2, 2],
  'laughingbull': [6, 6, 2, 2, 7, 2, 7, 6, 7, 2],
  'ein': [7, 2, 1, 1, 6, 2, 7, 7, 7, 2],
  'judy': [7, 4, 1, 1, 7, 2, 7, 6, 7, 7],
  'punch': [1, 2, 2, 6, 4, 1, 4, 2, 2, 2],
  'spike': [3, 4, 1, 1, 7, 1, 6, 6, 1, 1],
  'jet': [2, 2, 1, 2, 6, 2, 6, 5, 5, 4],
  'faye': [3, 5, 1, 1, 3, 4, 7, 5, 6, 5],
  'julia': [2, 2, 1, 1, 5, 1, 7, 6, 6, 2],
  'vicious': [2, 2, 1, 1, 1, 3, 5, 4, 4, 2],
  'grencia': [4, 2, 1, 1, 7, 1, 7, 1, 7, 1]
}

In [None]:
# Survey responses per pilot, one integer per question in raw_qs order; they
# are written to the CSV below with assistance flag 1.
# The key order here differs from survey_before.
survey_after = {
  'punch': [6, 6, 5, 6, 7, 5, 5, 5, 6, 6],
  'judy': [7, 6, 3, 5, 7, 4, 7, 7, 7, 7],
  'ein': [7, 6, 3, 3, 1, 3, 7, 7, 7, 5],
  'laughingbull': [7, 6, 6, 5, 7, 5, 7, 7, 6, 6],
  'lin': [6, 6, 1, 1, 7, 2, 7, 6, 3, 2],
  'ed': [7, 7, 3, 3, 6, 5, 7, 6, 5, 5],
  'spike': [6, 5, 5, 5, 7, 6, 7, 6, 6, 7],
  'jet': [4, 5, 1, 1, 7, 3, 7, 6, 5, 4],
  'faye': [5, 6, 2, 2, 5, 4, 7, 6, 6, 6],
  'julia': [5, 6, 1, 1, 7, 3, 7, 6, 3, 4],
  'vicious': [5, 4, 3, 3, 6, 5, 6, 5, 6, 6],
  'grencia': [6, 7, 3, 2, 7, 3, 7, 5, 6, 5]
}

In [None]:
# Survey statements, in the same order as the response lists above.
raw_qs = [
  "I enjoyed playing the game",
  "I improved over time",
  "I didn't crash",
  "I didn't fly out of bounds",
  "I didn't run out of time",
  "I landed between the flags",
  "I understood how to complete the task",
  "I intuitively understood the physics of the game",
  "My actions were carried out",
  "My intended actions were carried out"
]

In [None]:
# Response matrices of shape (n_users, n_qs): one row per user, one column per
# question. The two survey dicts list their users in different orders, so both
# matrices are built from one shared, sorted user order; row i then refers to
# the same user in mat_before and mat_after. Column means are unaffected.
n_users = len(survey_before)
n_qs = len(raw_qs)
assert set(survey_before) == set(survey_after), 'surveys must cover the same users'
survey_user_order = sorted(survey_before)
mat_before = np.array(
  [survey_before[user] for user in survey_user_order], dtype=float
).reshape(n_users, n_qs)
mat_after = np.array(
  [survey_after[user] for user in survey_user_order], dtype=float
).reshape(n_users, n_qs)

In [None]:
# Print one LaTeX table row per question: statement, an empty column, then the
# mean response across users before and after.
for i, q in enumerate(raw_qs):
  print('%s &  & %0.2f & %0.2f \\\\' % (q, mat_before[:, i].mean(), mat_after[:, i].mean()))

In [None]:
# Write the survey as CSV: two rows per pilot (assistance 0 = survey_before,
# assistance 1 = survey_after), columns Q0..Q9 in raw_qs order. Rows follow the
# order of pilot_ids; the file ends without a trailing newline.
with open(os.path.join(data_dir, 'survey_hyp_test.csv'), 'w') as f:
  f.write('userid,assistance,%s\n' % ','.join(['Q%d' % i for i in range(len(raw_qs))]))
  f.write('\n'.join([','.join([userid, '0', ','.join([str(x) for x in survey_before[userid]])]) + '\n' + ','.join([userid, '1', ','.join([str(x) for x in survey_after[userid]])]) for userid in pilot_ids]))

In [None]:
def onehot_decode(x):
  """Return the position of the single non-zero entry of `x`.

  Fails an assertion unless exactly one entry is non-zero.
  """
  hot_positions = np.nonzero(x)[0]
  assert len(hot_positions) == 1
  return hot_positions[0]

In [None]:
# Action index treated as "no input" when extracting input events
# (1 is the encoding of the key state [0, 1] that key releases restore).
NOOP = 1
NOOPS = [NOOP]
def compute_in_events_for_comb_traj(traj, acts):
  """Return the normalized times at which a non-NOOP action is encoded in the observation.

  The last 6 entries of each observation are decoded as a one-hot action. Every
  step except the final one whose decoded action differs from NOOP contributes
  its index divided by len(traj). `acts` is unused.
  """
  event_steps = []
  for step, observation in enumerate(traj[:-1]):
    if onehot_decode(observation[-6:]) != NOOP:
      event_steps.append(step)
  return list(np.array(event_steps) / len(traj))

In [None]:
# Subset of pilots whose rollouts are used for the action-level analysis below.
analysis_pilot_ids = ['spike', 'jet', 'faye', 'julia', 'vicious', 'grencia']

In [None]:
# Load the analysis pilots' episodes from both data directories. Each episode
# is transposed with zip(*rollout) from a list of per-step tuples into one
# tuple per field: [0] observations, [1] actions, [2] rewards, ...,
# [-1] intended actions (the field order follows run_ep's rollout tuple).
eval_rollouts = []
comb_rollouts = []
for pilot_id in analysis_pilot_ids:
  with open(os.path.join(newton_data_dir, '%s_pilot_policy_demo_rollouts.pkl' % pilot_id), 'rb') as f:
    eval_rollouts.extend([list(zip(*rollout)) for rollout in sum(pickle.load(f), [])])
  with open(os.path.join(aristotle_data_dir, '%s_pilot_policy_demo_rollouts.pkl' % pilot_id), 'rb') as f:
    comb_rollouts.extend([list(zip(*rollout)) for rollout in sum(pickle.load(f), [])])

In [None]:
def n_act_changes(seq):
  """Return the fraction of consecutive pairs in `seq` whose two elements differ."""
  change_flags = []
  for prev, nxt in zip(seq[:-1], seq[1:]):
    change_flags.append(1 if prev != nxt else 0)
  return np.mean(change_flags)

In [None]:
def compute_act_durations(actions, include_last=True):
  """Return the lengths (in steps) of the runs of identical consecutive actions.

  Args:
    actions: sequence of actions.
    include_last: when True (default), the final run is included too. The
      previous implementation dropped it, which lost the trailing run and
      returned [] (a NaN mean downstream) for a sequence that never changes.
      Pass False to reproduce the old output.

  Returns:
    List of run lengths in order of occurrence; [] for an empty sequence. With
    include_last=True the lengths sum to len(actions).
  """
  durations = []
  if len(actions) == 0:
    return durations
  run_len = 1
  for prev, nxt in zip(actions[:-1], actions[1:]):
    if prev != nxt:
      durations.append(run_len)
      run_len = 1
    else:
      run_len += 1
  if include_last:
    # The run still open when the sequence ends.
    durations.append(run_len)
  return durations

In [None]:
def compute_speeds(states):
  """Return, for each state, the Euclidean norm of its entries at indices 2 and 3."""
  norms = []
  for state in states:
    velocity = state[2:4]
    norms.append(np.linalg.norm(velocity))
  return norms

In [None]:
# Per-episode metrics for the newton rollouts. Each `rollout` is transposed:
# rollout[0] = observations, rollout[1] = actions, rollout[2] = rewards.
n_changes = []
rews = []
outcomes = []
ins = []
act_durations = []
speeds = []
for rollout in eval_rollouts:
  n_changes.append(n_act_changes(rollout[1]))
  act_durations.append(np.mean(compute_act_durations(rollout[1])))
  speeds.append(np.mean(compute_speeds(rollout[0])))
  rews.append(sum(rollout[2]))
  # Outcome is the final reward when it is exactly +/-100, otherwise 0.
  outcomes.append(rollout[2][-1] if rollout[2][-1] in [-100, 100] else 0)
  # Normalized times (step index / episode length) of all non-NOOP actions.
  ts = [t for t, a in enumerate(rollout[1]) if a != NOOP]
  ins.extend(list(np.array(ts) / len(rollout[1])))

In [None]:
# Same metrics for the aristotle rollouts. Action changes and durations are
# computed on rollout[-1] (the intended_action field) rather than on the raw
# actions in rollout[1]; the input events still use rollout[1].
comb_n_changes = []
comb_rews = []
comb_outcomes = []
comb_ins = []
comb_act_durations = []
comb_speeds = []
for rollout in comb_rollouts:
  comb_n_changes.append(n_act_changes(rollout[-1]))
  comb_act_durations.append(np.mean(compute_act_durations(rollout[-1])))
  comb_speeds.append(np.mean(compute_speeds(rollout[0])))
  comb_rews.append(sum(rollout[2]))
  # Outcome is the final reward when it is exactly +/-100, otherwise 0.
  comb_outcomes.append(rollout[2][-1] if rollout[2][-1] in [-100, 100] else 0)
  # Normalized times (step index / episode length) of all non-NOOP actions.
  ts = [t for t, a in enumerate(rollout[1]) if a not in NOOPS]
  comb_ins.extend(list(np.array(ts) / len(rollout[1])))

In [None]:
# Map every outcome other than +/-100 to 0. The loops above already apply the
# same rule, so this leaves their results unchanged.
outcomes = [x if x in [-100, 100] else 0 for x in outcomes]
comb_outcomes = [x if x in [-100, 100] else 0 for x in comb_outcomes]

In [None]:
# One point per episode: fraction of action changes against total episode reward.
plt.xlabel('Fraction of Actions that Differ from Previous Action')
plt.ylabel('Reward')
plt.scatter(n_changes, rews, label='Solo Human Pilot', alpha=0.5, color='gray', linewidth=0)
plt.scatter(comb_n_changes, comb_rews, label='Human Pilot + Copilot', alpha=0.5, color='orange', linewidth=0)
plt.legend(loc='best')
plt.show()

In [None]:
# Pool both conditions, then convert the per-condition lists to arrays.
# np.concatenate is used instead of list `+` so the cell stays correct when it
# is re-run: after the conversions below the names hold arrays, and `+` on
# arrays adds element-wise (or fails on a shape mismatch) instead of joining.
all_n_changes = np.concatenate([n_changes, comb_n_changes])
all_outcomes = np.concatenate([outcomes, comb_outcomes])
n_changes = np.array(n_changes)
comb_n_changes = np.array(comb_n_changes)
outcomes = np.array(outcomes)
comb_outcomes = np.array(comb_outcomes)

In [None]:
# Histogram of per-episode actions per minute, computed as
# 60 / (mean action duration in steps * 0.2).
# NOTE(review): 0.2 is presumably the number of seconds per step — confirm
# against the environment's step rate.
plt.xlabel('Actions Per Minute (APM)')
plt.ylabel('Number of Trajectories')
plt.hist(60 / (np.array(act_durations) * 0.2), alpha=0.5, label='Unassisted', color='gray')#, normed=True)
plt.hist(60 / (np.array(comb_act_durations) * 0.2), alpha=0.5, label='Assisted', color='orange')#, normed=True)
plt.legend(loc='best')
plt.title('Lunar Lander User Study')
plt.savefig(os.path.join(data_dir, 'lander-apm.png'), bbox_inches='tight')
plt.show()

In [None]:
# Overall APM per condition (same 0.2 factor as in the histogram, truncated to
# an integer) and the assisted / unassisted ratio.
apm = int(60 / (np.mean(act_durations) * 0.2))
comb_apm = int(60 / (np.mean(comb_act_durations) * 0.2))
apm, comb_apm, comb_apm / apm

In [None]:
# Mean speed per condition and the relative reduction of the assisted mean
# with respect to the unassisted one.
np.mean(speeds), np.mean(comb_speeds), (1 - np.mean(comb_speeds) / np.mean(speeds))

In [None]:
# Outcomes and observation sequences of every pilot in pilot_ids, for the
# trajectory plots.
# NOTE: this rebinds eval_rollouts / comb_rollouts (previously the analysis
# pilots' rollouts); after the loop they hold only the last pilot's episodes.
solo_outcomes = []
solo_traj = []
assisted_outcomes = []
assisted_traj = []
for pilot_id in pilot_ids:
  with open(os.path.join(newton_data_dir, '%s_pilot_policy_demo_rollouts.pkl' % pilot_id), 'rb') as f:
    eval_rollouts = [list(zip(*rollout)) for rollout in sum(pickle.load(f), [])]
  with open(os.path.join(aristotle_data_dir, '%s_pilot_policy_demo_rollouts.pkl' % pilot_id), 'rb') as f:
    comb_rollouts = [list(zip(*rollout)) for rollout in sum(pickle.load(f), [])]
  # Outcome is the final reward when it is exactly +/-100, otherwise 0;
  # rollout[0] is the episode's sequence of observations.
  solo_outcomes.extend([rollout[2][-1] if rollout[2][-1] in [-100, 100] else 0 for rollout in eval_rollouts])
  solo_traj.extend([rollout[0] for rollout in eval_rollouts])
  assisted_outcomes.extend([rollout[2][-1] if rollout[2][-1] in [-100, 100] else 0 for rollout in comb_rollouts])
  assisted_traj.extend([rollout[0] for rollout in comb_rollouts])

In [None]:
# Values -0.8, -0.6, ..., 0.8 rounded to one decimal.
# NOTE(review): presumably the goal x-positions matching train_goals 1..9 — confirm.
goals = [round(float(x), 1) for x in np.arange(-0.8, 1, 0.2)]
# Outcome codes (final rewards) that plot_trajectories uses to pick a colour map.
SUCCESS = 100
CRASH = -100

In [None]:
def plot_trajectories(outcomes, trajectories, title, file_name=None, G=0, show_goal=True):
  """Scatter-plot the x/y paths of all trajectories whose goal equals `G`.

  Every 5th observation of a trajectory is plotted, coloured by time step with
  a colour map chosen from the outcome: YlGn for SUCCESS, YlOrRd for CRASH and
  gray otherwise. The goal is read from the 9th observation entry of the
  first sampled step.

  Args:
    outcomes: outcome code per trajectory.
    trajectories: sequences of observations, parallel to `outcomes`.
    title: figure title.
    file_name: if given, the figure is saved under this name in data_dir.
    G: goal value a trajectory must have to be drawn.
    show_goal: if True, mark the goal with a star at height 0.
  """
  outcome_cmaps = {SUCCESS: mpl.cm.YlGn, CRASH: mpl.cm.YlOrRd}
  plt.title(title)

  for outcome, trajectory in zip(outcomes, trajectories):
    # Transpose the sub-sampled observations and keep the first 9 components;
    # only x, y and the goal component are used.
    xs, ys, _, _, _, _, _, _, goal_vals = list(zip(*trajectory[::5]))[:9]
    if goal_vals[0] == G:
      cmap = outcome_cmaps.get(outcome, mpl.cm.gray)
      plt.scatter(xs, ys, c=range(len(xs)), cmap=cmap, alpha=0.75, linewidth=0)
      if show_goal:
        plt.scatter([goal_vals[0]], [0], marker='*', color='yellow', edgecolor='black', linewidth=1, s=300, alpha=0.5)

  plt.xlim([-1, 1])
  plt.ylim([-0.1, 1.1])
  plt.xticks([])
  plt.yticks([])
  plt.axis('off')
  if file_name is not None:
    plt.savefig(os.path.join(data_dir, file_name), bbox_inches='tight')
  plt.show()

In [None]:
# Unassisted trajectories for the default goal (G=0), saved as a PDF in data_dir.
plot_trajectories(solo_outcomes, solo_traj, 'Unassisted', 'lander-unassisted-traj.pdf')

In [None]:
# Assisted trajectories for the default goal (G=0), saved as a PDF in data_dir.
plot_trajectories(assisted_outcomes, assisted_traj, 'Assisted', 'lander-assisted-traj.pdf')