In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from __future__ import division

import pickle
import os
from collections import defaultdict
from copy import deepcopy

import numpy as np
import pandas as pd
from statsmodels.stats.anova import AnovaRM
import statsmodels.api as sm
import gym

from sensei import utils
from sensei import ase
from sensei.envs import LanderEnv

In [None]:
from matplotlib import pyplot as plt
import matplotlib as mpl
%matplotlib inline

# Raise matplotlib's default font size for every figure drawn after this cell.
mpl.rcParams.update({'font.size': 18})

In [None]:
# Root of the study data; each participant's files live in a sub-directory
# named after their user id (see the loading cell below).
data_dir = utils.lander_data_dir
# Every figure saved by this notebook goes here.
fig_dir = os.path.join(data_dir, 'figures')
if not os.path.exists(fig_dir):
  os.makedirs(fig_dir)
# List the directory once instead of once per candidate id.
existing_entries = set(os.listdir(data_dir))
user_ids = [str(i) for i in range(12) if str(i) in existing_entries]

In [None]:
# NOTE: this replaces the participant list built in the previous cell. While it is
# in effect, every cell that iterates `user_ids` (data loading and everything derived
# from it) sees only user '12', which is the participant that the video and
# trajectory cells further down index directly.
user_ids = ['12'] # for videos

In [None]:
# Create the gym 'LunarLander-v2' environment and wrap it in the project's LanderEnv;
# `env` is the object handed to utils.compute_perf_metrics in later cells.
base_env = gym.make('LunarLander-v2')
env = LanderEnv(base_env)

In [None]:
# Per-participant pickled results; both dicts are keyed by user id.
baseline_guide_evals_of_user = {}
train_logs_of_user = {}
for user_id in user_ids:
  user_data_dir = os.path.join(data_dir, user_id)

  # NOTE(review): pickle.load can execute arbitrary code embedded in the file;
  # only point data_dir at study data you trust.
  baselines_eval_path = os.path.join(user_data_dir, 'guide_evals.pkl')
  with open(baselines_eval_path, 'rb') as f:
    baseline_guide_evals = pickle.load(f)

  train_logs_path = os.path.join(user_data_dir, 'train_logs.pkl')
  with open(train_logs_path, 'rb') as f:
    train_logs = pickle.load(f)

  baseline_guide_evals_of_user[user_id] = baseline_guide_evals
  # Key by user_id (not by directory path) so both dicts share the same keys.
  train_logs_of_user[user_id] = train_logs

In [None]:
def update_metrics(rollout):
  """Mask the 'tilt' metric on steps where element 1 of the observation is not positive.

  Each step is a sequence whose last element is an info dict holding a 'tilt'
  entry. The observation is taken from info['agent_obs'] when present and from
  step[1] otherwise. (Element 1 is used as the y coordinate elsewhere in this
  notebook.)

  Args:
    rollout: list of steps. It is modified in place: each step is stored back as
      a tuple and each info dict's 'tilt' is overwritten.

  Returns:
    The same `rollout` list.
  """
  for t, step in enumerate(rollout):
    info = step[-1]
    # The fallback is the whole observation; it is indexed once, below.
    obs = info.get('agent_obs', step[1])
    info['tilt'] = info['tilt'] if obs[1] > 0 else np.nan
    rollout[t] = tuple(step)
  return rollout

In [None]:
# Mask the tilt metric in every stored rollout, then attach freshly computed
# performance metrics to each (participant, guide) entry under 'perf'.
for user_id, evals_by_guide in baseline_guide_evals_of_user.items():
  for guide_name in evals_by_guide:
    guide_eval = evals_by_guide[guide_name]
    masked_rollouts = list(map(update_metrics, guide_eval['rollouts']))
    guide_eval['perf'] = utils.compute_perf_metrics(masked_rollouts, env)

In [None]:
# Pool every participant's rollouts under the guide that produced them.
rollouts_of_guide = defaultdict(list)
for evals_by_guide in baseline_guide_evals_of_user.values():
  for guide_name, guide_eval in evals_by_guide.items():
    rollouts_of_guide[guide_name] += guide_eval['rollouts']

# Performance metrics per guide, computed over the pooled rollouts.
perf_of_guide = {
  guide_name: utils.compute_perf_metrics(pooled_rollouts, env)
  for guide_name, pooled_rollouts in rollouts_of_guide.items()
}

In [None]:
# Flatten the per-guide rollout lists in a single pass.
rollouts = [rollout for guide_rollouts in rollouts_of_guide.values() for rollout in guide_rollouts]
# For each rollout, record the first step index at which element 1 of the
# observation is negative; rollouts where that never happens contribute nothing.
ts = []
for rollout in rollouts:
  for t, x in enumerate(rollout):
    # Fall back to the whole observation x[1] when 'agent_obs' was not logged.
    if x[-1].get('agent_obs', x[1])[1] < 0:
      ts.append(t)
      break
# Truncated mean of those step indices; used below to cut the time-series plot.
mean_ttl = int(np.mean(ts))
mean_ttl

In [None]:
# Mean +/- standard-error band of the chosen metric over time, one band per
# condition, truncated at mean_ttl steps.
metric = 'tilt'
mean_key = '%s_t' % metric
stderr_key = '%s_stderr_t' % metric
custom_labels = {'learned': 'ASE (Our Method)', 'iden': 'Unassisted (Baseline)'}

plt.xlabel('Time')
plt.ylabel(utils.label_of_perf_met[metric])
plt.title('Lunar Lander')
for guide_name in ['iden', 'learned']:
  perf = perf_of_guide[guide_name]
  ys = np.array(perf[mean_key][:mean_ttl])
  yerrs = np.array(perf[stderr_key][:mean_ttl])
  xs = np.arange(0, len(ys), 1)
  if guide_name in custom_labels:
    label = custom_labels[guide_name]
  else:
    label = utils.label_of_guide[guide_name]
  color = utils.color_of_guide[guide_name]
  y_mins, y_maxs = ys - yerrs, ys + yerrs
  plt.fill_between(
    xs, y_mins, y_maxs,
    where=y_maxs >= y_mins, interpolate=False,
    label=label, color=color, alpha=0.5)
  plt.plot(xs, ys, color=color)
plt.legend(loc='lower right', prop={'size': 18})
plt.savefig(os.path.join(fig_dir, 'lander-user-study.pdf'), bbox_inches='tight')
plt.show()

In [None]:
# Repeated-measures ANOVA of each metric with participant as subject and
# assistance condition as the within-subject factor.
n_users = len(baseline_guide_evals_of_user)
depvar = 'response'
subject = 'user_id'
within = 'condition'
metrics = ['tilt']
# (guide key in the eval dict, condition label in the ANOVA table)
conditions = [('iden', 'unassisted'), ('learned', 'assisted')]
for metric in metrics:
  rows = [
    {subject: user_id, depvar: evals_by_guide[guide_key]['perf'][metric], within: condition}
    for user_id, evals_by_guide in baseline_guide_evals_of_user.items()
    for guide_key, condition in conditions
  ]
  data = pd.DataFrame(rows)
  res = AnovaRM(data=data, depvar=depvar, subject=subject, within=[within]).fit()
  print(res)

In [None]:
# Survey statements; each one is used as a row label in the results table
# printed by the ANOVA cell further down.
questions = [
  'I could tell when the lander was tilted',
  'I was able to straighten the lander before it tilted out of control'
]

In [None]:
# Hard-coded survey answers, indexed as responses[user][phase][question].
# The analysis below labels phase 0 'unassisted' and phase 1 'assisted'.
# NOTE(review): presumably a 1-7 agreement scale (values here span 2-7) — confirm.
responses = [
  [[7, 5], [7, 2]],
  [[4, 3], [6, 5]],
  [[6, 5], [6, 6]],
  [[3, 3], [5, 4]],
  [[6, 4], [7, 4]],
  [[7, 4], [7, 3]],
  [[6, 6], [6, 7]],
  [[7, 6], [6, 6]],
  [[5, 3], [5, 4]],
  [[6, 5], [7, 6]],
  [[6, 5], [7, 3]],
  [[5, 4], [7, 5]],
]

In [None]:
# Re-index the survey answers from [user][phase][question] to
# [phase][question][user]; slots with no answer stay NaN.
n_users = len(responses)
n_phases = len(responses[0])
responses_of_q = [[[np.nan] * n_users for _ in questions] for _ in range(n_phases)]
for user_idx, phases_for_user in enumerate(responses):
  for phase_idx in range(n_phases):
    for q_idx, answer in enumerate(phases_for_user[phase_idx]):
      responses_of_q[phase_idx][q_idx][user_idx] = answer

In [None]:
# Repeated-measures ANOVA per survey question (unassisted vs. assisted), printed
# as one LaTeX table row per question with significant results in bold.
n_users = len(responses)
depvar = 'response'
subject = 'user_id'
within = 'condition'
for i, q in enumerate(questions):
  unassisted = responses_of_q[0][i]
  assisted = responses_of_q[1][i]
  rows = []
  # Iterate over the survey table itself rather than `user_ids`. That list
  # describes which rollout directories were loaded and may be overridden
  # (e.g. to ['12']), which would index past the n_users survey rows.
  for user_idx in range(n_users):
    rows.append({subject: user_idx, depvar: unassisted[user_idx], within: 'unassisted'})
    rows.append({subject: user_idx, depvar: assisted[user_idx], within: 'assisted'})
  data = pd.DataFrame(rows)
  aovrm = AnovaRM(data=data, depvar=depvar, subject=subject, within=[within])
  res = aovrm.fit()
  p = res.anova_table['Pr > F'].values[0]
  significant = p < 0.05
  print('%s & $%s%s%s$ & %0.2f & %s%0.2f%s \\\\' % (
    q,
    '\\mathbf{' if significant else '',
    utils.discretize_p_value(p),
    '}' if significant else '',
    np.nanmean(unassisted),
    '\\textbf{' if significant else '',
    np.nanmean(assisted),
    '}' if significant else ''))

In [None]:
# Per-episode, per-participant performance: perfs_of_guide[guide][episode] is a
# list with one metrics dict per participant.
guide_names = ['prac', 'iden', 'learned']
n_rollouts_of_guide = {
  'prac': 5,
  'iden': 10,
  'learned': 10
}
perfs_of_guide = {}
for guide_name, n_rollouts in n_rollouts_of_guide.items():
  perfs_by_episode = []
  for i in range(n_rollouts):
    perfs_by_episode.append([
      utils.compute_perf_metrics([evals_by_guide[guide_name]['rollouts'][i]], env)
      for evals_by_guide in baseline_guide_evals_of_user.values()
    ])
  perfs_of_guide[guide_name] = perfs_by_episode

In [None]:
# Scatter of per-episode tilt (at a fixed timestep) for every participant, with an
# OLS trend line per condition. Conditions are laid out back to back on the x-axis.
plt.xlabel('Episode Number')
plt.ylabel('Tilt at Timestep 80')
plt.title('Lunar Lander')
extract_metric = lambda x: x['tilt_t'][80]
guide_names = ['iden', 'learned']
for i, guide_name in enumerate(guide_names):
  perfs = perfs_of_guide[guide_name]  # perfs[episode][user]
  # Flattened episode-major: all users of episode 0, then all users of episode 1, ...
  tilts = [extract_metric(user_perf) for perf in perfs for user_perf in perf]
  if guide_name == 'learned':
    label = 'ASE (Our Method)'
  else:
    label = utils.label_of_guide[guide_name]
  color = utils.color_of_guide[guide_name]
  # Offset so this condition's episodes follow the previous condition's.
  shift = sum(len(perfs_of_guide[guide_names[j]]) for j in range(i))
  n_users = len(perfs[0])
  episode_numbers = np.arange(1 + shift, 1 + len(perfs) + shift, 1)
  # np.repeat (not np.tile): each episode number must appear n_users times in a
  # row so that xs lines up with the episode-major ordering of `tilts`.
  xs = np.repeat(episode_numbers, n_users)
  ys = np.array(tilts)
  plt.scatter(xs, ys, color=color, alpha=0.25)

  # The regressor is the 0-based within-condition episode index.
  results = sm.OLS(ys, sm.add_constant(xs - shift - 1)).fit()
  X_plot = np.linspace(1, len(perfs), 100)
  # Evaluate the fit at the same 0-based index (X_plot - 1) that it was trained on.
  plt.plot(X_plot + shift, (X_plot - 1) * results.params[1] + results.params[0], label=label, color=color, linestyle='--', linewidth=2)

plt.legend(loc='upper left', prop={'size': 12}, bbox_to_anchor=(0.2, -0.2))
plt.savefig(os.path.join(fig_dir, 'lander-user-study-learning-effect.pdf'), bbox_inches='tight')
plt.show()

In [None]:
# For each participant's assisted ('learned') rollouts, plot the angle that was
# displayed against the lander's logged angle, sorted by the logged angle.
plt.xlabel(r'Angle of Lander ($\mathbf{o}_t$)')
plt.ylabel(r'Angle of Tilt Indicator ($\tilde{\mathbf{o}}_t$)')
plt.title('Lunar Lander')
xs = []
ys = []
# One pass only: the curves always come from the 'learned' rollouts, so looping
# over guide names would just draw every line twice.
for baseline_guide_evals in baseline_guide_evals_of_user.values():
  rollouts = baseline_guide_evals['learned']['rollouts']
  orig_angs = []
  shown_angs = []
  for rollout in rollouts:
    for x in rollout:
      orig_angs.append(x[-1]['agent_obs'][4])  # x-axis: angle from the logged agent observation
      shown_angs.append(x[1][4])  # y-axis: angle in element 1 of the step

  # Sort both series by the logged angle so the line runs left to right.
  idxes = sorted(range(len(orig_angs)), key=lambda i: orig_angs[i])
  orig_angs = [orig_angs[idx] for idx in idxes]
  shown_angs = [shown_angs[idx] for idx in idxes]

  plt.plot(orig_angs, shown_angs, color='orange')
  xs.extend(orig_angs)
  ys.extend(shown_angs)

# Identity line over the range covered by both axes: what an unmodified display would show.
corner = [min(max(xs), max(ys)), max(min(xs), min(ys))]
plt.plot(corner, corner, linestyle='--', color='gray', label='Unassisted (Baseline)')

# Degenerate line whose only purpose is to add a single legend entry for the orange curves.
plt.plot([0, 0], [0, 0], color='orange', label='ASE (Our Method)')

plt.legend(loc='lower right', prop={'size': 18})
plt.savefig(os.path.join(fig_dir, 'lander-user-study-assistance-policy.pdf'), bbox_inches='tight')
plt.show()

In [None]:
from IPython.core.display import display
from IPython.core.display import HTML
from matplotlib import animation

def outline_img(img, thickness=5, intensity=1):
  """Return a copy of `img` with a border drawn along all four edges.

  Border pixels have every channel set to 0 and then channel 0 set to
  `intensity`. The input array is left untouched, so images stored in a rollout
  are not permanently altered by visualising them.

  Args:
    img: array indexed as [row, column, channel].
    thickness: border width in pixels; values <= 0 draw nothing.
    intensity: value written to channel 0 of the border pixels.

  Returns:
    A new array of the same shape and dtype as `img`.
  """
  outlined = img.copy()
  if thickness <= 0:
    # Guard: img[-0:] would select the whole image and wipe it.
    return outlined
  edges = (
    (slice(None, thickness), slice(None)),   # top rows
    (slice(-thickness, None), slice(None)),  # bottom rows
    (slice(None), slice(None, thickness)),   # left columns
    (slice(None), slice(-thickness, None)),  # right columns
  )
  for edge in edges:
    outlined[edge] = 0
    outlined[edge + (0,)] = intensity
  return outlined

# One-pixel-wide, 400-row column of 255s placed between panels of a composite frame.
gap = np.full((400, 1, 3), 255, dtype='uint8')
def viz_rollout(rollout, guide_name, outline=True):
  """Build side-by-side composite frames for one rollout.

  Iteration stops at the first step whose observation (info['agent_obs'], or
  step[1] when that is absent) has a negative element 1.

  Args:
    rollout: sequence of steps whose last element is an info dict holding
      'faded_img', 'mod_img' and 'img'.
    guide_name: 'iden' produces two panels (faded_img | img); any other value
      produces three (faded_img | mod_img | img).
    outline: when true, the panel associated with `guide_name` is passed through
      outline_img ('prac' -> img, 'iden' -> faded_img, 'learned' -> mod_img).

  Returns:
    List of composite frames, one per visited step.
  """
  highlighted = {'prac': 'img', 'iden': 'faded_img', 'learned': 'mod_img'}.get(guide_name)
  frames = []
  for step in rollout:
    info = step[-1]
    if info.get('agent_obs', step[1])[1] < 0:
      break
    panels = {key: info[key] for key in ('faded_img', 'mod_img', 'img')}
    if outline and highlighted is not None:
      panels[highlighted] = outline_img(panels[highlighted], thickness=5, intensity=255)
    if guide_name == 'iden':
      layout = (panels['faded_img'], gap, panels['img'])
    else:
      layout = (panels['faded_img'], gap, panels['mod_img'], gap, panels['img'])
    frames.append(np.concatenate(layout, axis=1))
  return frames

def animate_frames(frames):
  """Wrap a list of image frames in a matplotlib ArtistAnimation.

  The figure is closed before the animation is built, so calling this does not
  also leave a static figure open.

  Args:
    frames: iterable of images accepted by plt.imshow.

  Returns:
    The ArtistAnimation (30 ms between frames, 1000 ms pause before repeating).
  """
  fig = plt.figure(figsize=(20, 10))
  plt.axis('off')
  artists_per_frame = []
  for frame in frames:
    artists_per_frame.append([plt.imshow(frame, animated=True)])
  plt.close()
  return animation.ArtistAnimation(fig, artists_per_frame, interval=30, blit=True, repeat_delay=1000)

In [None]:
# Concatenate the outlined frames of all of user '12''s unassisted rollouts into one animation.
rollouts = baseline_guide_evals_of_user['12']['iden']['rollouts']
iden_frames = [frame for rollout in rollouts for frame in viz_rollout(rollout, guide_name='iden')]
iden_anim = animate_frames(iden_frames)

In [None]:
# Show the unassisted-condition animation inline as an HTML5 video.
display(HTML(iden_anim.to_html5_video()))

In [None]:
# Write the unassisted-condition animation to 'iden.mp4' in the figures directory.
iden_anim.save(os.path.join(fig_dir, 'iden.mp4'))

In [None]:
# Concatenate the outlined frames of all of user '12''s assisted rollouts into one animation.
rollouts = baseline_guide_evals_of_user['12']['learned']['rollouts']
learned_frames = [frame for rollout in rollouts for frame in viz_rollout(rollout, guide_name='learned')]
learned_anim = animate_frames(learned_frames)

In [None]:
# Show the assisted-condition animation inline as an HTML5 video.
display(HTML(learned_anim.to_html5_video()))

In [None]:
# Write the assisted-condition animation to 'learned.mp4' in the figures directory.
learned_anim.save(os.path.join(fig_dir, 'learned.mp4'))

In [None]:
# Rebuild the frames without outlines, this time keeping one list of frames per
# episode (a list of lists) for the composite trajectory images below.
evals_of_user_12 = baseline_guide_evals_of_user['12']

rollouts = evals_of_user_12['iden']['rollouts']
iden_frames = [viz_rollout(rollout, guide_name='iden', outline=False) for rollout in rollouts]

rollouts = evals_of_user_12['learned']['rollouts']
learned_frames = [viz_rollout(rollout, guide_name='learned', outline=False) for rollout in rollouts]

In [None]:
# Sub-sampling used when building the composite trajectory images: besides the
# first frame and the second-to-last frame of an episode, keep every
# `frame_skip`-th frame starting at index `frame_start`.
frame_skip = 20
frame_start = 50

In [None]:
def recolor_frame(frame, color=(128, 128, 128), n_rows=300):
  """Return a copy of `frame` with the top `n_rows` rows flattened to one colour.

  In those rows, each channel value becomes `color[channel]` where the original
  value was non-zero and 0 where it was zero. Rows below `n_rows` are copied
  unchanged. The input is not modified, so passing a view of a stored frame
  does not alter the stored frame.

  Args:
    frame: image array indexed as [row, column, channel].
    color: per-channel replacement values (default: grey).
    n_rows: number of rows, counted from the top, to recolour.

  Returns:
    A new array with the same shape and dtype as `frame`.
  """
  recolored = frame.copy()
  palette = np.array(color).astype('uint8')[np.newaxis, np.newaxis, :]
  recolored[:n_rows, :, :] = np.minimum(recolored[:n_rows, :, :], 1).astype('uint8') * palette
  return recolored

In [None]:
# Geometry used to map observation coordinates onto the rendered frame.
# NOTE(review): the names mirror constants in gym's lunar_lander.py; SCALE = 1
# appears to be chosen so the mapping yields pixel units — confirm.
VIEWPORT_W = 600
VIEWPORT_H = 400
SCALE = 1
LEG_DOWN = 18
helipad_y = (VIEWPORT_H/SCALE)/4
scale_x = lambda x: x * (VIEWPORT_W/SCALE/2) + (VIEWPORT_W/SCALE/2)
scale_y = lambda y: y * (VIEWPORT_H/SCALE/2) + (helipad_y+LEG_DOWN/SCALE)

# Keep the first element, every frame_skip-th element from frame_start up to (not
# including) the last two, and the second-to-last element.
skip_frames = lambda frames: frames[:1] + frames[frame_start:-2:frame_skip] + frames[-2:-1]

def extract_traj(rollout):
  """Collect sub-sampled lander positions and angles from a list of episodes.

  For every episode, steps are read until the observation (info['agent_obs'], or
  step[1] when absent) has a negative element 1. Elements 0, 1 and 4 of the
  observation are taken as x, y and angle; x and y are mapped through
  scale_x / scale_y, and each episode is thinned with skip_frames.

  Args:
    rollout: despite the singular name, a list of episodes, each a list of steps.

  Returns:
    (xs, ys, angs) numpy arrays over all episodes. ys is flipped
    (VIEWPORT_H / SCALE - y) so that it increases downwards.
  """
  xs = []
  ys = []
  angs = []
  for episode in rollout:
    ep_xs = []
    ep_ys = []
    ep_angs = []
    for ixn in episode:
      obs = ixn[-1].get('agent_obs', ixn[1])
      if obs[1] < 0:
        break
      ep_xs.append(scale_x(obs[0]))
      ep_ys.append(scale_y(obs[1]))
      ep_angs.append(obs[4])
    xs.extend(skip_frames(ep_xs))
    ys.extend(skip_frames(ep_ys))
    angs.extend(skip_frames(ep_angs))
  xs = np.array(xs)
  ys = VIEWPORT_H / SCALE - np.array(ys)
  angs = np.array(angs)
  return xs, ys, angs

In [None]:
# Composite image of the unassisted episodes: recolour the right-most 600 columns
# of each sub-sampled frame, average them, then amplify and clip the top 300 rows.
frames = []
for ep_frames in iden_frames:
  for frame in skip_frames(ep_frames):
    frames.append(recolor_frame(frame[:, -600:, :]))
iden_comp_img = np.array(frames).mean(axis=0)
iden_comp_img[:300, :, :] = np.minimum(128, iden_comp_img[:300, :, :] * 100)
iden_comp_img = iden_comp_img.astype('uint8')

plt.axis('off')
plt.imshow(iden_comp_img)

# Overlay one line segment per sampled step, centred on the lander position and
# oriented from the logged angle.
xs, ys, angs = extract_traj(baseline_guide_evals_of_user['12']['iden']['rollouts'])
for x, y, ang in zip(xs, ys, angs):
  direction = np.array([1, np.tan(-ang+np.pi)])
  dx, dy = direction / np.linalg.norm(direction) * 100
  plt.plot([x-dx, x+dx], [y-dy, y+dy], color='gray', linewidth=2, alpha=0.75)

plt.xlim([100, 500])
plt.ylim([0, 350])

unassisted_fig_path = os.path.join(fig_dir, 'lander-unassisted-trajs.pdf')
plt.savefig(unassisted_fig_path, bbox_inches='tight', dpi=500)

plt.show()

In [None]:
def recolor_frame(frame, color=(255, 165, 0), n_rows=300):
  """Return a copy of `frame` with the top `n_rows` rows flattened to one colour.

  In those rows, each channel value becomes `color[channel]` where the original
  value was non-zero and 0 where it was zero. Rows below `n_rows` are copied
  unchanged. The input is not modified. This matters here because the default
  colour has a zero blue channel: writing it back into the stored frames would
  erase their blue channel for any later recolouring.

  Args:
    frame: image array indexed as [row, column, channel].
    color: per-channel replacement values (default: orange).
    n_rows: number of rows, counted from the top, to recolour.

  Returns:
    A new array with the same shape and dtype as `frame`.
  """
  recolored = frame.copy()
  palette = np.array(color).astype('uint8')[np.newaxis, np.newaxis, :]
  recolored[:n_rows, :, :] = np.minimum(recolored[:n_rows, :, :], 1).astype('uint8') * palette
  return recolored

In [None]:
# Composite image of the assisted episodes: recolour the right-most 600 columns of
# each sub-sampled frame, average them, then amplify and clip the top 300 rows.
# Uses the shared skip_frames helper, like the unassisted cell, instead of
# repeating its slicing expression.
frames = [recolor_frame(frame[:, -600:, :]) for ep_frames in learned_frames for frame in skip_frames(ep_frames)]
learned_comp_img = np.mean(np.array(frames), axis=0)
# Per-channel caps match the orange used by recolor_frame in this section.
for channel, cap in enumerate((255, 165, 0)):
  learned_comp_img[:300, :, channel] = np.minimum(cap, learned_comp_img[:300, :, channel] * 100)
learned_comp_img = learned_comp_img.astype('uint8')

plt.axis('off')
plt.imshow(learned_comp_img)

# Overlay one line segment per sampled step, centred on the lander position and
# oriented from the logged angle.
xs, ys, angs = extract_traj(baseline_guide_evals_of_user['12']['learned']['rollouts'])
for x, y, ang in zip(xs, ys, angs):
  v = np.array([1, np.tan(-ang+np.pi)])
  v /= np.linalg.norm(v)
  v *= 100
  plt.plot([x-v[0], x+v[0]], [y-v[1], y+v[1]], color='orange', linewidth=2, alpha=0.75)

plt.xlim([100, 500])
plt.ylim([0, 350])

plt.savefig(os.path.join(fig_dir, 'lander-assisted-trajs.pdf'), bbox_inches='tight', dpi=500)

plt.show()

In [None]:
def recolor_frame(frame, color=(128, 0, 128), n_rows=300):
  """Return a copy of `frame` with the top `n_rows` rows flattened to one colour.

  In those rows, each channel value becomes `color[channel]` where the original
  value was non-zero and 0 where it was zero. Rows below `n_rows` are copied
  unchanged. The input is not modified.

  Args:
    frame: image array indexed as [row, column, channel].
    color: per-channel replacement values (default: purple).
    n_rows: number of rows, counted from the top, to recolour.

  Returns:
    A new array with the same shape and dtype as `frame`.
  """
  recolored = frame.copy()
  palette = np.array(color).astype('uint8')[np.newaxis, np.newaxis, :]
  recolored[:n_rows, :, :] = np.minimum(recolored[:n_rows, :, :], 1).astype('uint8') * palette
  return recolored

In [None]:
# Recolour the top 300 rows / right-most 600 columns of the very first assisted
# frame (on a private copy), using whichever recolor_frame was defined last.
img = recolor_frame(deepcopy(learned_frames[0][0][:300, -600:, :]))
# Turn every pixel whose channels are all zero white, in one vectorised step
# instead of a Python loop over each pixel.
background = np.isclose(img, 0).all(axis=-1)
img[background] = np.array([255, 255, 255]).astype('uint8')

In [None]:
# Show the recoloured single frame without axes and save it as a PDF.
plain_fig_path = os.path.join(fig_dir, 'lander-plain.pdf')
plt.axis('off')
plt.imshow(img)
plt.savefig(plain_fig_path, bbox_inches='tight', dpi=500)
plt.show()