In [None]:
from __future__ import division
import pickle
import random
import os
import math
import types
import uuid
import time
from copy import copy
from collections import defaultdict, Counter

import numpy as np

from scipy.stats import binom_test, ttest_1samp

In [None]:
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
import matplotlib as mpl
# Notebook-wide matplotlib defaults: figures are saved at 300 dpi and
# the text.usetex flag is switched on for all text in the plots.
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['text.usetex'] = True

In [None]:
# Directory of the human-study logs. The per-pilot pickle files are read from
# here, and the figures and CSV produced by later cells are written back into it.
data_dir = os.path.join('data', 'lunarlander-human')

In [None]:
# Identifiers of the study participants (presumably pseudonyms — TODO confirm).
# Each id selects that pilot's '<id>_pilot_eval.pkl' and '<id>_reward_logs.pkl' files.
pilot_ids = ['spike', 'jet', 'faye', 'vicious', 'ed', 'ein', 'julia', 'punch', 'judy', 'lin', 'grencia', 'laughingbull']

In [None]:
def onehot_decode(x):
  """Return the position of the single nonzero entry of `x`.

  Raises AssertionError unless exactly one entry is nonzero.
  """
  hot_positions = np.nonzero(x)[0]
  n_hot = len(hot_positions)
  assert n_hot == 1
  return hot_positions[0]

In [None]:
# Action index that counts as "no input" when looking for user input events.
NOOP = 1
def compute_in_events_for_comb_traj(traj, acts):
  """Return the normalised times at which a non-NOOP input appears in `traj`.

  For every observation except the last, the final 6 entries are decoded as a
  one-hot vector (presumably the user's input embedded in the observation —
  TODO confirm). Each timestep whose decoded index differs from NOOP is
  reported as a fraction of len(traj).

  `acts` is accepted for call-site symmetry but is not used.
  """
  horizon = len(traj)
  input_steps = []
  for step, obs in enumerate(traj[:-1]):
    if onehot_decode(obs[-6:]) != NOOP:
      input_steps.append(step)
  return list(np.array(input_steps) / horizon)

In [None]:
# Accumulators pooled over all pilots.
eval_rollouts = []   # solo rollouts, one tuple per episode
comb_ins = []        # normalised input-event times, assisted episodes
comb_acts = []       # per-episode action sequences, assisted
comb_rews = []       # per-episode rewards, assisted
comb_outcomes = []   # per-episode outcomes, assisted
comb_states = []     # per-episode state trajectories, assisted

# NOTE: pickle.load can execute arbitrary code; only point data_dir at trusted logs.
for pilot_id in pilot_ids:
  eval_path = os.path.join(data_dir, '%s_pilot_eval.pkl' % pilot_id)
  with open(eval_path, 'rb') as f:
    solo_columns = pickle.load(f)[pilot_id]
  # The pickle stores parallel columns; zip(*) turns them into per-episode tuples.
  eval_rollouts.extend(list(zip(*solo_columns)))

  reward_path = os.path.join(data_dir, '%s_reward_logs.pkl' % pilot_id)
  with open(reward_path, 'rb') as f:
    reward_logs = pickle.load(f)
  # Only the first entry of the log dict is used, and element [0] of each field
  # (presumably the first logged run — TODO confirm).
  reward_data = list(reward_logs.values())[0]
  pilot_trajs = reward_data['trajectories'][0]
  pilot_acts = reward_data['actions'][0]

  comb_acts.extend(pilot_acts)
  comb_rews.extend(reward_data['rewards'][0])
  comb_outcomes.extend(reward_data['outcomes'][0])
  for traj, acts in zip(pilot_trajs, pilot_acts):
    comb_ins.extend(compute_in_events_for_comb_traj(traj, acts))
  comb_states.extend(pilot_trajs)

In [None]:
# Keep only the two terminal codes (-100 and 100); every other outcome value becomes 0.
comb_outcomes = [o if o in (-100, 100) else 0 for o in comb_outcomes]

In [None]:
def n_act_changes(seq):
  """Return the fraction of consecutive pairs in `seq` whose elements differ.

  Despite the name this is a mean, not a count. For sequences with fewer than
  two elements the mean is taken over an empty list.
  """
  switch_flags = []
  for prev, curr in zip(seq[:-1], seq[1:]):
    switch_flags.append(1 if prev != curr else 0)
  return np.mean(switch_flags)

In [None]:
def compute_act_durations(actions, include_last=False):
  """Return the lengths of runs of identical consecutive actions.

  Args:
    actions: sequence of actions compared with `!=`.
    include_last: when False (the default, matching the historical behaviour)
      the final run is omitted because it is cut off by the end of the episode
      rather than by an action change. A sequence that never changes therefore
      yields []. Pass True to also append the final run.

  Returns:
    List of run lengths in order of occurrence; [] for an empty input.
  """
  durations = []
  if len(actions) == 0:
    return durations
  run_len = 1
  for prev, nxt in zip(actions[:-1], actions[1:]):
    if prev != nxt:
      # The action changed: close the current run and start a new one.
      durations.append(run_len)
      run_len = 1
    else:
      run_len += 1
  if include_last:
    durations.append(run_len)
  return durations

In [None]:
def compute_speeds(states):
  """Return the Euclidean norm of entries 2:4 of every state.

  Entries 2 and 3 are presumably the x/y velocity components — TODO confirm.
  """
  norms = []
  for state in states:
    velocity = state[2:4]
    norms.append(np.linalg.norm(velocity))
  return norms

In [None]:
# Per-episode metrics for the solo (unassisted) rollouts.
n_changes, rews, outcomes = [], [], []
ins, act_durations, speeds = [], [], []
for rollout in eval_rollouts:
  # Rollout layout as indexed here: 0 -> rews, 1 -> outcomes, 2 -> states, 3 -> actions.
  solo_actions = rollout[3]
  n_changes.append(n_act_changes(solo_actions))
  # compute_act_durations returns [] when the action never changes, in which
  # case this mean is nan.
  act_durations.append(np.mean(compute_act_durations(solo_actions)))
  speeds.append(np.mean(compute_speeds(rollout[2])))
  rews.append(rollout[0])
  outcomes.append(rollout[1])
  # Timesteps with a non-NOOP action, as a fraction of the episode length.
  ts = [t for t, a in enumerate(solo_actions) if a != NOOP]
  ins.extend(list(np.array(ts) / len(solo_actions)))

In [None]:
# Collapse solo outcomes to {-100, 0, 100}, mirroring what was done for comb_outcomes.
outcomes = [o if o in (-100, 100) else 0 for o in outcomes]
# The same three per-episode metrics as for the solo rollouts, now for assisted episodes.
comb_n_changes = list(map(n_act_changes, comb_acts))
comb_act_durations = [np.mean(compute_act_durations(acts)) for acts in comb_acts]
comb_speeds = [np.mean(compute_speeds(states)) for states in comb_states]

In [None]:
# Larger default font for all following figures.
mpl.rcParams['font.size'] = 16

In [None]:
# Reward against action-switch fraction, one point per episode.
ax = plt.gca()
ax.set_xlabel('Fraction of Actions that Differ from Previous Action')
ax.set_ylabel('Reward')
for xs, ys, label, color in (
  (n_changes, rews, 'Solo Human Pilot', 'gray'),
  (comb_n_changes, comb_rews, 'Human Pilot + Copilot', 'orange'),
):
  ax.scatter(xs, ys, label=label, alpha=0.25, color=color)
ax.legend(loc='best')
plt.show()

In [None]:
# Pool solo and assisted episodes, then turn everything into arrays so the
# histogram cells can select episodes with boolean masks.
# list(...) makes the pooling a true concatenation even when this cell is re-run:
# after the first run n_changes/outcomes are ndarrays, and `array + array`
# would add element-wise (or fail to broadcast) instead of concatenating.
all_n_changes = np.array(list(n_changes) + list(comb_n_changes))
all_outcomes = np.array(list(outcomes) + list(comb_outcomes))
n_changes = np.array(n_changes)
comb_n_changes = np.array(comb_n_changes)
outcomes = np.array(outcomes)
comb_outcomes = np.array(comb_outcomes)

In [None]:
# Distribution of action-switch fraction over assisted episodes, split by outcome.
plt.xlabel('Fraction of Actions that Differ from Previous Action')
plt.ylabel('Frequency (Normalized Number of Trajectories)')
plt.title('Lunar Lander - Assisted Trajectories')
# density=True replaces the removed `normed` keyword; each histogram integrates to 1.
plt.hist(comb_n_changes[comb_outcomes == 100], alpha=0.5, linewidth=0, label='Success', color='green', density=True)
plt.hist(comb_n_changes[comb_outcomes == -100], alpha=0.5, linewidth=0, label='Crash', color='red', density=True)
plt.legend(loc='best')
plt.savefig(os.path.join(data_dir, 'lander-act-switch-vs-succ-assisted.pdf'), bbox_inches='tight')
plt.savefig(os.path.join(data_dir, 'lander-act-switch-vs-succ-assisted.png'), bbox_inches='tight')
plt.show()

In [None]:
# Distribution of action-switch fraction over all episodes (solo + assisted), split by outcome.
plt.xlabel('Fraction of Actions that Differ from Previous Action')
plt.ylabel('Frequency (Normalized Number of Trajectories)')
# density=True replaces the removed `normed` keyword; each histogram integrates to 1.
plt.hist(all_n_changes[all_outcomes == 100], alpha=0.5, linewidth=0, label='Success', color='green', density=True)
plt.hist(all_n_changes[all_outcomes == -100], alpha=0.5, linewidth=0, label='Crash', color='red', density=True)
plt.legend(loc='best')
plt.title('Lunar Lander - All Trajectories')
plt.savefig(os.path.join(data_dir, 'lander-act-switch-vs-succ.pdf'), bbox_inches='tight')
plt.savefig(os.path.join(data_dir, 'lander-act-switch-vs-succ.png'), bbox_inches='tight')
plt.show()

In [None]:
# Distribution of action-switch fraction, solo episodes vs. assisted episodes.
plt.xlabel('Fraction of Actions that Differ from Previous Action')
plt.ylabel('Frequency (Normalized Number of Trajectories)')
# density=True replaces the removed `normed` keyword; each histogram integrates to 1.
plt.hist(n_changes, alpha=0.5, linewidth=0, label='Solo Human Pilot', color='gray', density=True)
plt.hist(comb_n_changes, alpha=0.5, linewidth=0, label='Human Pilot + Copilot', color='orange', density=True)
plt.legend(loc='best')
plt.title('Lunar Lander')
plt.savefig(os.path.join(data_dir, 'lander-act-switch-vs-pilot.pdf'), bbox_inches='tight')
plt.savefig(os.path.join(data_dir, 'lander-act-switch-vs-pilot.png'), bbox_inches='tight')
plt.show()

In [None]:
# User-input density over normalised episode time, drawn as a line through
# the centres of a 20-bin histogram for each condition.
# density=True replaces np.histogram's removed `normed` keyword.
bin_heights, bin_edges = np.histogram(comb_ins, bins=20, density=True)
bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])
plt.plot(bin_centers, bin_heights, label='Human Pilot + Copilot', color='orange')

bin_heights, bin_edges = np.histogram(ins, bins=20, density=True)
bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])
plt.plot(bin_centers, bin_heights, label='Solo Human Pilot', color='gray')

plt.title('Lunar Lander')
plt.xlabel('Time (Fraction of Trajectory)')
plt.ylabel('User Input Frequency (Normalized)')
plt.legend(loc='best')
plt.savefig(os.path.join(data_dir, 'lander-input-freq-vs-time.pdf'), bbox_inches='tight')
plt.savefig(os.path.join(data_dir, 'lander-input-freq-vs-time.png'), bbox_inches='tight')
plt.show()

In [None]:
# Per-episode actions per minute: 60 divided by (mean action duration in steps * 0.2).
# The 0.2 factor is presumably seconds per environment step — TODO confirm.
solo_apm = 60 / (np.array(act_durations) * 0.2)
assisted_apm = 60 / (np.array(comb_act_durations) * 0.2)

ax = plt.gca()
ax.set_xlabel('Actions Per Minute (APM)')
ax.set_ylabel('Number of Episodes')
ax.hist(solo_apm, bins=20, alpha=0.5, label='Solo Human Pilot', color='gray')
ax.hist(assisted_apm, bins=20, alpha=0.5, label='Human Pilot + Copilot', color='orange')
ax.legend(loc='best')
ax.set_title('Lunar Lander User Study')
# Leave the lower limit automatic; clip the x-axis at 250 APM.
ax.set_xlim([None, 250])
plt.savefig(os.path.join(data_dir, 'lander-apm.pdf'), bbox_inches='tight')
plt.show()

In [None]:
# Overall APM for each condition (from the mean of per-episode mean durations,
# truncated to int), followed by the assisted/solo ratio.
mean_solo_duration = np.mean(act_durations)
mean_comb_duration = np.mean(comb_act_durations)
apm = int(60 / (mean_solo_duration * 0.2))
comb_apm = int(60 / (mean_comb_duration * 0.2))
apm_ratio = comb_apm / apm
apm, comb_apm, apm_ratio

In [None]:
# Mean per-episode speed for solo and assisted play, and the relative reduction with assistance.
mean_solo_speed = np.mean(speeds)
mean_comb_speed = np.mean(comb_speeds)
mean_solo_speed, mean_comb_speed, (1 - mean_comb_speed / mean_solo_speed)

In [None]:
# pilot id -> 12-tuple filled by the next cell: (mean, standard error, n) for
# unassisted success, assisted success, unassisted crash, assisted crash, in that order.
stats_of_pilot = {}

In [None]:
# Per-pilot success and crash rates, solo vs. assisted.
# NOTE(review): this rebinds the notebook-wide `eval_rollouts` to a single pilot's
# rollouts, so the earlier pooled list is no longer available after this cell.
for pilot_id in pilot_ids:
  eval_path = os.path.join(data_dir, '%s_pilot_eval.pkl' % pilot_id)
  with open(eval_path, 'rb') as f:
    eval_rollouts = list(zip(*(pickle.load(f)[pilot_id])))
  reward_path = os.path.join(data_dir, '%s_reward_logs.pkl' % pilot_id)
  with open(reward_path, 'rb') as f:
    reward_logs = pickle.load(f)
  reward_data = list(reward_logs.values())[0]
  pilot_assisted_outcomes = reward_data['outcomes'][0]

  # 0/1 indicator lists: outcome 100 counts as success, -100 as crash.
  unassisted_succ = [1 if r[1] == 100 else 0 for r in eval_rollouts]
  assisted_succ = [1 if o == 100 else 0 for o in pilot_assisted_outcomes]
  unassisted_fail = [1 if r[1] == -100 else 0 for r in eval_rollouts]
  assisted_fail = [1 if o == -100 else 0 for o in pilot_assisted_outcomes]

  # Concatenate (mean, standard error of the mean, n) for each indicator list.
  pilot_stats = ()
  for flags in (unassisted_succ, assisted_succ, unassisted_fail, assisted_fail):
    pilot_stats += (np.mean(flags), np.std(flags) / np.sqrt(len(flags)), len(flags))
  stats_of_pilot[pilot_id] = pilot_stats

In [None]:
# Two rows per pilot for the hypothesis-test CSV:
# [pilot id, assisted flag (0 = solo, 1 = assisted), success rate, crash rate].
out = []
for pilot_id in pilot_ids:
  pilot_stats = stats_of_pilot[pilot_id]
  # (assisted flag, index of success mean, index of crash mean) within the 12-tuple.
  for assisted, succ_idx, fail_idx in ((0, 0, 6), (1, 3, 9)):
    out.append([pilot_id, assisted, pilot_stats[succ_idx], pilot_stats[fail_idx]])

In [None]:
# Export the per-pilot rates as CSV for JMP.
# The file has a header line and no trailing newline after the last row.
csv_path = os.path.join(data_dir, 'lander_hyp_test.csv')
csv_header = 'userid,assisted,successrate,crashrate\n'
csv_body = '\n'.join(','.join(str(field) for field in row) for row in out)
with open(csv_path, 'w') as f:
  f.write(csv_header + csv_body)

In [None]:
# Pull the four per-pilot rates out of stats_of_pilot, in pilot_ids order.
# Tuple indices: 0 = solo success, 6 = solo crash, 3 = assisted success, 9 = assisted crash.
solo_pi_succs = [stats_of_pilot[pid][0] for pid in pilot_ids]
solo_pi_crashes = [stats_of_pilot[pid][6] for pid in pilot_ids]
comb_succs = [stats_of_pilot[pid][3] for pid in pilot_ids]
comb_crashes = [stats_of_pilot[pid][9] for pid in pilot_ids]

In [None]:
# Success rate against crash rate: one marker per pilot and condition.
mpl.rcParams['font.size'] = 16
ax = plt.gca()
ax.set_xlabel('Crash Rate')
ax.set_ylabel('Success Rate')
ax.set_title(r'Lunar Lander User Study ($n = %d$)' % len(solo_pi_crashes))
# The solo-copilot point is hard-coded (crash 0.156, success 0.026); the same
# two numbers are used as reference probabilities in the binom_test cells below.
series = (
  (solo_pi_crashes, solo_pi_succs, 'Solo Human Pilot', 'gray', 'o'),
  (comb_crashes, comb_succs, 'Human Pilot + Copilot', 'orange', '^'),
  ([0.156], [0.026], 'Solo Copilot', 'teal', 's'),
)
for xs, ys, label, color, marker in series:
  ax.scatter(xs, ys, label=label, color=color, s=100, marker=marker)
ax.legend(loc='upper right')
ax.set_ylim([-0.05, 1.05])
ax.set_xlim([-0.05, 1.05])
plt.savefig(os.path.join(data_dir, 'lander-user-study-fig.pdf'), bbox_inches='tight')
plt.show()

In [None]:
# Per-pilot binomial tests. For each pilot two lines are printed: first for
# success rates (tuple entries 0-5), then for crash rates (entries 6-11).
# Each line holds two p-values: solo count tested against the assisted rate,
# and assisted count tested against the solo rate.
for pilot_id in pilot_ids:
  pilot_stats = stats_of_pilot[pilot_id]
  for offset in (0, 6):
    ctrl_p, ctrl_std, ctrl_n, treat_p, treat_std, treat_n = pilot_stats[offset:offset + 6]
    # Recover integer counts from rate * n. Rounding is required: the product can
    # land just below the true count (e.g. 0.29 * 100 == 28.999...), which
    # legacy binom_test truncates to 28 and newer SciPy rejects as non-integer.
    ctrl_k = int(round(ctrl_p * ctrl_n))
    treat_k = int(round(treat_p * treat_n))
    print(
      binom_test(ctrl_k, ctrl_n, treat_p),
      binom_test(treat_k, treat_n, ctrl_p)
    )

In [None]:
# Pool success/crash indicators and rewards over all pilots.
unassisted_succ, assisted_succ = [], []
unassisted_fail, assisted_fail = [], []
unassisted_rew, assisted_rew = [], []
# NOTE(review): as in the per-pilot cell, `eval_rollouts` is rebound on every iteration.
for pilot_id in pilot_ids:
  eval_path = os.path.join(data_dir, '%s_pilot_eval.pkl' % pilot_id)
  with open(eval_path, 'rb') as f:
    eval_rollouts = list(zip(*(pickle.load(f)[pilot_id])))
  reward_path = os.path.join(data_dir, '%s_reward_logs.pkl' % pilot_id)
  with open(reward_path, 'rb') as f:
    reward_logs = pickle.load(f)
  reward_data = list(reward_logs.values())[0]
  pilot_assisted_outcomes = reward_data['outcomes'][0]

  unassisted_succ.extend([1 if r[1] == 100 else 0 for r in eval_rollouts])
  assisted_succ.extend([1 if o == 100 else 0 for o in pilot_assisted_outcomes])
  unassisted_fail.extend([1 if r[1] == -100 else 0 for r in eval_rollouts])
  assisted_fail.extend([1 if o == -100 else 0 for o in pilot_assisted_outcomes])
  unassisted_rew.extend([r[0] for r in eval_rollouts])
  assisted_rew.extend(reward_data['rewards'][0])

# Flat 12-tuple of (mean, standard error) pairs in the order:
# solo reward, assisted reward, solo success, assisted success, solo crash, assisted crash.
pooled = []
for sample in (unassisted_rew, assisted_rew,
               unassisted_succ, assisted_succ,
               unassisted_fail, assisted_fail):
  pooled.append(np.mean(sample))
  pooled.append(np.std(sample) / np.sqrt(len(sample)))
stats_of_pilots = tuple(pooled)

In [None]:
# Print three LaTeX table rows (reward, success rate, crash rate), each as
# "solo mean \pm sem & assisted mean \pm sem \\".
# The backslash in \pm is escaped explicitly: a bare '\p' is an invalid escape
# sequence that newer Python versions warn about. The printed text is unchanged.
for i in range(3):
  row = stats_of_pilots[4 * i:4 * i + 4]
  print('%f \\pm %f & %f \\pm %f \\\\' % tuple(row))

In [None]:
# Binomial test of the pooled assisted success count against the hard-coded
# solo-copilot success rate 0.026. binom_test expects the number of successes,
# so the 0/1 indicators are summed (the mean would be a proportion, not a count).
binom_test(int(np.sum(assisted_succ)), len(assisted_succ), 0.026)

In [None]:
# Binomial test of the pooled assisted crash count against the hard-coded
# solo-copilot crash rate 0.156. binom_test expects the number of crashes,
# so the 0/1 indicators are summed (the mean would be a proportion, not a count).
binom_test(int(np.sum(assisted_fail)), len(assisted_fail), 0.156)

In [None]:
# One LaTeX row: solo (reward mean \pm sem, success rate, crash rate) followed by
# the same three quantities for assisted play.
# '\\pm' replaces the invalid escape sequence '\p'; the printed text is unchanged.
print('%f \\pm %f & %f & %f & %f \\pm %f & %f & %f \\\\' % (
  stats_of_pilots[0], stats_of_pilots[1], stats_of_pilots[4], stats_of_pilots[8],
  stats_of_pilots[2], stats_of_pilots[3], stats_of_pilots[6], stats_of_pilots[10],
))

In [None]:
# Collect outcomes and state trajectories for plotting, pooled over pilots.
solo_outcomes, solo_traj = [], []
assisted_outcomes, assisted_traj = [], []
for pilot_id in pilot_ids:
  eval_path = os.path.join(data_dir, '%s_pilot_eval.pkl' % pilot_id)
  with open(eval_path, 'rb') as f:
    # Kept in column form here (not zipped): index 1 = outcomes, index 2 = trajectories.
    eval_rollouts = pickle.load(f)[pilot_id]
  reward_path = os.path.join(data_dir, '%s_reward_logs.pkl' % pilot_id)
  with open(reward_path, 'rb') as f:
    reward_logs = pickle.load(f)
  reward_data = list(reward_logs.values())[0]

  solo_outcomes.extend(eval_rollouts[1])
  solo_traj.extend(eval_rollouts[2])
  assisted_outcomes.extend(reward_data['outcomes'][0])
  assisted_traj.extend(reward_data['trajectories'][0])

In [None]:
# Tally entry 8 of the first observation of every solo trajectory
# (plot_trajectories compares this same entry against its goal argument G).
Counter(traj[0][8] for traj in solo_traj)

In [None]:
# Values from -0.8 upward in steps of 0.2, rounded to one decimal to strip
# floating-point noise from np.arange. Presumably the possible goal x-positions —
# TODO confirm; `goals` is not referenced by the other visible cells.
goals = [round(float(x), 1) for x in np.arange(-0.8, 1, 0.2)]
# Outcome codes that plot_trajectories uses to pick a colour map.
SUCCESS = 100
CRASH = -100

In [None]:
def plot_trajectories(outcomes, trajectories, title, file_name=None, G=-0.8, show_goal=True):
  """Scatter-plot the (x, y) paths of every trajectory whose goal matches `G`.

  Args:
    outcomes: per-trajectory outcome codes; SUCCESS is drawn in a yellow-green
      colour map, CRASH in yellow-orange-red, anything else in gray.
    trajectories: per-trajectory sequences of observations with at least 9
      entries each; entries 0 and 1 are plotted as x and y, entry 8 is the goal.
    title: figure title.
    file_name: if given, the figure is saved under data_dir with this name.
    G: goal value to select; trajectories with a different goal are skipped.
    show_goal: draw a star at (goal, 0) for each plotted trajectory.

  Side effects: sets the global font size to 16, draws on the current figure,
  optionally writes a file, and calls plt.show().
  """
  mpl.rcParams.update({'font.size': 16})
  plt.title(title)

  for outcome, trajectory in zip(outcomes, trajectories):
    # Keep every 5th timestep and transpose into per-dimension columns.
    # Only x, y and g are used; the other names are presumably velocities,
    # angle, angular velocity and leg contacts — TODO confirm.
    x, y, vx, vy, a, av, lc, rc, g = list(zip(*trajectory[::5]))[:9]
    # Tolerant comparison: the stored goal may be a float32 that does not
    # compare exactly equal to the Python float G.
    if not np.isclose(g[0], G):
      continue
    if outcome == SUCCESS:
      cmap = mpl.cm.YlGn
    elif outcome == CRASH:
      cmap = mpl.cm.YlOrRd
    else:
      cmap = mpl.cm.gray
    # Colour encodes progress along the trajectory (index of the subsampled step).
    plt.scatter(x, y, c=range(len(x)), cmap=cmap, alpha=0.75, linewidth=0)
    if show_goal:
      plt.scatter([g[0]], [0], marker='*', color='yellow', edgecolor='black', linewidth=1, s=300, alpha=0.5)

  plt.xlim([-1, 1])
  plt.ylim([-0.1, 1.1])
  plt.xticks([])
  plt.yticks([])
  plt.axis('off')
  if file_name is not None:
    plt.savefig(os.path.join(data_dir, file_name), bbox_inches='tight')
  plt.show()

In [None]:
# Solo trajectories for the default goal; also saved as a PDF under data_dir.
plot_trajectories(
  solo_outcomes,
  solo_traj,
  title='Solo Human Pilot',
  file_name='human-pilot-solo-traj.pdf',
)

In [None]:
# Assisted trajectories for the default goal; also saved as a PDF under data_dir.
plot_trajectories(
  assisted_outcomes,
  assisted_traj,
  title='Human Pilot + Copilot',
  file_name='human-pilot-assisted-traj.pdf',
)

In [None]:
# Survey responses per pilot: 13 integers, one per entry of survey_questions
# (same order). Later cells test them against a midpoint of 4 and bin them into
# 7 columns, i.e. a 1-7 scale. Which end means "agree" is not stated here —
# TODO confirm against the questionnaire.
survey_of_pilot = {
  'vicious': [7, 5, 6, 7, 4, 7, 6, 7, 3, 7, 3, 7, 7],
  'jet': [2, 5, 5, 3, 3, 6, 5, 3, 1, 1, 3, 6, 5],
  'grencia': [6, 6, 6, 5, 5, 4, 7, 6, 5, 5, 5, 6, 6],
  'ed': [1, 7, 6, 4, 6, 6, 5, 4, 2, 3, 4, 7, 7],
  'julia': [2, 7, 5, 4, 3, 6, 6, 4, 5, 6, 3, 6, 6],
  'faye': [5, 6, 5, 5, 4, 5, 5, 6, 6, 6, 7, 7, 5],
  'lin': [3, 6, 7, 4, 3, 4, 7, 5, 4, 7, 6, 7, 7],
  'judy': [5, 2, 7, 5, 6, 7, 7, 1, 6, 7, 3, 7, 7],
  'spike': [4, 5, 7, 7, 5, 3, 7, 7, 5, 4, 5, 7, 7],
  'laughingbull': [5, 2, 4, 2, 3, 3, 6, 4, 2, 4, 6, 7, 3],
  'ein': [5, 7, 6, 5, 5, 4, 6, 3, 2, 2, 5, 7, 7],
  'punch': [7, 7, 7, 6, 3, 6, 7, 4, 6, 6, 6, 7, 7]
}

In [None]:
# Survey statements, in the same order as each pilot's response list.
# These strings are printed verbatim into LaTeX table rows by the last cell.
# NOTE(review): two entries contain a typographic apostrophe (’), which may
# need a UTF-8-aware LaTeX setup — confirm.
survey_questions = [
  'I improved over time',
  'The game was too difficult',
  'The copilot was generally helpful in completing the task',
  'I understood what the copilot was trying to do',
  'I could anticipate the copilot’s behavior',
  'The copilot improved over time',
  'The copilot was helpful in avoiding crashing',
  'The copilot was helpful in avoiding flying out of bounds',
  'The copilot was helpful in landing quickly before time ran out',
  'The copilot was helpful in landing between the flags',
  'The copilot was too aggressive and didn’t give me enough control',
  'I play differently with the copilot than without the copilot',
  'I prefer playing with the copilot'
]

In [None]:
# Regroup responses by question: survey_elts[q] lists every pilot's answer to question q.
n_questions = len(next(iter(survey_of_pilot.values())))
survey_elts = [[] for _ in range(n_questions)]
for responses in survey_of_pilot.values():
  for q_idx, score in enumerate(responses):
    survey_elts[q_idx].append(score)

In [None]:
# One-sample t-test of each question's responses against the scale midpoint 4;
# only the p-value is kept.
survey_ps = []
for scores in survey_elts:
  t_stat, p_value = ttest_1samp(scores, 4)
  survey_ps.append(p_value)

In [None]:
# Display the per-question p-values.
survey_ps

In [None]:
# One-hot tally of every response, shape (pilots, questions, 7).
# A response x increments column 7 - x, so column 0 holds the 7s and column 6 the 1s.
n_pilots = len(survey_of_pilot)
n_questions = len(next(iter(survey_of_pilot.values())))
survey_counts = np.zeros((n_pilots, n_questions, 7))
for pilot_idx, responses in enumerate(survey_of_pilot.values()):
  for q_idx, score in enumerate(responses):
    survey_counts[pilot_idx, q_idx, 7 - score] += 1

In [None]:
# Collapse the pilot axis: (pilots, questions, 7) -> (questions, 7) response counts.
# The ndim guard keeps the cell safe to re-run; without it a second execution
# would sum over the question axis and silently corrupt the table.
if survey_counts.ndim == 3:
  survey_counts = survey_counts.sum(axis=0)

In [None]:
# Display the per-question response counts.
survey_counts

In [None]:
# Weighted mean per question: column k of survey_counts is weighted by k + 1,
# summed over columns, and divided by the number of pilots.
# NOTE(review): survey_counts is filled at column (8 - x) - 1 for a response x,
# so this evaluates to the mean of (8 - x), i.e. a reverse-coded average —
# confirm that this is intended.
column_weights = np.arange(1, 8, 1)
question_avgs = (survey_counts * column_weights).sum(axis=1) / len(pilot_ids)

In [None]:
# Display the per-question averages.
question_avgs

In [None]:
def fmt_pval(p):
  """Bucket a p-value into a significance label.

  Returns the tightest of '<0.001', '<0.01', '<0.05' that `p` is strictly
  below, or '>0.05' otherwise (including p == 0.05 exactly).
  """
  for threshold, label in ((0.001, '<0.001'), (0.01, '<0.01'), (0.05, '<0.05')):
    if p < threshold:
      return label
  return '>0.05'

In [None]:
# Print one LaTeX table row per survey question:
# statement, p-value bucket, average, then the seven response counts.
# Rows with p < 0.05 and an average above 4 are set in bold.
bold_row = '& \\textbf{%s} & $\\mathbf{%s}$ & $\\mathbf{%0.2f}$ & %d & %d & %d & %d & %d & %d & %d \\\\'
plain_row = '& %s & $%s$ & %0.2f & %d & %d & %d & %d & %d & %d & %d \\\\'
for i, question in enumerate(survey_questions):
  highlight = survey_ps[i] < 0.05 and question_avgs[i] > 4
  row_template = bold_row if highlight else plain_row
  fields = (question, fmt_pval(survey_ps[i]), round(question_avgs[i], 2)) + tuple(survey_counts[i])
  print(row_template % fields)