In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from __future__ import division

import pickle
import os
from collections import defaultdict
import types

import numpy as np
import pandas as pd
from statsmodels.stats.anova import AnovaRM
import statsmodels.api as sm

from sensei.envs import GridWorldNavEnv, GuideEnv
from sensei import utils
from sensei import ase
from sensei.gw_user_study import HumanGridWorldUser
from sensei.guide_models import GridWorldGuide

In [None]:
from matplotlib import pyplot as plt
import matplotlib as mpl
%matplotlib inline

# Enlarge the default font size for every figure drawn in this notebook.
mpl.rcParams['font.size'] = 18

In [None]:
# Locate the study data and make sure the figure output directory exists
# before any plot is saved into it.
data_dir = utils.gw_human_data_dir
fig_dir = os.path.join(data_dir, 'figures')
if not os.path.exists(fig_dir):
  os.makedirs(fig_dir)

# List the data directory once (rather than once per candidate id) and keep the
# candidate ids 0..11 that have an entry there, in numeric order.
data_dir_entries = set(os.listdir(data_dir))
user_ids = [str(i) for i in range(12) if str(i) in data_dir_entries]

In [None]:
# Load every user's pickled guide evaluations and training logs.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only point this notebook at trusted study data.
baseline_guide_evals_of_user = {}
train_logs_of_user = {}
for user_id in user_ids:
  user_data_dir = os.path.join(data_dir, user_id)

  baselines_eval_path = os.path.join(user_data_dir, 'guide_evals.pkl')
  with open(baselines_eval_path, 'rb') as f:
    baseline_guide_evals = pickle.load(f)

  train_logs_path = os.path.join(user_data_dir, 'train_logs.pkl')
  with open(train_logs_path, 'rb') as f:
    train_logs = pickle.load(f)

  # Key both dictionaries by user id. The logs used to be keyed by the
  # directory path, which did not match the sibling dictionary or the
  # "_of_user" naming.
  baseline_guide_evals_of_user[user_id] = baseline_guide_evals
  train_logs_of_user[user_id] = train_logs

In [None]:
# Pool the evaluation rollouts of all users under the name of the guide that
# produced them.
rollouts_of_guide = defaultdict(list)
for evals_of_one_user in baseline_guide_evals_of_user.values():
  for guide_name, guide_eval in evals_of_one_user.items():
    rollouts_of_guide[guide_name] += guide_eval['rollouts']

# One set of performance metrics per guide, computed over the pooled rollouts.
perf_of_guide = {
  guide_name: utils.compute_perf_metrics(pooled_rollouts, None, max_ep_len=25)
  for guide_name, pooled_rollouts in rollouts_of_guide.items()
}

In [None]:
# Distance to goal over time for each guide: a line for the per-timestep value
# and a shaded band of +/- the reported stderr around it.
for guide_name in ['iden', 'naive', 'learned']:
  guide_perf = perf_of_guide[guide_name]
  guide_label = utils.label_of_guide[guide_name]
  guide_color = utils.color_of_guide[guide_name]

  dists = np.array(guide_perf['dist_to_goal_t'])
  dist_stderrs = np.array(guide_perf['dist_to_goal_stderr_t'])
  timesteps = np.arange(len(dists))
  band_lo = dists - dist_stderrs
  band_hi = dists + dist_stderrs

  # The legend entry is attached to the band; the line itself is unlabeled.
  plt.fill_between(
    timesteps,
    band_lo,
    band_hi,
    where=band_hi >= band_lo,
    interpolate=False,
    label=guide_label,
    color=guide_color,
    alpha=0.5)
  plt.plot(timesteps, dists, color=guide_color)

plt.title('2D Navigation')
plt.xlabel('Time')
plt.ylabel('Distance to Goal')
plt.legend(loc='upper right', prop={'size': 18})
plt.savefig(os.path.join(fig_dir, 'gw-user-study.pdf'), bbox_inches='tight')
plt.show()

In [None]:
# One-way repeated-measures ANOVA per performance metric: each user contributes
# one 'unassisted' row (from the 'iden' guide) and one 'assisted' row (from the
# 'learned' guide).
n_users = len(baseline_guide_evals_of_user)
depvar = 'response'
subject = 'user_id'
within = 'condition'
metrics = ['rollout_len']
condition_of_guide = [('iden', 'unassisted'), ('learned', 'assisted')]
for metric in metrics:
  rows = [
    {subject: user_id, depvar: evals_of_user[guide_name]['perf'][metric], within: condition}
    for user_id, evals_of_user in baseline_guide_evals_of_user.items()
    for guide_name, condition in condition_of_guide
  ]
  data = pd.DataFrame(rows)
  aovrm = AnovaRM(data=data, depvar=depvar, subject=subject, within=[within])
  res = aovrm.fit()
  print(res)

In [None]:
# Survey statements. The position of a statement in this list is the index of
# its rating inside each per-phase list of `responses`, and the text is printed
# verbatim into the LaTeX table rows.
# NOTE(review): the last statement reads "I believed was in" -- presumably
# "I believed I was in". Confirm against the questionnaire that was actually
# administered before changing the text.
questions = [
  'I was often able to infer my current position and orientation',
  'I was often able to move toward the goal',
  'I often found the guidance helpful',
  'I relied primarily on the most recent guidance to infer my current position and orientation',
  'I relied primarily on past guidance and recent movements to infer my current position and orientation',
  'I often forgot which position and orientation I believed was in'
]

In [None]:
# Survey ratings, indexed as responses[user][phase][question].
# The ANOVA cell treats phase 0 as the unassisted condition and labels phases
# 1 and 2 "Naive ASE" and "ASE" respectively.
# Ratings presumably use a 1-7 agreement scale (every recorded value lies in
# that range) -- TODO confirm.
# NOTE(review): the analysis indexes the user axis with int(user_id), so the
# row order here is assumed to follow the numeric user ids.
responses = [
  [[6, 5, 6, 4, 7, 5], [6, 6, 6, 7, 4, 7], [7, 7, 7, 7, 3, 1]],
  [[7, 6, 7, 7, 3, 2], [5, 5, 4, 3, 6, 5], [7, 7, 7, 7, 3, 1]],
  [[5, 6, 6, 6, 4, 4], [6, 6, 6, 6, 5, 3], [7, 7, 7, 6, 5, 1]],
  [[6, 6, 6, 6, 2, 4], [6, 6, 6, 6, 3, 4], [7, 7, 7, 7, 2, 1]],
  [[2, 3, 6, 5, 6, 5], [6, 6, 6, 5, 6, 2], [7, 7, 7, 5, 7, 1]],
  [[5, 5, 7, 6, 6, 3], [6, 6, 6, 6, 6, 1], [7, 7, 7, 7, 6, 1]],
  [[6, 6, 6, 1, 6, 1], [6, 6, 6, 1, 6, 2], [7, 7, 7, 1, 6, 1]],
  [[6, 6, 6, 2, 6, 2], [5, 6, 5, 4, 6, 3], [7, 7, 7, 7, 5, 2]],
  [[5, 4, 4, 3, 6, 3], [4, 4, 3, 2, 6, 3], [6, 6, 7, 4, 6, 2]],
  [[6, 7, 6, 5, 5, 5], [6, 7, 6, 5, 5, 4], [7, 7, 6, 6, 4, 4]],
  [[7, 7, 7, 4, 4, 1], [7, 4, 7, 6, 6, 2], [7, 7, 7, 7, 2, 1]],
  [[5, 5, 5, 4, 4, 3], [5, 5, 5, 4, 5, 3], [6, 6, 7, 6, 3, 1]],
]

In [None]:
# Re-index the ratings from responses[user][phase][question] to
# responses_of_q[phase][question][user]. Slots start as NaN so that any rating
# that is not filled in stays NaN.
n_users = len(responses)
n_phases = len(responses[0])
responses_of_q = [
  [[np.nan] * n_users for _ in questions]
  for _ in range(n_phases)
]
for user_idx, phases_of_user in enumerate(responses):
  for phase_idx in range(n_phases):
    for q_idx, response in enumerate(phases_of_user[phase_idx]):
      responses_of_q[phase_idx][q_idx][user_idx] = response

In [None]:
# One-way repeated-measures ANOVA with the presence of assistance as a factor
# influencing responses. Each assisted phase (1: Naive ASE, 2: ASE) is compared
# with the unassisted phase 0, one survey question at a time, and each result
# is printed as one row of a LaTeX table.
n_users = len(responses)
depvar = 'response'
subject = 'user_id'
within = 'condition'
assistant_labels = [
  '\\multirow{4}{*}{\\rotatebox[origin=c]{90}{Naive ASE}}',
  '\\multirow{4}{*}{\\rotatebox[origin=c]{90}{ASE}}'
]
# Columns: group label & question & p-value & unassisted mean & assisted mean.
row_format = '%s & %s & $%s%s%s$ & %0.2f & %s%0.2f%s \\\\'
for assisted_phase in [1, 2]:
  for i, q in enumerate(questions):
    # Only the first row of a group carries the rotated multirow label.
    assistant_label = assistant_labels[assisted_phase-1] if i == 0 else ''

    rows = []
    for user_id in user_ids:
      # Ratings are stored positionally, so the numeric id doubles as the index
      # into the user axis of responses_of_q.
      user_idx = int(user_id)
      rows.append({subject: user_idx, depvar: responses_of_q[0][i][user_idx], within: 'unassisted'})
      rows.append({subject: user_idx, depvar: responses_of_q[assisted_phase][i][user_idx], within: 'assisted'})
    data = pd.DataFrame(rows)
    aovrm = AnovaRM(data=data, depvar=depvar, subject=subject, within=[within])
    res = aovrm.fit()
    p = res.anova_table['Pr > F'].values[0]

    # Significant results are set in bold (p-value and assisted mean).
    significant = p < 0.05
    print(row_format % (
      assistant_label,
      q,
      '\\mathbf{' if significant else '',
      utils.discretize_p_value(p),
      '}' if significant else '',
      np.nanmean(responses_of_q[0][i]),
      '\\textbf{' if significant else '',
      np.nanmean(responses_of_q[assisted_phase][i]),
      '}' if significant else ''))
  if assisted_phase == 1:
    # Backslash escaped explicitly: '\m' is an invalid escape sequence that
    # newer Python versions warn about. The printed text is unchanged.
    print('\\midrule')

In [None]:
# Per-episode performance: perfs_of_guide[guide][i] collects one metrics dict
# per rollout for the i-th rollout of that guide, across all users.
guide_names = ['prac', 'iden', 'learned']
n_rollouts_of_guide = {
  'prac': 3,
  'iden': 5,
  'learned': 5
}
perfs_of_guide = {guide_name: [[] for _ in range(n_rollouts)] for guide_name, n_rollouts in n_rollouts_of_guide.items()}
for guide_name, n_rollouts in n_rollouts_of_guide.items():
  for i in range(n_rollouts):
    for baseline_guide_evals in baseline_guide_evals_of_user.values():
      rollouts = [baseline_guide_evals[guide_name]['rollouts'][i]]
      if guide_name == 'iden':
        # The 'iden' entry also pools the i-th 'naive' rollout of the same user.
        rollouts.append(baseline_guide_evals['naive']['rollouts'][i])
      for rollout in rollouts:
        # Score each rollout on its own. Passing the whole `rollouts` list here
        # made every 'iden' entry the pooled metric of both rollouts, appended
        # twice.
        perf = utils.compute_perf_metrics([rollout], None, max_ep_len=25)
        perfs_of_guide[guide_name][i].append(perf)

In [None]:
# Learning-effect figure: one scatter point per rollout against its episode
# number, plus an ordinary-least-squares trend line per guide. The guides are
# laid out one after another along the x axis.
metric = 'rollout_len'
plt.xlabel('Episode Number')
plt.ylabel(utils.label_of_perf_met[metric])
plt.title('2D Navigation')
guide_names = ['iden', 'learned']
for i, guide_name in enumerate(guide_names):
  perfs = perfs_of_guide[guide_name]
  if guide_name == 'learned':
    label = 'ASE (Our Method)'
  elif guide_name == 'iden':
    label = 'Unassisted + Naive ASE (counterbalanced)'
  else:
    label = utils.label_of_guide[guide_name]
  color = utils.color_of_guide[guide_name]

  # Episodes of this guide start right after those of the guides before it.
  shift = sum(len(perfs_of_guide[guide_names[j]]) for j in range(i))
  n_episodes = len(perfs)
  episodes = np.arange(1 + shift, 1 + n_episodes + shift, 1)

  # Build x and y from the same episode-major traversal so that every value is
  # paired with the episode it came from. The previous np.tile-based x's cycled
  # through the episodes and did not line up with the flattened y's.
  xs = np.array([episode for episode, perf in zip(episodes, perfs) for _ in perf])
  ys = np.array([user_perf[metric] for perf in perfs for user_perf in perf])
  plt.scatter(xs, ys, color=color, alpha=0.25)

  # The regressor is the 0-based episode index within this guide.
  results = sm.OLS(ys, sm.add_constant(xs - shift - 1)).fit()
  X_plot = np.linspace(1, n_episodes, 100)
  # X_plot is 1-based, so subtract 1 to evaluate the fit on the regressor's
  # 0-based scale (otherwise the line is offset by one slope unit).
  y_fit = (X_plot - 1)*results.params[1] + results.params[0]
  plt.plot(X_plot + shift, y_fit, label=label, color=color, linestyle='--', linewidth=2)

plt.legend(loc='upper left', prop={'size': 12}, bbox_to_anchor=(0.025, -0.2))
plt.savefig(os.path.join(fig_dir, 'gw-user-study-learning-effect.pdf'), bbox_inches='tight')
plt.show()

In [None]:
# Build the ground-truth observation matrix (rows: observations, columns:
# states) for a gw_size x gw_size gridworld, then create the navigation
# environment from its element-wise log.
gw_size = 5
n_goals = gw_size**2
# Four states per grid cell -- presumably one per heading; TODO confirm.
n_states = 4*gw_size**2
n_objes_per_set = gw_size**2
# Number of placements of each object within each of the three object sets.
n_obj_instances_of_set = [1, 2, 1]
n_obj_sets = len(n_obj_instances_of_set)
n_objes = n_objes_per_set*n_obj_sets
# One extra observation per object set on top of the per-object observations;
# the extra ones are filled in by the second loop below.
n_obses = n_objes + n_obj_sets
ground_truth = np.zeros((n_obses, n_states))
ticks = np.arange(0, gw_size, 1)
poses = utils.enumerate_gw_poses(ticks, ticks)
# NOTE(review): poses_of_obs is filled below but never read in the visible
# notebook.
poses_of_obs = [[] for _ in range(n_obses)]
for obj_set in range(n_obj_sets):
  for obj in range(n_objes_per_set):
    # Each set owns n_objes_per_set+1 consecutive observation ids; the last id
    # of each set is the extra observation handled after this loop.
    obs = obj_set*(n_objes_per_set+1)+obj
    # Assumes `poses` holds 4 consecutive entries per cell, so obj*4 selects a
    # pose located in cell `obj` -- TODO confirm against
    # utils.enumerate_gw_poses.
    obj_poses = [poses[obj*4]]
    # Additional instances go to uniformly random cells. No seed is set in the
    # visible notebook, so these placements can differ between runs.
    for i in range(1, n_obj_instances_of_set[obj_set]):
      obj_poses.append(poses[np.random.choice(list(range(n_objes_per_set)))*4])
    poses_of_obs[obs] = obj_poses
    for obj_pos in obj_poses:
      for state, user_pos in enumerate(poses):
        # Mark states where the object is exactly one step away along one axis
        # and user_pos[2] has the matching value -- presumably "the user is
        # adjacent to and facing the object"; TODO confirm heading encoding.
        conds = []
        conds.append(obj_pos[0] == user_pos[0] and obj_pos[1] == user_pos[1] + 1 and user_pos[2] == 2)
        conds.append(obj_pos[0] == user_pos[0] and obj_pos[1] == user_pos[1] - 1 and user_pos[2] == 0)
        conds.append(obj_pos[1] == user_pos[1] and obj_pos[0] == user_pos[0] + 1 and user_pos[2] == 3)
        conds.append(obj_pos[1] == user_pos[1] and obj_pos[0] == user_pos[0] - 1 and user_pos[2] == 1)
        if any(conds):
          ground_truth[obs, state] = 1

# The extra observation of each set is marked for states on a grid edge with a
# specific user_pos[2] value -- presumably "facing the boundary"; TODO confirm.
for obj_set in range(n_obj_sets):
  obs = obj_set*(n_objes_per_set+1)+n_objes_per_set
  for state, user_pos in enumerate(poses):
    conds = []
    conds.append(user_pos[0] == 0 and user_pos[2] == 1)
    conds.append(user_pos[0] == gw_size - 1 and user_pos[2] == 3)
    conds.append(user_pos[1] == 0 and user_pos[2] == 0)
    conds.append(user_pos[1] == gw_size - 1 and user_pos[2] == 2)
    if any(conds):
      ground_truth[obs, state] = 1

# Smoothing presumably removes exact zeros so that the log below stays finite
# -- TODO confirm against utils.smooth_matrix.
ground_truth = utils.smooth_matrix(ground_truth, n_states, eps=1e-6)
ground_truth_obs_model = np.log(ground_truth)

max_ep_len = gw_size**2
env = GridWorldNavEnv(
  gw_size=gw_size,
  n_goals=n_goals,
  max_ep_len=max_ep_len,
  ground_truth_obs_model=ground_truth_obs_model
)

In [None]:
def is_obs_informative(self, obs):
  """Return whether `obs` is at or beyond the first n_obses // n_obj_sets ids."""
  return obs >= self.n_obses // self.n_obj_sets

def set_practice_mode(self, mode):
  """Store `mode` in the `practice` attribute."""
  self.practice = mode

# Attach the extra attributes and the two helpers above to this env instance.
env.n_objes_per_set = n_objes_per_set
env.n_obj_sets = n_obj_sets
env.practice = False
env.is_obs_informative = types.MethodType(is_obs_informative, env)
env.set_practice_mode = types.MethodType(set_practice_mode, env)

In [None]:
# Session (presumably TensorFlow, going by the helper's name) that is handed to
# the guide model and the guide optimizer; created with gpu_mode off.
sess = utils.make_tf_session(gpu_mode=False)

In [None]:
# Observation ids below n_obses // n_obj_sets get a near-zero weight; all other
# observations keep weight 1.
masked_obses = np.arange(env.n_obses // env.n_obj_sets)
obs_weights = np.ones(env.n_obses)
obs_weights[masked_obses] = 1e-6

# Internal observation model: the ground-truth model smoothed row by row with
# eps = 1 - weight, then mapped back to log space.
internal_obs_model = np.log(utils.smooth_matrix(
  np.exp(env.ground_truth_obs_model),
  env.n_obses,
  eps=(1-obs_weights[:, np.newaxis])))
internal = internal_obs_model

user_init_belief_conf = 1e-9

user_dynamics_model = env.make_dynamics_model(eps=1e-6)
user_model = HumanGridWorldUser(
  env,
  internal_obs_model,
  user_dynamics_model,
  q_func=env.Q,
  init_belief_conf=user_init_belief_conf
)
guide_env = GuideEnv(env, user_model, n_obs_per_act=1)

In [None]:
def get_theta_of_user(user_id):
  """Return one entry of the observation weights stored for a user's guide.

  Builds a GridWorldGuide whose tabular observation model points at the
  'guide_scope.pkl' and 'guide.tf' files in the user's data directory, runs
  the interactive guide optimizer with zero training batches and zero training
  iterations on the user's 'iden' rollouts, calls `guide_model.load()`
  (presumably restoring the saved parameters -- TODO confirm), and reads the
  weights back from the session.

  Args:
    user_id: key into `baseline_guide_evals_of_user`; also the name of the
      user's sub-directory under `utils.gw_human_data_dir`.

  Returns:
    Element [0, 0, 0] of `guide_model.internal_obs_model.obs_weights`.
  """
  user_data_dir = os.path.join(utils.gw_human_data_dir, user_id)

  init_belief_conf = 1-1e-9
  dynamics_model = env.make_dynamics_model(eps=1e-9)
  internal_dynamics_model = env.make_dynamics_model(eps=0.1)

  tabular_obs_model_kwargs = {
    'scope_file': os.path.join(user_data_dir, 'guide_scope.pkl'),
    'tf_file': os.path.join(user_data_dir, 'guide.tf'),
    'user_init_belief_conf': user_init_belief_conf,
    'obs_params_only': True,
    'prior_coeff': 0.,
    'warm_start': False
  }

  guide_model = GridWorldGuide(
    sess,
    env,
    env.ground_truth_obs_model,
    dynamics_model,
    env.Q,
    n_obs_per_act=guide_env.n_obs_per_act,
    prior_internal_obs_model=env.ground_truth_obs_model,
    internal_dynamics_model=internal_dynamics_model,
    tabular_obs_model_kwargs=tabular_obs_model_kwargs,
    learn_internal_obs_model=True,
    init_belief_conf=init_belief_conf,
    user_init_belief_conf=user_init_belief_conf
  )

  guide_evals = baseline_guide_evals_of_user[user_id]
  init_train_rollouts = guide_evals['iden']['rollouts']
  guide_optimizer = ase.InteractiveGuideOptimizer(sess, env, guide_env)
  # Zero batches and zero iterations: no new rollouts are requested and no
  # training iterations run, so an unused dictionary of real training settings
  # that used to be built in this function has been dropped.
  guide_optimizer.run(
    guide_model,
    n_train_batches=0,
    n_rollouts_per_batch=0,
    guide_train_kwargs={'iterations': 0, 'verbose': False},
    verbose=True,
    init_train_rollouts=init_train_rollouts,
    n_eval_rollouts=None
  )
  guide_model.load()

  theta = sess.run(guide_model.internal_obs_model.obs_weights)[0, 0, 0]
  return theta

In [None]:
# One value per user, in the order of `user_ids`. Each call builds a guide
# model and runs the guide optimizer, so this cell may take a while.
thetas = [get_theta_of_user(user_id) for user_id in user_ids]

In [None]:
# Display the per-user values returned by get_theta_of_user.
thetas

In [None]:
# Histogram of the per-user theta values next to a reference histogram placed
# at 1 for the naive baseline; the dashed vertical line at 0 is labeled as the
# ground truth.
hist_specs = [
  (thetas, 'orange', 'ASE (Our Method)'),
  (np.ones(len(thetas)), 'teal', 'Naive ASE (Baseline)'),
]
for hist_values, hist_color, hist_label in hist_specs:
  plt.hist(hist_values, bins=20, color=hist_color, label=hist_label, align='left')
plt.axvline(x=0, linestyle='--', color='black', label='Ground Truth')

plt.title('2D Navigation')
plt.xlabel(r'Learned Model of User Bias $\hat{\theta}$')
plt.ylabel('Number of Users')
plt.xlim([-0.1, 1.1])
plt.yticks(range(0, 14, 2))
plt.legend(loc='upper center')

theta_fig_path = os.path.join(fig_dir, 'gw-learned-theta.pdf')
plt.savefig(theta_fig_path, bbox_inches='tight', dpi=500)
plt.show()