In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from __future__ import division

from collections import defaultdict
import os
import pickle
import uuid

import numpy as np

from pico.gan import PicoGAN
from pico.discrim_models import MLPDiscrim
from pico.envs import CarEnv
from pico.user_models import HumanCarUser
from pico import compression_models
from pico import utils

# Session object shared by every model and environment built in this notebook
# (passed to MLPDiscrim, CarEnv and MLPCompressor); created with gpu_mode=False.
sess = utils.make_tf_session(gpu_mode=False)

In [None]:
# Identifier of the participant/run; it becomes the sub-directory name under
# <utils.car_data_dir>/userstudy/ where all files of this notebook are stored.
user_id = 'pilot'

In [None]:
# All files recorded for this participant live in <car_data_dir>/userstudy/<user_id>.
data_dir = os.path.join(utils.car_data_dir, 'userstudy', user_id)
if not os.path.exists(data_dir):
  os.makedirs(data_dir)

# User model handed to every CarEnv built by make_env.
human_user = HumanCarUser()

# Dimensions shared by the models and environments below.
n_act_blocks, n_act_dims = 8, 3
n_z_dim = 32
n_obs_dims = n_z_dim + 2 * 128

# Settings forwarded to CarEnv.
delay = max_ep_len = 100
discrete_act = True

# Mask limits used when make_env is called with val_mode=False and no explicit limits.
train_mask_limits = (0.5, 0.5)

def make_env(
  val_mode=True,
  apply_mask=None,
  mask_limits=None,
  demo_data=None,
  human_data=None
  ):
  """Build a CarEnv together with a newly constructed discriminator and reward model.

  Args:
    val_mode: forwarded to CarEnv; also selects the default mask limits.
    apply_mask: forwarded to CarEnv.
    mask_limits: explicit limits forwarded to CarEnv. When None, train_mask_limits
      is used if val_mode is falsy, otherwise (None, None).
    demo_data: forwarded to CarEnv as demo_data.
    human_data: forwarded to CarEnv as data.

  Returns:
    The constructed CarEnv.
  """
  def artifact(name):
    # Every model file lives directly inside data_dir.
    return os.path.join(data_dir, name)

  # Settings common to both MLPs.
  shared_mlp_kwargs = dict(
    n_obs_dims=n_obs_dims,
    discrete_act=discrete_act,
    n_layers=2,
    layer_size=256
  )

  # Each network gets its own random scope name.
  discrim = MLPDiscrim(
    sess,
    n_act_dims=n_act_dims,
    struct=True,
    scope=str(uuid.uuid4()),
    scope_file=artifact('discrim_scope.pkl'),
    tf_file=artifact('discrim.tf'),
    **shared_mlp_kwargs
  )
  discrim.noverfit = True

  rew_mod = MLPDiscrim(
    sess,
    n_act_dims=n_act_blocks,
    scope=str(uuid.uuid4()),
    scope_file=artifact('rew_mod_scope.pkl'),
    tf_file=artifact('rew_mod.tf'),
    **shared_mlp_kwargs
  )
  # Only the reward model has init_tf_vars() called on it here.
  rew_mod.init_tf_vars()

  # An explicit mask_limits always wins; otherwise the default depends on val_mode.
  if mask_limits is None:
    mask_limits = (None, None) if val_mode else train_mask_limits

  return CarEnv(
    sess,
    rew_mod=rew_mod,
    discrim=discrim,
    human_user=human_user,
    n_act_blocks=n_act_blocks,
    discrete_act=discrete_act,
    delay=delay,
    max_ep_len=max_ep_len,
    save_imgs=False,
    val_mode=val_mode,
    apply_mask=apply_mask,
    mask_limits=mask_limits,
    demo_data=demo_data,
    data=human_data
  )

# Passed as apply_mask to every make_env call in this notebook.
apply_mask = None

def baseline_mask_policy(real_obses):
  """Baseline policy: one uniform random value in [0, 1) per action block for each row of real_obses."""
  n_rows = real_obses.shape[0]
  return np.random.random(size=(n_rows, n_act_blocks))

# Names of the metrics printed after each phase.
metric_names = ['rtn', 'succ', 'crash', 'min_dist']

In [None]:
# Settings for the demonstration phase (demo_env and the episodes run in it below).
mask_limit = 1  # used as both entries of mask_limits when demo_env is built
n_calib_eps = 10  # number of episodes whose results are kept in demo_perfs
n_prac_eps = 5  # number of practice episodes whose results are discarded

In [None]:
# Environment for collecting demonstrations: val_mode=False with both mask
# limits pinned to mask_limit.
demo_env = make_env(val_mode=False, mask_limits=(mask_limit, mask_limit), apply_mask=apply_mask)

In [None]:
# Practice episodes: run the random baseline mask policy in demo_env and
# ignore what utils.run_ep returns.
# NOTE(review): if CarEnv appends every episode to demo_env.data, these practice
# episodes would also end up in the demo_data saved below -- confirm.
for _ in range(n_prac_eps):
  utils.run_ep(baseline_mask_policy, demo_env)

In [None]:
# Calibration episodes: keep the value returned by utils.run_ep for each of them.
demo_perfs = [utils.run_ep(baseline_mask_policy, demo_env) for _ in range(n_calib_eps)]

In [None]:
# Pickle file holding the (demo_perfs, demo_data) tuple.
demo_path = os.path.join(data_dir, 'demo_data.pkl')

In [None]:
# Alternative to collecting new demonstrations: restore (demo_perfs, demo_data)
# written by an earlier session. This cell raises if demo_path does not exist
# yet, and when run after the collection cells above it replaces the demo_perfs
# collected in this session with the stored ones.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(demo_path, 'rb') as f:
  demo_perfs, demo_data = pickle.load(f)

In [None]:
# Summarise the per-episode results; the result is indexed by the names in
# metric_names in the cells below.
demo_metrics = utils.compute_seq_metrics(demo_perfs)

In [None]:
# Print the mean of every tracked metric over the demonstration episodes.
for k in metric_names:
  print(k, np.mean(demo_metrics[k]))

In [None]:
# Demonstration dataset: the 'fake_obses' recorded by demo_env are stored under
# the 'obses' key, next to the recorded actions, and passed through
# utils.split_data with train_frac=0.99.
demo_data = utils.split_data(
  {
    'obses': demo_env.data['fake_obses'],
    'actions': demo_env.data['actions']
  },
  train_frac=0.99
)

# Persist the episode results and the split dataset together as one tuple.
with open(demo_path, 'wb') as demo_file:
  pickle.dump((demo_perfs, demo_data), demo_file, pickle.HIGHEST_PROTOCOL)

In [None]:
# Number of episodes recorded for the offline dataset (offline_perfs / env.data).
n_offline_eps = 15

In [None]:
# Data-collection environment: val_mode=False with train_mask_limits passed
# explicitly (the same value make_env would pick on its own for val_mode=False).
env = make_env(val_mode=False, mask_limits=train_mask_limits, apply_mask=apply_mask)

In [None]:
# Run n_offline_eps episodes with the random baseline mask policy and keep
# what utils.run_ep returns for each of them.
offline_perfs = [utils.run_ep(baseline_mask_policy, env) for _ in range(n_offline_eps)]

In [None]:
# Pickle file holding the (offline_perfs, human_data) tuple.
human_data_path = os.path.join(data_dir, 'human_data.pkl')

In [None]:
# Alternative to recording new episodes: restore (offline_perfs, human_data)
# written by an earlier session. This cell raises if human_data_path does not
# exist yet, and when run after the collection cell above it replaces the
# offline_perfs collected in this session with the stored ones.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(human_data_path, 'rb') as f:
  offline_perfs, human_data = pickle.load(f)

In [None]:
# Same summary as demo_metrics, computed for the offline episodes.
offline_metrics = utils.compute_seq_metrics(offline_perfs)

In [None]:
# For every metric print: name, mean over the offline episodes, mean over the
# demonstration episodes.
for k in metric_names:
  print(k, np.mean(offline_metrics[k]), np.mean(demo_metrics[k]))

In [None]:
# Split whatever env has recorded (env.data) and store it together with the
# episode results.
# NOTE(review): this reads env.data, not a human_data loaded from disk; if no
# episodes were run in this session, saving here would presumably overwrite the
# stored dataset -- confirm before running.
human_data = utils.split_data(env.data, train_frac=0.99)
with open(human_data_path, 'wb') as f:
  pickle.dump((offline_perfs, human_data), f, pickle.HIGHEST_PROTOCOL)

In [None]:
# Render every stored observation with env.viz_zch, divide the stacked result
# by 255, and attach the arrays to human_data in place.
obses, fake_obses = human_data['obses'], human_data['fake_obses']
human_data['imgs'] = np.array(list(map(env.viz_zch, obses))) / 255.
human_data['fake_imgs'] = np.array(list(map(env.viz_zch, fake_obses))) / 255.

In [None]:
def make_model(env, model_path):
  """Create an MLPCompressor whose scope and tf files live under model_path.

  Args:
    env: environment whose rew_mod attribute is handed to the compressor.
    model_path: directory for the model files; created when it does not exist.

  Returns:
    The new MLPCompressor. No `scope` argument is passed (a uuid-based scope
    was commented out in the original call).
  """
  if not os.path.exists(model_path):
    os.makedirs(model_path)

  compressor_kwargs = {
    'rew_mod': env.rew_mod,
    'n_obs_dims': n_obs_dims,
    'n_act_dims': n_act_blocks,
    'n_user_act_dims': n_act_dims,
    'n_layers': 2,
    'layer_size': 64,
    'scope_file': os.path.join(model_path, 'scope.pkl'),
    'tf_file': os.path.join(model_path, 'model.tf'),
  }
  return compression_models.MLPCompressor(sess, **compressor_kwargs)

In [None]:
# Training settings passed to PicoGAN.train for the compression model ...
model_train_kwargs = dict(
  iterations=10000,
  ftol=1e-6,
  learning_rate=1e-3,
  batch_size=32,
  val_update_freq=1000,
  verbose=True
)

n_iter = 1

# ... for the discriminator ...
discrim_train_kwargs = dict(
  iterations=10000,
  ftol=1e-6,
  learning_rate=1e-3,
  batch_size=32,
  val_update_freq=1000,
  verbose=True
)

# ... and for the reward model. The three dicts are separate objects on purpose,
# so changing one does not affect the others.
rew_mod_train_kwargs = dict(
  iterations=10000,
  ftol=1e-6,
  learning_rate=1e-3,
  batch_size=32,
  val_update_freq=1000,
  verbose=True
)

rew_mod_update_freq = 0

# Every entry of demo_data concatenated with itself along axis 0, then passed
# through utils.split_data again.
aug_demo_data = utils.split_data(
  {key: np.concatenate([arr, arr], axis=0) for key, arr in demo_data.items()},
  train_frac=0.99
)

def run_gan_training(model_path, human_data=None, using_mae=False):
  """Train a compression model with PicoGAN and return what gan.train returns.

  Args:
    model_path: directory handed to make_model for the model's files.
    human_data: forwarded to make_env as human_data.
    using_mae: forwarded unchanged to PicoGAN.train.

  Returns:
    The value returned by PicoGAN.train.
  """
  # A new training environment (val_mode=False) seeded with the augmented demos.
  train_env = make_env(
    val_mode=False,
    apply_mask=apply_mask,
    demo_data=aug_demo_data,
    human_data=human_data
  )
  compressor = make_model(train_env, model_path)

  gan_options = dict(
    verbose=False,
    n_iter=n_iter,
    rew_mod_update_freq=rew_mod_update_freq,
    rew_mod_train_kwargs=rew_mod_train_kwargs,
    discrim_train_kwargs=discrim_train_kwargs,
    discrim_zero_val=0.5,
    using_mae=using_mae
  )
  return PicoGAN(compressor, train_env).train(model_train_kwargs, **gan_options)

In [None]:
# Separate directories for the two trained variants: the model trained with
# using_mae=False and the one trained with using_mae=True.
model_path = os.path.join(data_dir, 'model_0')
mae_model_path = os.path.join(data_dir, 'mae_model_0')

In [None]:
# Train the using_mae=False variant on the recorded human data; its files are
# placed under model_path.
model = run_gan_training(
  model_path=model_path, 
  human_data=human_data,
  using_mae=False
)

In [None]:
# Persist the trained model via its save() method (presumably to the
# scope_file / tf_file given in make_model -- confirm in MLPCompressor).
model.save()

In [None]:
# Train the using_mae=True variant into its own directory. It is restored
# later with load_model(mae_model_path), and make_model places the model's
# files under the path given here, so it must not share model_path with the
# using_mae=False model trained above.
mae_model = run_gan_training(
  model_path=mae_model_path,
  human_data=human_data,
  using_mae=True
)

In [None]:
# Persist the using_mae=True model via its save() method (presumably to the
# scope_file / tf_file given in make_model -- confirm in MLPCompressor).
mae_model.save()

In [None]:
# Evaluation settings.
n_eval_episodes = 15  # episodes per evaluated policy (read by local_eval_model)
# mask_limit is rebound here (it was 1 for the demonstration phase), so
# re-running the demo_env cell after this one would use 0.5 instead.
mask_limit = 0.5

In [None]:
# Evaluation environment: val_mode=True and no explicit limits, so make_env
# forwards mask_limits=(None, None) to CarEnv.
eval_env = make_env(val_mode=True, apply_mask=apply_mask)

In [None]:
def load_model(model_path):
  """Rebuild a compressor against eval_env and restore it by calling its load().

  Args:
    model_path: directory passed to make_model for the model's files.

  Returns:
    The model object after load() has been called on it.
  """
  restored = make_model(eval_env, model_path)
  restored.load()
  return restored

In [None]:
# Restore the using_mae=False model from model_path (rebinds `model`).
model = load_model(model_path)

In [None]:
# Restore the using_mae=True model from mae_model_path (rebinds `mae_model`).
mae_model = load_model(mae_model_path)

In [None]:
# Mask policies under evaluation: the bound `act` methods of the two models.
learned_mask_policy = model.act
mae_mask_policy = mae_model.act

def local_eval_model(compression_model, mask_limit, save_imgs=False):
  """Evaluate a mask policy on the shared eval_env for n_eval_episodes episodes.

  eval_env is modified in place: its mask_limit and save_imgs attributes are
  overwritten on every call.

  NOTE(review): make_env constructs CarEnv with a `mask_limits` pair, while
  this sets a singular `mask_limit` attribute -- confirm that CarEnv actually
  reads `mask_limit`, otherwise the limit given here has no effect.

  Args:
    compression_model: the policy to evaluate; in this notebook it is a
      callable such as model.act or baseline_mask_policy.
    mask_limit: value stored on eval_env.mask_limit.
    save_imgs: value stored on eval_env.save_imgs.

  Returns:
    Whatever utils.evaluate_seq_compression returns.
  """
  eval_env.mask_limit, eval_env.save_imgs = mask_limit, save_imgs
  return utils.evaluate_seq_compression(
    compression_model,
    eval_env,
    n_eval_episodes=n_eval_episodes
  )

In [None]:
# Pickle file holding the metrics_of_model dict.
metrics_path = os.path.join(data_dir, 'metrics.pkl')

In [None]:
# Alternative to re-running the evaluation: restore metrics saved by an earlier
# session. This cell raises if metrics_path does not exist yet.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(metrics_path, 'rb') as f:
  metrics_of_model = pickle.load(f)

In [None]:
# Start from an empty result table, keyed by policy name in the cells below.
# Running this after the load cell above discards the metrics that were loaded.
metrics_of_model = {}

In [None]:
# Evaluate the random baseline mask policy.
metrics_of_model['baseline'] = local_eval_model(baseline_mask_policy, mask_limit)

In [None]:
# Evaluate the policy of the model trained with using_mae=False.
metrics_of_model['learned'] = local_eval_model(learned_mask_policy, mask_limit)

In [None]:
# Evaluate the policy of the model trained with using_mae=True.
metrics_of_model['mae'] = local_eval_model(mae_mask_policy, mask_limit)

In [None]:
# Persist all evaluation results collected so far (overwrites metrics_path).
with open(metrics_path, 'wb') as f:
  pickle.dump(metrics_of_model, f, pickle.HIGHEST_PROTOCOL)

In [None]:
# Summary table, one row per metric. Columns in order: metric name, baseline
# mean, mae mean, learned mean, demonstration mean.
for k in metric_names:
  print(
    k, 
    np.mean(metrics_of_model['baseline'][k]), 
    np.mean(metrics_of_model['mae'][k]), 
    np.mean(metrics_of_model['learned'][k]), 
    np.mean(demo_metrics[k])
  )