In [2]:
# Switch the kernel's working directory to `learners`; the relative path used
# when saving the demos at the end of the notebook (../experts/...) is resolved from here.
%cd learners

/home/buehrle/dev/InteractionImitation/scratch/etienne/pillbox/learners


In [3]:
# Load IPython's autoreload extension; mode 2 re-imports all modules before
# each cell execution so edits to local modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [1]:
import torch
import numpy as np

In [9]:
# Save expert demos to ../experts/intersim:intersim-v0/demos.npz
# Make sure different experts are split up (stored as separate trajectories).

from intersim.envs.simulator import InteractionSimulator
from intersim.utils import get_map_path, get_svt, SVT_to_stateactions
import gym
from tqdm import tqdm

def pillbox_demo(observations, actions, rewards):
    """Pack per-trajectory data into the flat demo dictionary format.

    Args:
        observations: sequence with one state array per trajectory.
        actions: sequence with one action array per trajectory; must have the
            same length as ``observations``.
        rewards: rewards to summarise. Objects that already provide ``mean``
            and ``std`` (e.g. a numpy array) are used as-is; anything else
            (list, tuple, ...) is converted with ``np.asarray`` first.

    Returns:
        dict with the metadata keys ``'env'``, ``'num_trajs'``,
        ``'mean_reward'`` and ``'std_reward'``, followed by one entry per
        trajectory keyed by its index as a string (``'0'``, ``'1'``, ...),
        each of the form ``{'states': ..., 'actions': ...}``.

    Raises:
        ValueError: if ``observations`` and ``actions`` differ in length.
    """
    # zip() would silently drop the surplus trajectories, leaving 'num_trajs'
    # out of sync with the entries actually stored -- fail loudly instead.
    if len(observations) != len(actions):
        raise ValueError(
            'observations and actions must contain the same number of trajectories '
            f'(got {len(observations)} and {len(actions)}).'
        )

    # Accept plain sequences of rewards; leave array-like inputs untouched so
    # their own mean()/std() semantics are preserved.
    if not (hasattr(rewards, 'mean') and hasattr(rewards, 'std')):
        rewards = np.asarray(rewards)

    demo = {
        'env': 'intersim:intersim-v0',
        'num_trajs': len(observations),
        'mean_reward': rewards.mean(),
        'std_reward': rewards.std(),
    }
    for index, (states, acts) in enumerate(zip(observations, actions)):
        demo[str(index)] = {'states': states, 'actions': acts}
    return demo

def _flatten_observation(obs):
    """Build one observation tensor from a single simulator observation dict.

    The last axis of ``obs['relative_state']`` is reduced from six components
    to five: components 0, 1, 4 and 5 are kept, components 2 and 3 are merged
    into their Euclidean norm. ``obs['state']`` is then prepended along dim 1.
    Assumes ``obs['state']`` is (n_actors, 5) and ``obs['relative_state']`` is
    (n_actors, n_actors, 6) -- TODO confirm against the environment.
    """
    rel = obs['relative_state']
    relative_state = torch.stack((
        rel[..., 0],
        rel[..., 1],
        (rel[..., 2]**2 + rel[..., 3]**2).sqrt(),
        rel[..., 4],
        rel[..., 5],
    ), -1)
    return torch.cat((obs['state'].unsqueeze(1), relative_state), 1)


def _drop_row(rows, index):
    """Return ``rows`` with the entry at position ``index`` (dim 0) removed."""
    return torch.cat((rows[:index], rows[index + 1:]))


def _alive_span(trajectory):
    """Return ``(start, end)`` bounding the timesteps that hold any non-NaN value.

    ``trajectory[start:end]`` runs from the first to the last timestep that is
    not entirely NaN. If every timestep is NaN the span is ``(0, 0)``, i.e. an
    empty slice, instead of failing on a min/max over an empty tensor.
    """
    alive = (~trajectory.isnan().all(2).all(1)).nonzero()
    if alive.numel() == 0:
        return 0, 0
    return int(alive.min()), int(alive.max()) + 1


def intersim_expert_demos(loc, track):
    """Roll out a recorded track in the simulator and collect per-agent demos.

    The environment ``'intersim:intersim-v0'`` is stepped once for every state
    in ``svt.simstate[1:]``, each time with ``env.target_state(s, mu=.01)`` as
    the action. The observation seen before each step, the action reported in
    ``info['action_taken']`` and the step reward are recorded. The recordings
    are then regrouped per agent and trimmed to the window between the agent's
    first and last timestep that is not entirely NaN.

    Args:
        loc: location identifier passed to ``get_svt`` and ``get_map_path``.
        track: track identifier passed to ``get_svt``.

    Returns:
        The dictionary produced by ``pillbox_demo`` from the per-agent numpy
        observation arrays, per-agent numpy action arrays and the array of
        step rewards. Agents without any non-NaN observation get zero-length
        trajectories so that trajectory keys stay aligned with agent indices.

    Raises:
        AssertionError: if the environment reports ``done`` during the rollout.
    """
    svt, _ = get_svt(loc, track)
    osm = get_map_path(loc)

    n_actors = svt.simstate.size(1)
    step_observations = []
    step_actions = []
    rewards = []

    print('Simulating')
    env = gym.make('intersim:intersim-v0', svt=svt, map_path=osm)
    obs, info = env.reset()
    for s in tqdm(svt.simstate[1:]):
        # Record the observation *before* stepping so it pairs with the action
        # taken from it.
        step_observations.append(_flatten_observation(obs))
        obs, r, done, info = env.step(env.target_state(s, mu=.01))
        step_actions.append(info['action_taken'])
        rewards.append(r)
        assert not done, 'Episode terminated during expert demonstration.'

    # transpose to per-agent observations and actions
    print('Transposing')
    # Row 0 of each agent's slice is its own state (prepended above), so the
    # agent's relative state with respect to itself sits at row i + 1 and is dropped.
    observations = [
        torch.stack([_drop_row(o[i], i + 1) for o in step_observations])
        for i in range(n_actors)
    ]
    actions = [torch.stack([a[i] for a in step_actions]) for i in range(n_actors)]

    print('Trimming')
    # trim observations and actions to start/end of trajectory
    spans = [_alive_span(o) for o in observations]
    observations = [o[start:end] for o, (start, end) in zip(observations, spans)]
    actions = [a[start:end] for a, (start, end) in zip(actions, spans)]

    # NOTE: observations keep all rows along dim 1; no cropping to the largest
    # number of simultaneously non-NaN rows is applied.

    observations = [o.numpy() for o in observations]
    actions = [a.numpy() for a in actions]
    rewards = np.array(rewards)

    return pillbox_demo(observations, actions, rewards)

In [10]:
# Roll out the recorded track (loc=0, track=0) and collect the per-agent expert demos.
demos = intersim_expert_demos(loc=0, track=0)

Simulating
Custom Vehicle Trajectory Paths
Map Path: datasets/maps/DR_USA_Roundabout_FT.osm
Environment Reset


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 3006/3006 [01:17<00:00, 38.87it/s]


Transposing
Trimming


In [6]:
# Sanity check: number of trajectories stored in the demo dictionary.
demos['num_trajs']

151

In [7]:
# Sanity check: shape of the state array of the trajectory stored under key '25'.
demos['25']['states'].shape

(71, 151, 5)

In [8]:
# Write the demo dictionary to an .npz archive, one archive entry per top-level key.
# NOTE: the per-trajectory values are nested dicts, which numpy stores as pickled
# object arrays -- reading them back requires np.load(..., allow_pickle=True).
# Assumes the target directory already exists -- TODO confirm.
np.savez('../experts/intersim:intersim-v0/demos.npz', **demos)