In [1]:
# Config UI

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
import learning_network.evaluate_moral_dqn as moral_dqn
import environment.gym_grid_driving.envs.grid_driving as grid_driving

import gym
import torch

from learning_network.model import DQNModel

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Location passed to load_model() in the simulator cell.
model_path = 'models/double_dqn'

# Default environment settings; every entry is forwarded to gym.make() as a
# keyword argument by load_env(). The 'lanes', 'width' and 'random_seed'
# entries are overwritten by update_config(), and the two 'features' slots
# by update_features().
env_config = {
    # Five identical lanes: one car each, speed range [-2, -1].
    'lanes': [
        grid_driving.LaneSpec(cars=1, speed_range=[-2, -1])
        for _lane_idx in range(5)
    ],
    'width': 10,
    # NOTE(review): x=9 equals width - 1 for the default width, but
    # update_config() changes 'width' without touching this start position --
    # confirm that is intended.
    'agent_pos_init': grid_driving.Point(9, 2),
    'finish_position': grid_driving.Point(0, 2),
    'random_lane_speed': False,
    'ensure_initial_solvable': False,

    'moral_reward_model_path': 'models/moral_reward',
    # Placeholders; slot (id - 1) is filled in by update_features(id, ...).
    'features': [None, None],
    'observations': [
        grid_driving.ObsSpec(1, (2, 1)),
        grid_driving.ObsSpec(2, (2, 3))
    ],

    'agent_speed_range': [-2, -1],

    'rewards': grid_driving.DenseReward,
    'observation_type': 'tensor',
    'mask': None,
    'stochasticity': 0.0,

    'random_seed': 0,
}


def update_config(
        num_lanes,
        num_cars_per_lane,
        width,
        random_seed):
    """Copy the 'Config' slider values into the shared ``env_config`` dict.

    Replaces ``env_config['lanes']`` with ``num_lanes`` freshly built lane
    specs, each holding ``num_cars_per_lane`` cars with speed range
    ``[-2, -1]``, and stores ``width`` and ``random_seed`` under their keys.

    Mutates ``env_config`` in place and returns ``None``.
    """
    env_config['lanes'] = [
        grid_driving.LaneSpec(cars=num_cars_per_lane, speed_range=[-2, -1])
        for _ in range(num_lanes)
    ]
    env_config.update({'width': width, 'random_seed': random_seed})


def update_features(
        id,
        ped_ped,
        barrier,
        crossing_signal,
        man,
        woman,
        pregnant,
        stroller,
        old_man,
        old_woman,
        boy,
        girl,
        homeless,
        large_woman,
        large_man,
        criminal,
        m_executive,
        f_executive,
        f_athlete,
        m_athlete,
        f_doctor,
        m_doctor,
        dog,
        cat):
    """Build a ``FeatSpec`` from one slider set and store it in ``env_config``.

    ``id`` is the 1-based slot number: the new spec is written to
    ``env_config['features'][id - 1]``. The remaining parameters are handed
    to ``grid_driving.FeatSpec`` positionally, in signature order, after
    ``id`` and two fixed arguments (``False`` and ``0``) whose meaning is
    defined by ``FeatSpec`` itself (not visible in this notebook).

    The parameter names must stay as they are: the sliders are registered
    under these exact names and their values arrive as keyword arguments.

    Mutates ``env_config`` in place and returns ``None``.
    """
    # Slider values in the positional order FeatSpec is called with.
    slider_values = (
        ped_ped, barrier, crossing_signal,
        man, woman, pregnant, stroller,
        old_man, old_woman, boy, girl,
        homeless, large_woman, large_man, criminal,
        m_executive, f_executive, f_athlete, m_athlete,
        f_doctor, m_doctor,
        dog, cat,
    )
    slot = id - 1
    env_config['features'][slot] = grid_driving.FeatSpec(
        id, False, 0, *slider_values)


# Keys for the per-slot feature sliders. They mirror, in order, the
# parameters of update_features() that follow `id`, because the slider values
# are delivered to that function under these names.
feature_names = (
    'ped_ped', 'barrier', 'crossing_signal',
    'man', 'woman', 'pregnant', 'stroller',
    'old_man', 'old_woman', 'boy', 'girl',
    'homeless', 'large_woman', 'large_man', 'criminal',
    'm_executive', 'f_executive', 'f_athlete', 'm_athlete',
    'f_doctor', 'm_doctor',
    'dog', 'cat',
)

def _feature_slider_set():
    """Return a new dict with one 0..5 IntSlider (start 0) per feature name."""
    return {
        name: widgets.IntSlider(
            min=0, max=5, step=1, value=0, description=name)
        for name in feature_names}


def _column(title, sliders):
    """Stack a title label above the given sliders' widgets in a VBox."""
    return widgets.VBox([widgets.Label(title)] + list(sliders.values()))


# Sliders for the general environment settings consumed by update_config().
sliders_config = {
    'num_lanes': widgets.IntSlider(
        min=5, max=10, step=1, value=5, description='Lanes'),
    'num_cars_per_lane': widgets.IntSlider(
        min=1, max=5, step=1, value=1, description='Cars per Lane'),
    'width': widgets.IntSlider(
        min=10, max=20, step=1, value=10, description='Width'),
    'random_seed': widgets.IntSlider(
        min=0, max=100, step=1, value=0, description='Seed')
}

# Two independent slider sets, one per feature slot.
sliders_1 = _feature_slider_set()
sliders_2 = _feature_slider_set()

# Lay the three slider groups out side by side. The columns are built before
# the 'id' entries are added below, so only real sliders end up in the boxes.
ui_config = _column('Config', sliders_config)
ui_1 = _column('1', sliders_1)
ui_2 = _column('2', sliders_2)
ui = widgets.HBox([ui_config, ui_1, ui_2])

# Constant, non-displayed `id` control per set: it tells update_features()
# which slot of env_config['features'] that slider set writes to.
sliders_1['id'] = widgets.fixed(1)
sliders_2['id'] = widgets.fixed(2)

# Wire the controls to their callbacks.
out_config = widgets.interactive_output(update_config, sliders_config)
out_1 = widgets.interactive_output(update_features, sliders_1)
out_2 = widgets.interactive_output(update_features, sliders_2)

display(ui, out_config, out_1, out_2)

HBox(children=(VBox(children=(Label(value='Config'), IntSlider(value=5, description='Lanes', max=10, min=5), I…

Output()

Output()

Output()

In [8]:
# Simulator

def load_model(model_path):
    """Return the model that ``DQNModel.load`` produces for ``model_path``."""
    loaded = DQNModel.load(model_path)
    return loaded

def load_env(env_config):
    """Create the 'MoralGridDriving-v0' gym environment.

    Every entry of ``env_config`` is forwarded to ``gym.make`` as a keyword
    argument; the created environment is returned.
    """
    env_id = 'MoralGridDriving-v0'
    return gym.make(env_id, **env_config)

# Load the network, switch it to evaluation mode and move it onto `device`.
model = load_model(model_path).eval().to(device)

# Build the environment from the configuration as it currently stands.
env = load_env(env_config)

# Text rendering of every state seen during the episode, initial state first.
outputs = []

state = env.reset()
outputs.append(env.render(mode='ansi'))

# Roll the episode forward for at most 100 steps.
for step in range(100):
    # Turn the observation into a float tensor with a leading batch axis.
    batch = torch.tensor(state, dtype=torch.float, device=device).unsqueeze(0)

    # Greedy choice: index of the largest entry in the model's 'rewards'
    # output; no gradients are needed for inference.
    with torch.no_grad():
        scores = model(batch)['rewards']
        action = torch.argmax(scores).item()

    state, reward, done, info = env.step(action)
    outputs.append(env.render(mode='ansi'))

    if done:
        break

def display_output(timestep):
    """Print the rendering stored at index ``timestep`` of ``outputs``."""
    print(outputs[timestep])


# Slider covering every recorded rendering (indices 0 .. len(outputs) - 1).
timestep_slider = widgets.IntSlider(
    value=0, min=0, max=len(outputs) - 1, step=1,
    description='timestep', layout=widgets.Layout(width='25%'))

# Re-run display_output whenever the slider moves; show the rendering above
# the slider.
out = widgets.interactive_output(display_output, {'timestep': timestep_slider})
output_ui = widgets.VBox([out, timestep_slider])
display(output_ui)

VBox(children=(Output(), IntSlider(value=0, description='timestep', layout=Layout(width='25%'), max=6)))