## Imports

In [7]:
# Reload imported modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# nb_black extension: formats code cells with black.
%load_ext nb_black
# Interactive matplotlib backend for the classic notebook front end.
%matplotlib notebook

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

### Packages

In [8]:
import functools
import json
import os
import sys
import wandb
from typing import TypeVar, Callable, Any, cast, Tuple, Union
import numpy as np
import pandas as pd
from IPython.lib.display import IFrame
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import LightSource
# Make the parent directory and its scripts/ subdirectory importable.
# NOTE(review): both paths are relative to the kernel's working directory —
# presumably the folder holding this notebook; confirm.
sys.path.append("..")
sys.path.append("../scripts")

<IPython.core.display.Javascript object>

### Project files

In [9]:
from config import ROOT_DIR
from scripts.run.train import override_params, create_wandb_config, create_wandb_name
from scripts.run.core import get_env_and_graph, load_params, run_session
from run.run_from_files import seed_everything

<IPython.core.display.Javascript object>

In [10]:
# Environment and Agent
env_name = "NoisyDrawbridge"
algo_name = "Herald"
# Additional (overriding the json files) parameters for graph generation and run
graph_params = []
run_params = []
# Paths
env_algo_path = os.path.join(f"{ROOT_DIR}/data", env_name, algo_name + "_trained")
wandb_params_path = os.path.join(
    env_algo_path, "wandb_params.json"
)
log_dir = os.path.join(env_algo_path, "log")
# Session
step = 0

<IPython.core.display.Javascript object>

In [6]:
# Bind the override lists from the configuration cell *before* the names
# `run_params` / `graph_params` are rebound to the parameters loaded from disk;
# the partials keep a reference to the original lists.
graph_params_override = functools.partial(override_params, params=graph_params)
run_params_override = functools.partial(override_params, params=run_params)

# Load the stored parameters for the selected environment / agent.
run_params, graph_params, _ = load_params(env_algo_path)

# Apply the overrides to the freshly loaded parameters. The former
# `if graph_params_override:` / `if run_params_override:` guards were removed:
# a functools.partial object is always truthy, so they never skipped anything.
# NOTE(review): the return values are ignored, so override_params is presumably
# expected to modify its argument in place — confirm.
graph_params_override(graph_params)
run_params_override(run_params)

<IPython.core.display.Javascript object>

In [117]:
# Parse the json file at `wandb_params_path` into `wandb_params`.
with open(wandb_params_path) as params_file:
    wandb_params = json.loads(params_file.read())

<IPython.core.display.Javascript object>

In [118]:
# Build the environment and the agent graph from the parameters that were
# loaded (and had the overrides applied) earlier in the notebook.
# The parameters are deliberately NOT reloaded here: calling load_params again
# would rebind `run_params` / `graph_params` to fresh objects and discard any
# overrides, and `env_algo_path` already holds the same value it would be
# recomputed to.
env, graph = get_env_and_graph(run_params, graph_params, wandb_params)

Buffer size level 0: 2000000
Buffer size level 1: 2000000
No logging for env: <DrawbridgeEnv<NoisyDrawbridge-v1>>
Using timesteps:
q_statistics_accumulation_start=500
interruption_start=1000
force_interruption_start_at is 1000


  logger.warn(


<IPython.core.display.Javascript object>

In [119]:
# Checkpoint to restore into the graph. The location is built from ROOT_DIR
# rather than an absolute path inside one user's home directory, so the
# notebook can run on another machine / checkout.
# NOTE(review): assumes ROOT_DIR is the repository root that contains data/
# (the same assumption `env_algo_path` already makes) — confirm.
matching_model = "params_401k.pt"
model_run_dir = "j2faz17p"
load_path = os.path.join(
    f"{ROOT_DIR}/data",
    env_name,
    "herald_trained",
    "model",
    model_run_dir,
    matching_model,
)
graph.load_parameters(load_path)

<IPython.core.display.Javascript object>

In [120]:
# Display the graph's repr to check which nodes it holds and in which order.
graph

HeraldGraph
Nodes: ['HeraldNode', 'HACNode']

<IPython.core.display.Javascript object>

# Q-value analysis

In [121]:
# Switch every critic of node 1's model out of training mode and keep the
# objects returned by `train(False)` for the Q-value queries.
critics_for_inference = []
for critic in graph._nodes[1].algorithm._model.critics:
    critics_for_inference.append(critic.train(False))

# Hand-crafted baseline observation for the Q-value queries.
obs = dict(
    desired_goal=np.array([0.8]),
    partial_observation=dict(
        ship_pos=np.array([-0.95]),
        ship_vel=np.array([0.015]),  # velocity at half of its value
        sails_unfurled=np.array([0.5]),  # sails fairly unfurled
        bridge_phase=np.array([1]),  # is open
        bridge_phase1=np.array([0]),  # is opening
    ),
)

# Baseline action evaluated together with the observation above.
action = dict(
    delta_t_ach=np.array([0.6]),  # 160 timesteps
    goal=dict(
        sails_unfurled=np.array([0.5]),
        ship_pos=np.array([0]),
        ship_vel=np.array([0.015]),
    ),
)

<IPython.core.display.Javascript object>

In [122]:
# Flatten the nested observation / action dicts with the spaces of node 1's
# algorithm, then print both resulting vectors.
node1_algorithm = graph._nodes[1].algorithm
obs_flattened = node1_algorithm._observation_space.flatten_value(obs)
action_flattened = node1_algorithm._action_space.flatten_value(action)
for flat_vector in (obs_flattened, action_flattened):
    print(flat_vector)

[ 0.8    1.     0.     0.5   -0.95   0.015]
[0.6   0.5   0.    0.015]


<IPython.core.display.Javascript object>

In [123]:
# Q-value of the baseline (observation, action) pair: node 0's `_calc_q_value`
# is called with the critics collected from node 1's model.
graph._nodes[0]._calc_q_value(action_flattened, critics_for_inference, obs_flattened)

-7.16757869720459

<IPython.core.display.Javascript object>

# 2D Plots
Q-value vs. ship position for different combinations of the two binary bridge flags (`bridge_phase`, `bridge_phase1`).

In [124]:
# Convert 0.6 — the `delta_t_ach` value used in the baseline action — back with
# the task spec's `unconvert_time` (presumably into environment timesteps; confirm).
graph._nodes[0].subtask.task_spec.unconvert_time(0.6)

160.0

<IPython.core.display.Javascript object>

In [125]:
# Sweep the ship position for each combination of the two binary bridge flags
# and record the Q-value of a goal placed 0.2 ahead of the current position.
distances = np.linspace(-1, 1, 100)
combinations = [[0, 0], [0, 1], [1, 1]]

# Loop-invariant lookups, hoisted out of the sweep.
node1_algorithm = graph._nodes[1].algorithm
calc_q_value = graph._nodes[0]._calc_q_value

q_values = []
for bridge_phase, bridge_phase1 in combinations:
    q_values_row = []
    for distance in distances:
        # Build per-sample copies instead of writing into the shared `obs` /
        # `action` dicts: the baseline values stay untouched, so this cell
        # gives the same result no matter which cells ran before it.
        obs_sample = {
            **obs,
            "partial_observation": {
                **obs["partial_observation"],
                "bridge_phase": np.array([bridge_phase]),
                "bridge_phase1": np.array([bridge_phase1]),
                "ship_pos": np.array([distance]),
            },
        }
        action_sample = {
            **action,
            "goal": {**action["goal"], "ship_pos": np.array([distance]) + 0.2},
        }
        obs_flattened = node1_algorithm._observation_space.flatten_value(obs_sample)
        action_flattened = node1_algorithm._action_space.flatten_value(action_sample)
        q_values_row.append(
            calc_q_value(action_flattened, critics_for_inference, obs_flattened)
        )
    q_values.append(q_values_row)

# One row per flag combination, one column per sampled position.
q_values = np.array(q_values)
q_values.shape

(3, 100)

<IPython.core.display.Javascript object>

In [126]:
# One curve per bridge-flag combination: Q-value as a function of ship position.
fig, ax = plt.subplots()
for combination, q_row in zip(combinations, q_values):
    ax.plot(distances, q_row, label=f"b, b1:{combination}")
# Label the axes so the figure can be read on its own.
ax.set_xlabel("ship_pos")
ax.set_ylabel("Q-value")
# Legend on the axes object; the trailing semicolon hides the Legend repr.
ax.legend();

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x16ba36310>

<IPython.core.display.Javascript object>

In [127]:
# Same sweep as above, but with higher speed requested in the goal (sails_unfurled = 1)

<IPython.core.display.Javascript object>

In [128]:
# Same position sweep per bridge-flag combination, but the goal additionally
# asks for `sails_unfurled` = 1. Only the goal is changed; the observed
# `sails_unfurled` keeps its baseline value.
distances = np.linspace(-1, 1, 100)
combinations = [[0, 0], [0, 1], [1, 1]]

# Loop-invariant lookups, hoisted out of the sweep.
node1_algorithm = graph._nodes[1].algorithm
calc_q_value = graph._nodes[0]._calc_q_value

q_values = []
for bridge_phase, bridge_phase1 in combinations:
    q_values_row = []
    for distance in distances:
        # Per-sample copies: writing `sails_unfurled` = 1 into the shared
        # `action` dict would persist after this cell and silently change the
        # result of every other cell that reads `action`.
        obs_sample = {
            **obs,
            "partial_observation": {
                **obs["partial_observation"],
                "bridge_phase": np.array([bridge_phase]),
                "bridge_phase1": np.array([bridge_phase1]),
                "ship_pos": np.array([distance]),
            },
        }
        action_sample = {
            **action,
            "goal": {
                **action["goal"],
                "ship_pos": np.array([distance]) + 0.2,
                "sails_unfurled": np.array([1]),
            },
        }
        obs_flattened = node1_algorithm._observation_space.flatten_value(obs_sample)
        action_flattened = node1_algorithm._action_space.flatten_value(action_sample)
        q_values_row.append(
            calc_q_value(action_flattened, critics_for_inference, obs_flattened)
        )
    q_values.append(q_values_row)

# One row per flag combination, one column per sampled position.
q_values = np.array(q_values)
q_values.shape

(3, 100)

<IPython.core.display.Javascript object>

In [129]:
# One curve per bridge-flag combination: Q-value as a function of ship position.
fig, ax = plt.subplots()
for combination, q_row in zip(combinations, q_values):
    ax.plot(distances, q_row, label=f"b, b1:{combination}")
# Label the axes so the figure can be read on its own.
ax.set_xlabel("ship_pos")
ax.set_ylabel("Q-value")
# Legend on the axes object; the trailing semicolon hides the Legend repr.
ax.legend();

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x16b8ca730>

<IPython.core.display.Javascript object>

# Actor actions

In [135]:
# Reset the baseline observation before querying the policy.
obs = dict(
    desired_goal=np.array([0.8]),
    partial_observation=dict(
        ship_pos=np.array([-0.95]),
        ship_vel=np.array([0.015]),  # velocity at half of its value
        sails_unfurled=np.array([0.5]),  # sails fairly unfurled
        bridge_phase=np.array([1]),  # is open
        bridge_phase1=np.array([0]),  # is opening
    ),
)

# Reset the baseline action as well.
action = dict(
    delta_t_ach=np.array([0.6]),  # 160 timesteps
    goal=dict(
        sails_unfurled=np.array([0.5]),
        ship_pos=np.array([0]),
        ship_vel=np.array([0.015]),
    ),
)

# Flags handed to the policy call: both determinism switches are turned on.
algo_info = dict(is_deterministic=True, child_be_deterministic=True)

<IPython.core.display.Javascript object>

In [136]:
# Ask node 1's policy for its output on the baseline observation, passing the
# flags from `algo_info` (both determinism switches set to True).
graph._nodes[1].policy(obs, algo_info)

(0,
 {'delta_t_ach': array([-0.58215475]),
  'goal': {'sails_unfurled': array([0.98621815]),
   'ship_pos': array([-0.90437937]),
   'ship_vel': array([0.45421657])}})

<IPython.core.display.Javascript object>

# 3D Plots
## Distance from the start

In [75]:
# Accumulator for the Q-value rows computed over the grid.
q_values = list()
# Both sweep axes cover [-1, 1] with 100 samples each.
times = np.linspace(start=-1, stop=1, num=100)
distances = np.linspace(start=-1, stop=1, num=100)
# Coordinate matrices for the surface plot: x follows `distances`, y follows `times`.
x, y = np.meshgrid(distances, times)
x.shape

(100, 100)

<IPython.core.display.Javascript object>

In [26]:
# Q-value surface over (ship position, bridge_phase); the goal is placed 0.01
# ahead of the current position.

# Loop-invariant lookups, hoisted out of the sweep.
node1_algorithm = graph._nodes[1].algorithm
calc_q_value = graph._nodes[0]._calc_q_value

# Collect rows in a list created here. Appending to the `q_values` left behind
# by an earlier cell made this cell non-idempotent: a second run stacked
# duplicate rows, or failed once `q_values` had been converted to an array.
q_rows = []
for phase in times:
    q_values_row = []
    for distance in distances:
        # Per-sample copies keep the shared `obs` / `action` baseline untouched,
        # so other cells do not inherit the last swept values.
        obs_sample = {
            **obs,
            "partial_observation": {
                **obs["partial_observation"],
                "ship_pos": np.array([distance]),
                "bridge_phase": np.array([phase]),
            },
        }
        action_sample = {
            **action,
            "goal": {**action["goal"], "ship_pos": np.array([distance]) + 0.01},
        }
        obs_flattened = node1_algorithm._observation_space.flatten_value(obs_sample)
        action_flattened = node1_algorithm._action_space.flatten_value(action_sample)
        q_values_row.append(
            calc_q_value(action_flattened, critics_for_inference, obs_flattened)
        )
    q_rows.append(q_values_row)

# Still a list of rows: one row per `times` value, one column per distance.
q_values = q_rows

<IPython.core.display.Javascript object>

In [27]:
# Convert the accumulated rows into a NumPy array and display its shape.
q_values = np.array(q_values)
q_values.shape

(100, 100)

<IPython.core.display.Javascript object>

In [29]:
# Set up plot
fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))

ls = LightSource(270, 45)
# To use a custom hillshading mode, override the built-in shading and pass
# in the rgb colors of the shaded surface calculated from "shade".
rgb = ls.shade(q_values, cmap=cm.gist_earth, vert_exag=0.1, blend_mode='soft')
surf = ax.plot_surface(x, y, q_values, rstride=1, cstride=1, facecolors=rgb,
                       linewidth=0, antialiased=False, shade=False)
plt.xlabel("distance")
plt.ylabel("time")
# plt.show()

<IPython.core.display.Javascript object>

Text(0.5, 0.5, 'time')

<IPython.core.display.Javascript object>

## Time vs. velocity

In [18]:
# Accumulator for the Q-value rows computed over the grid.
q_values = list()
# Both sweep axes cover [-1, 1] with 100 samples each.
times = np.linspace(start=-1, stop=1, num=100)
vels = np.linspace(start=-1, stop=1, num=100)
# Coordinate matrices for the surface plot: x follows `vels`, y follows `times`.
x, y = np.meshgrid(vels, times)
x.shape

(100, 100)

<IPython.core.display.Javascript object>

In [19]:
# Q-value surface over (ship velocity, bridge_phase); the goal velocity is
# placed 0.01 above the current velocity.

# Loop-invariant lookups, hoisted out of the sweep.
node1_algorithm = graph._nodes[1].algorithm
calc_q_value = graph._nodes[0]._calc_q_value

# Collect rows in a list created here. Appending to the `q_values` left behind
# by an earlier cell made this cell non-idempotent: a second run stacked
# duplicate rows, or failed once `q_values` had been converted to an array.
q_rows = []
for phase in times:
    q_values_row = []
    for vel in vels:
        # Per-sample copies keep the shared `obs` / `action` baseline untouched,
        # so other cells do not inherit the last swept values.
        obs_sample = {
            **obs,
            "partial_observation": {
                **obs["partial_observation"],
                "ship_vel": np.array([vel]),
                "bridge_phase": np.array([phase]),
            },
        }
        # The swept velocity drives the goal's `ship_vel`. It used to be written
        # into the goal's `ship_pos` — a copy-paste slip from the position sweep
        # that turned a velocity into a target position.
        action_sample = {
            **action,
            "goal": {**action["goal"], "ship_vel": np.array([vel]) + 0.01},
        }
        obs_flattened = node1_algorithm._observation_space.flatten_value(obs_sample)
        action_flattened = node1_algorithm._action_space.flatten_value(action_sample)
        q_values_row.append(
            calc_q_value(action_flattened, critics_for_inference, obs_flattened)
        )
    q_rows.append(q_values_row)

# Still a list of rows: one row per `times` value, one column per velocity.
q_values = q_rows

<IPython.core.display.Javascript object>

In [20]:
# Convert the accumulated rows into a NumPy array and display its shape.
q_values = np.array(q_values)
q_values.shape

(100, 100)

<IPython.core.display.Javascript object>

In [21]:
from matplotlib import cbook
from matplotlib import cm
from matplotlib.colors import LightSource

<IPython.core.display.Javascript object>

In [22]:
# Set up plot
fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))

ls = LightSource(270, 45)
# To use a custom hillshading mode, override the built-in shading and pass
# in the rgb colors of the shaded surface calculated from "shade".
rgb = ls.shade(q_values, cmap=cm.gist_earth, vert_exag=0.1, blend_mode='soft')
surf = ax.plot_surface(x, y, q_values, rstride=1, cstride=1, facecolors=rgb,
                       linewidth=0, antialiased=False, shade=False)
plt.xlabel("vel")
plt.ylabel("time")
# plt.show()

<IPython.core.display.Javascript object>

Text(0.5, 0.5, 'time')

<IPython.core.display.Javascript object>

TODO:
- Locate the bridge opening
- Distance vs. velocity
- Time vs. velocity
- Expectation to verify: high speed while the bridge is opening should give a much better Q-value than low speed