In [126]:
import sys

from os.path import dirname, abspath
import os
sys.path.append(dirname(abspath(os.getcwd())))
import gymnasium as gym
import torch
import gym_usv
import numpy as np
from cleanrl.rpo_continuous_action import Agent, make_env
import matplotlib.pyplot as plt
from IPython.display import Video
import tempfile
from tqdm import tqdm
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import seaborn as sns
#sns.set_theme()
import matplotlib
import matplotlib as mpl
from matplotlib.collections import EllipseCollection
from torch.distributions.normal import Normal
import pickle
# matplotlib.use("pgf")
# matplotlib.rcParams.update({
#     "pgf.texsystem": "pdflatex",
#     'font.family': 'serif',
#     'text.usetex': True,
#     'pgf.rcfonts': False,
# })

In [127]:
# Seed PyTorch's global RNG so torch-based random draws are repeatable across runs.
torch.manual_seed(1)

<torch._C.Generator at 0x10605c7d0>

In [128]:
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    """Initialise ``layer`` in place and return the same object.

    The weight tensor gets an orthogonal initialisation scaled by ``std``;
    every entry of the bias tensor is set to ``bias_const``.
    """
    init = torch.nn.init
    init.orthogonal_(layer.weight, gain=std)
    init.constant_(layer.bias, val=bias_const)
    return layer

# Range forwarded to make_env below; the path plot later samples rows in this
# range to mark the perturbation (presumably environment steps — confirm against make_env).
perturb_range = (100, 400)
# Vector env holding a single environment (range(1)) built by the project's make_env factory.
envs = gym.vector.SyncVectorEnv(
        [make_env("usv-asmc-ca-v0", 0, False, "test_run", 0.99, perturb_range, (10, 10)) for i in range(1)]
    )

# Wrapper stack, applied in this order: clip actions, normalise observations,
# clip observations to [-10, 10], normalise rewards (gamma=0.99), clip rewards to [-10, 10].
envs = gym.wrappers.ClipAction(envs)
envs = gym.wrappers.NormalizeObservation(envs)
envs = gym.wrappers.TransformObservation(envs, lambda obs: np.clip(obs, -10, 10))
envs = gym.wrappers.NormalizeReward(envs, gamma=0.99)
envs = gym.wrappers.TransformReward(envs, lambda reward: np.clip(reward, -10, 10))

# Bounds that are not referenced by the other visible cells;
# presumably world/plot limits — TODO confirm or remove.
max_y = 10
min_y = -10
max_x = 30
min_x = -10

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [129]:
# Rebuild the policy network and restore its trained weights on the CPU.
# NOTE(review): the second Agent argument (0.5) is presumably rpo_alpha — confirm against cleanrl.
agent = Agent(envs, 0.5)
# Deserialise the checkpoint once and reuse the resulting state dict.
agent_dict = torch.load('agent.pt', map_location=torch.device('cpu'))
# strict=False: keys missing from, or unexpected in, the checkpoint are ignored instead of raising.
agent.load_state_dict(agent_dict, strict=False)
agent.eval()

# Restore the observation-normalisation statistics saved alongside the agent.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('obs_rms.pickle', 'rb') as obs_rms_file:
    obs_rms = pickle.load(obs_rms_file)

envs.obs_rms.mean = obs_rms['mean']
envs.obs_rms.var = obs_rms['var']
envs.obs_rms.count = obs_rms['count']
#envs.obs_rms.count = np.array([2419320])
#envs.envs[0].obs_rms.mean = agent_dict['obs_rms_mean'][0]
#envs.envs[0].obs_rms.var = agent_dict['obs_rms_var'][0]

In [130]:
# Inspect the loaded running statistics (a dict with 'mean', 'var' and 'count' entries).
obs_rms

{'mean': array([ 1.77732177e-01,  3.00550070e-03,  3.77092061e-03,  4.25927069e-04,
        -1.79072119e-01,  2.68265734e-01, -1.58793692e-02,  5.71590390e-01,
         1.37988319e-02,  5.15883122e-02,  6.19614499e-01,  6.37417493e-01,
         6.50799974e-01,  6.51080296e-01,  6.43666655e-01,  6.58157756e-01,
         6.84701929e-01,  6.81620398e-01,  6.60253315e-01,  6.32949006e-01,
         6.00317281e-01,  5.88445510e-01,  6.00573263e-01,  6.28763435e-01,
         6.66440075e-01,  6.94507007e-01,  6.97379063e-01,  6.99754700e-01,
         6.89772523e-01,  6.66152938e-01,  6.35462250e-01,  5.83969288e-01,
         5.36844768e-01,  5.07354244e-01,  5.02665121e-01,  5.14688266e-01,
         5.19586967e-01,  5.07747223e-01,  5.09618744e-01,  5.26516965e-01,
         5.37750626e-01,  5.31406429e-01]),
 'var': array([6.10570976e-03, 6.07394314e-03, 7.99037435e-05, 1.00358690e-04,
        3.32795706e-01, 2.41636405e-02, 1.99815447e-01, 1.37250147e-03,
        1.52537870e-01, 8.29093391e-0

In [131]:
def experiment_1_options():
    """Return a fresh reset-options dict for experiment 1.

    The dict holds five obstacles of radius 1.5 (``obs_x``/``obs_y``/``obs_r``),
    a ``start_position`` of (0, -8, pi/2), a ``target_point`` of (0, 8, 0) and
    ``renderplots`` switched off.
    """
    return {
        'obs_x': np.array([-6, 0, 6, 3, -3]),
        'obs_y': np.array([0, 0, 0, 4.5, 4.5]),
        'obs_r': np.full(5, 1.5),
        'start_position': np.array([0, -8, np.pi / 2]),
        'target_point': np.array([0, 8, 0]),
        'renderplots': False,
    }

def experiment_2_options():
    """Return a fresh reset-options dict for experiment 2.

    Two rows of touching circular obstacles (radius 0.5) are laid along x
    from -10 up to (but excluding) 30, one at y = -4 and one at y = 1. Three
    consecutive circles are removed from each row to open a gap, and two
    large obstacles of radius 5 are appended. All obstacle arrays are float64.
    """

    def wall(start_x, end_x, y, radius=1):
        """Return (x, y, r) arrays for circles spaced one diameter apart along x."""
        xs = np.arange(start_x, end_x, radius * 2)
        count = len(xs)
        return xs, np.full(count, y), np.full(count, radius)

    segments = [wall(-10, 30, -4, 0.5), wall(-10, 30, 1, 0.5)]

    # Start every column from an empty float array so the result is float64,
    # whatever dtype the individual pieces have.
    seed = np.array([])
    xs, ys, rs = (np.concatenate((seed, *column)) for column in zip(*segments))

    # Open the gaps: positions 7-9 belong to the first row, 60-62 to the second.
    keep = np.ones(len(xs), dtype=bool)
    keep[[7, 8, 9, 60, 61, 62]] = False
    xs, ys, rs = xs[keep], ys[keep], rs[keep]

    # Two large obstacles appended after the rows.
    xs = np.concatenate((xs, [-10, 10]))
    ys = np.concatenate((ys, [-3, -7]))
    rs = np.concatenate((rs, [5, 5]))

    return {
        'obs_x': xs,
        'obs_y': ys,
        'obs_r': rs,
        'start_position': np.array([-2, -8, np.pi / 2]),
        'target_point': np.array([0, 8, 0]),
        'renderplots': False,
    }

In [None]:
from gymnasium.wrappers.monitoring import video_recorder

# Roll out the loaded agent on experiment 1, recording a video and keeping
# the per-step info dicts for the analysis cells below.
options = experiment_1_options()
video_path = "test_video.mp4"

next_obs, _ = envs.reset(seed=1, options=options)
next_obs = torch.Tensor(next_obs)
all_info = []

recorder = video_recorder.VideoRecorder(
    envs.envs[0],
    video_path
)

# try/finally so the recorder is closed (and the video finalised) even if a step raises.
try:
    for step in tqdm(range(5000)):
        with torch.no_grad():
            action, _, _, _ = agent.get_action_and_value(next_obs)

        state, reward, terminated, truncated, infos = envs.step(action.cpu().numpy())
        all_info.append(infos)
        done = np.logical_or(terminated, truncated)
        # Stop once every environment has finished; the final step is not captured as a frame.
        if done.all():
            break
        next_obs = torch.Tensor(state)
        recorder.capture_frame()
finally:
    recorder.close()

Video(video_path)

  logger.warn(
 44%|██████████████████████████████████▉                                             | 2186/5000 [00:07<00:09, 302.94it/s]


Moviepy - Building video test_video.mp4.
Moviepy - Writing video test_video.mp4



t:  21%|██████████████▎                                                     | 459/2186 [00:02<00:06, 256.48it/s, now=None]

In [None]:
from collections import defaultdict

# The last two info dicts are excluded from the analysis
# (presumably because the final-step infos have a different layout — TODO confirm).
steps = all_info[:-2]
# Keys are taken from the first info dict; looked up once instead of on every iteration.
step_keys = list(all_info[0].keys()) if steps else []

# Gather every per-step info entry into one list per key.
raw_data = defaultdict(list)
for info in steps:
    for key in step_keys:
        raw_data[key].append(info[key])

# Flatten the per-step arrays of the keys that end up in the main DataFrame.
concat_keys = ['position', 'velocity', 'action', 'action_in']
data = {key: np.concatenate(raw_data[key]) for key in concat_keys}

# Each step's 'asmc_info' entry holds (at index 0) a sequence of dicts;
# their items are appended to raw_data under their own keys.
for info in steps:
    for sub_d in info['asmc_info'][0]:
        for k, v in sub_d.items():
            raw_data[k].append(v)

asmc_data = {'tstbd': raw_data['tstbd'], 'tport': raw_data['tport']}
df_asmc = pd.DataFrame.from_dict(asmc_data)
# NOTE(review): assumes one ASMC sample every 0.01 s — confirm against the environment.
df_asmc['time'] = df_asmc.index * 0.01

# Expand each vector-valued column into named scalar columns in a single concat.
df = pd.DataFrame.from_dict(data)
expansions = {
    'position': ['x', 'y', 'rot'],
    'velocity': ['u', 'v', 'r'],
    'action': ['action0', 'action1'],
    'action_in': ['action_in0', 'action_in1'],
}
df = pd.concat(
    [df] + [pd.DataFrame(df[source].to_list(), columns=names) for source, names in expansions.items()],
    axis=1,
)

# NOTE(review): assumes one environment step every 0.1 s — confirm against the environment.
df['time'] = df.index * 0.1

# Scene description taken from the first recorded step of the first environment.
obstacles = raw_data['obstacles'][0][0]
obstacle_radius = raw_data['obstacle_radius'][0][0]
target = raw_data['target'][0][0][:-1]
start_x, start_y, start_r = df['position'].iloc[0]

df.describe()

In [None]:
# Plot both 'action_in' components over time.
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
ax.plot(df.time, df['action_in0'], ls="-", label='action0 in')
ax.plot(df.time, df['action_in1'], ls="-", label='action1 in')
ax.legend(loc="upper right")
#ax.set_title("Action")
ax.set_xlabel("Time(s)")
ax.set_xlim(0, 20)
#plt.savefig('figures/action_plot.pgf')

# plt.show() instead of fig.show(): Figure.show() needs a GUI backend and only
# emits a warning on the notebook's inline backend.
plt.show()

In [None]:
# Plot the two thruster series recorded in df_asmc over time.
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
for thruster in ('tport', 'tstbd'):
    ax.plot(df_asmc.time, df_asmc[thruster], ls="-")
ax.legend(loc="upper right", labels=['tport', 'tstbd'])
ax.set_title("Thruster output")
ax.set_xlim(0, 20)
ax.set_xlabel("Time (s)")
ax.set_ylabel("N")

In [None]:
# Plot the 'u' and 'r' velocity components over time.
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
ax.plot(df.time, df['u'], ls="-", label='u')
ax.plot(df.time, df['r'], ls="-", label='r')
ax.legend(loc="upper right")
#ax.set_title("Action")
ax.set_xlabel("Time(s)")
#plt.savefig('figures/action_plot.pgf')

# plt.show() instead of fig.show(): Figure.show() needs a GUI backend and only
# emits a warning on the notebook's inline backend.
plt.show()

In [None]:
from matplotlib.patches import Ellipse

# Top-down view of the run: travelled path, obstacles, goal, start pose and
# the samples taken inside the perturbation range.
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
# map axis are swapped: df['y'] goes on the horizontal axis, df['x'] on the vertical one
ax.plot(df['y'], df['x'], 'b:', label="Path")

# Swap the first two obstacle columns so the obstacles follow the same axis swap as the path.
obstacles_rotated = obstacles.copy()
obstacles_rotated[:, [0, 1]] = obstacles[:, [1, 0]]
ax.add_collection(EllipseCollection(widths=obstacle_radius*2-0.3, heights=obstacle_radius*2-0.3, angles=0, units='xy',
                                       offsets=obstacles_rotated, transOffset=ax.transData))

# Ellipse takes full width/height, hence twice the goal radius.
goal_radius = 1.5
goal_r = 2 * goal_radius
goal_ell = Ellipse(xy=[target[1], target[0]],
                   width=goal_r, height=goal_r,
                   edgecolor='g', lw=2, linestyle=':',
                   facecolor='none')
ax.add_artist(goal_ell)
ax.plot(target[1], target[0], 'g*', label="Goal", markersize=10)

# Triangle marker rotated by -start_r.
# NOTE(review): this writes the private MarkerStyle._transform attribute and may break across matplotlib versions.
t = mpl.markers.MarkerStyle(marker='^')
t._transform = t.get_transform().rotate(-start_r)

# Add markers on perturbation
add_perturb = True
if add_perturb:
    # sample a point every 10 rows inside the perturbation range
    samples = np.arange(perturb_range[0], perturb_range[1], 10)
    # The episode can end before the range does; keep only rows that exist
    # so a short run does not raise a KeyError.
    samples = samples[samples < len(df)]
    if samples.size:
        sx = df['x'].iloc[samples]
        sy = df['y'].iloc[samples]
        ax.plot(sy, sx, 'm^', label="Perturbation")

ax.plot(start_y, start_x, 'r', label="Start", marker=t, markersize=10, linestyle='None')
ax.set_aspect('equal')
ax.set_xlabel("X(m)")
ax.set_ylabel("Y(m)")

# Find new min and max limits (only needed by the optional limit lines below)
plt_min = min(start_x, target[0]), min(start_y, target[1])
plt_max = max(start_x, target[0]), max(start_y, target[1])

y_margin = 8
x_margin = 8
#ax.set_ylim(plt_min[0] - y_margin, plt_max[0] + y_margin)
#ax.set_xlim(plt_min[1] - x_margin, plt_max[1] + x_margin)
ax.legend()

#plt.savefig('figures/experiment1_path.pgf')

# plt.show() instead of fig.show(): Figure.show() needs a GUI backend and only
# emits a warning on the notebook's inline backend.
plt.show()


In [None]:
# Snapshot the obstacle arrays currently held by the first environment.
exp2_data = {attr: getattr(envs.envs[0], attr) for attr in ('obs_x', 'obs_y', 'obs_r')}
#pickle.dump(exp2_data, open('exp2_data.pkl', 'wb'))