In [1]:
import numpy as np
from stable_baselines3 import PPO
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
from environments import MultiMarkNoModelEnv

2024-04-08 00:42:38.767172: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-08 00:42:38.785999: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-08 00:42:38.786018: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-08 00:42:38.786496: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-08 00:42:38.790201: I tensorflow/core/platform/cpu_feature_guar

In [2]:
outer_radius = 2 * 250 + 2.5 * 0.1 * 250

dt = 5
num_marks = 2
max_seconds = 500 * num_marks / dt

# Initialize the figure and scatter plot
fig = go.FigureWidget()
scatter = fig.add_scatter(mode='markers+lines', name='trajectory')
marks_scatter = fig.add_scatter(mode='markers+text', name='marks')
fig.update_xaxes(range=[-outer_radius, outer_radius],dtick=25)
fig.update_yaxes(range=[-outer_radius, outer_radius],dtick=25)
fig.layout.width=800
fig.layout.height=800

heading_fig = go.FigureWidget()
heading_scatter = heading_fig.add_scatter(mode='markers+lines', name='heading')
heading_fig.update_xaxes(range=[0, max_seconds],dtick=25)
heading_fig.update_yaxes(range=[-210, 210],dtick=30)
heading_fig.layout.width=800
heading_fig.layout.height=600
heading_fig.layout.title = "Heading over time"

vmg_fig = go.FigureWidget()
vmg_scatter = vmg_fig.add_scatter(mode='markers+lines')
vmg_fig.update_xaxes(range=[0, max_seconds],dtick=25)
vmg_fig.update_yaxes(range=[-4, 4],dtick=2)
vmg_fig.layout.width=800
vmg_fig.layout.height=400
vmg_fig.layout.title = "VMG over time"

# Initialize output for the text
out = widgets.Output()

# Display text and figure
display(out)
display(fig)
display(heading_fig)
display(vmg_fig)

episode = 0

colormap = ['blue', 'red', 'green', 'orange', 'black']

def plot(data, marks, bounds):
    global out, fig
    x_values = [point['x'] for point in data]
    y_values = [point['y'] for point in data]
    meta_values = [[f"{k}: {'%.3f' % v}" for (k, v) in point['meta'].items()] for point in data]

    rewards = [point['meta']['reward'] for point in data]
    cumulative_reward = np.cumsum([0] + [reward - rewards[0] for reward in rewards])

    meta_values = ["<br>".join(meta + [f"Cumulative Reward: {cumulative_reward[i]}"]) for i, meta in enumerate(meta_values)]

    min_x, max_x, min_y, max_y = bounds

    color_meta = [point['meta']['current_mark'] for point in data]
    colors = [colormap[color % len(colormap)] for color in color_meta]

    headings = [point['meta']['heading'] for point in data]
    vmg = [point['meta']['vmg'] for point in data]

    global_meta = {'final_position': (x_values[-1], y_values[-1]), 'reward': sum([point['meta']['reward'] for point in data]), 'iters': len(data), 'episode': episode}

    # Update data
    scatter = fig.data[0]
    scatter.x = x_values
    scatter.y = y_values
    scatter.hovertext = meta_values  # Add this line
    scatter.marker.color = colors
    scatter.line.color = 'lightgrey'

    marks_scatter = fig.data[1]
    marks_scatter.x = [x for x, _ in marks]
    marks_scatter.y = [y for _, y in marks]
    marks_scatter.text = list(range(len(marks)))
    marks_scatter.textposition = 'bottom right'
    marks_scatter.marker.color = 'black'
    marks_scatter.marker.symbol = 'x'
    marks_scatter.marker.size = 10

    heading_scatter = heading_fig.data[0]
    heading_scatter.x = list(range(len(headings)))
    heading_scatter.y = headings
    heading_scatter.marker.color = colors
    heading_scatter.line.color = 'lightgrey'

    vmg_scatter = vmg_fig.data[0]
    vmg_scatter.x = list(range(len(vmg)))
    vmg_scatter.y = vmg
    vmg_scatter.marker.color = colors
    vmg_scatter.line.color = 'lightgrey'

    fig.update_xaxes(range=[min_x * 1.1, max_x * 1.1],dtick=25)
    fig.update_yaxes(range=[min_y * 1.1, max_y * 1.1],dtick=25)
    fig.layout.width=800
    fig.layout.height=800

    heading_fig.update_yaxes(range=[-210, 210],dtick=30)
    heading_fig.layout.width=800
    heading_fig.layout.height=600

    vmg_fig.layout.width=800
    vmg_fig.layout.height=400

    # Update the text output
    with out:
        out.clear_output(wait=True)
        print(f"Global Meta: {str(global_meta)}")

None

Output()

FigureWidget({
    'data': [{'mode': 'markers+lines',
              'name': 'trajectory',
              'type': 'scatter',
              'uid': '40c71390-2579-4b10-b8e6-a35942ea2c06'},
             {'mode': 'markers+text', 'name': 'marks', 'type': 'scatter', 'uid': '00358ec7-bb4f-4733-9fb9-3f22dfe98006'}],
    'layout': {'height': 800,
               'template': '...',
               'width': 800,
               'xaxis': {'dtick': 25, 'range': [-562.5, 562.5]},
               'yaxis': {'dtick': 25, 'range': [-562.5, 562.5]}}
})

FigureWidget({
    'data': [{'mode': 'markers+lines',
              'name': 'heading',
              'type': 'scatter',
              'uid': '3fb20f0e-97ca-464d-b8af-1e4717e6a26b'}],
    'layout': {'height': 600,
               'template': '...',
               'title': {'text': 'Heading over time'},
               'width': 800,
               'xaxis': {'dtick': 25, 'range': [0, 200.0]},
               'yaxis': {'dtick': 30, 'range': [-210, 210]}}
})

FigureWidget({
    'data': [{'mode': 'markers+lines', 'type': 'scatter', 'uid': '93cbdc03-5985-4d25-9768-d9c0793ff9c9'}],
    'layout': {'height': 400,
               'template': '...',
               'title': {'text': 'VMG over time'},
               'width': 800,
               'xaxis': {'dtick': 25, 'range': [0, 200.0]},
               'yaxis': {'dtick': 2, 'range': [-4, 4]}}
})

In [3]:
# Configuration

r = 250
actions = np.array([-5, -1, 0, 1, 5]) / 180.0

config = {
    'max_marks': 1,
    'max_seconds_per_leg': 500,
    'plot_fn': plot,
    'leg_radius': r,
    'actions': actions,
    'target_tolerance_multiplier': 0.5
}

outer_radius = config['max_marks'] * 2 * r + 0.2 * r
bounds = [-outer_radius, outer_radius, -outer_radius, outer_radius]

args = {
    'config': config,
    'dt': 1,
    'bounds': bounds,
    'seq_size': 3,
    'target_phase_steps': 8,
    'heading_phase_steps': 8,
    'radius_multipliers': [0.5, 1, 1.5, 2]
}

# Initialize Environment
env = MultiMarkNoModelEnv(**args)

# Initialize PPO model
model = PPO("MlpPolicy", env, verbose=1, device='cuda', policy_kwargs={
    "net_arch": [64, 500, 400],
    # "features_extractor_class": SelfAttentionExtractor,
    # "features_extractor_kwargs": {"features_dim": 64}
})
# Initialize PPO model
model = PPO.load('./BoatControl_boat_model_fine.dat')

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [4]:
targets = [(0, 250), (250, 250), (250, 0), (0, 0), (-250, 0), (-250, -250), (0, -250), (0, 0)]
# targets = [(0, 250)]

full_trajectory = []

start_x = 0
start_y = 0
start_heading = 2*np.pi - np.pi/4
start_vmg = 0
for mark_idx, (target_x, target_y) in enumerate(targets):
    best_trajectory = None
    best_reward = None
    for i in range(10):
        obs, _ = env.reset(heading=start_heading, vmg=start_vmg)
        env.target_x[0] = target_x - start_x
        env.target_y[0] = target_y - start_y
        done = False
        total_reward = 0
        while not done:
            action, _states = model.predict(obs)
            obs, reward, is_terminal, is_truncated, info = env.step(action)
            done = is_terminal or is_truncated
            total_reward += reward

        if best_reward is None or total_reward > best_reward:
            best_reward = total_reward
            best_trajectory = env.trajectory
            next_start = (best_trajectory[-1]['x'] + start_x, best_trajectory[-1]['y'] + start_y, best_trajectory[-1]['meta']['heading'] * np.pi / 180, best_trajectory[-1]['meta']['vmg'])

    for item in best_trajectory:
        item['x'] += start_x
        item['y'] += start_y
        item['meta']['current_mark'] = mark_idx

    full_trajectory.extend(best_trajectory)

    start_x, start_y, start_heading, start_vmg = next_start


plot(full_trajectory, targets, bounds)

Missed mark
Missed mark
Missed mark
Missed mark
Missed mark
Missed mark
Missed mark
Missed mark
Missed mark
Missed mark
Missed mark
Missed mark
Missed mark
Missed mark


In [5]:
full_trajectory

[{'x': 0.0,
  'y': 0.0,
  'meta': {'current_mark': 0,
   'vmg': 0,
   'heading': -45.0,
   'reward': 0,
   'speed': 0,
   'distance': 124.99999999999999,
   'min_distance': 124.99999999999999,
   'has_missed_mark': False}},
 {'x': 0.0,
  'y': 0.0,
  'meta': {'current_mark': 0,
   'vmg': 0.0,
   'heading': -39.99999999999994,
   'reward': 0.0,
   'speed': 0,
   'distance': 250.0,
   'min_distance': 124.99999999999999,
   'has_missed_mark': False}},
 {'x': 0.0,
  'y': 0.0,
  'meta': {'current_mark': 0,
   'vmg': 0.0,
   'heading': -34.99999999999994,
   'reward': 0.0,
   'speed': 0,
   'distance': 250.0,
   'min_distance': 124.99999999999999,
   'has_missed_mark': False}},
 {'x': 0.0,
  'y': 0.0,
  'meta': {'current_mark': 0,
   'vmg': 0.0,
   'heading': -29.999999999999943,
   'reward': 0.0,
   'speed': 0,
   'distance': 250.0,
   'min_distance': 124.99999999999999,
   'has_missed_mark': False}},
 {'x': 0.0,
  'y': 0.0,
  'meta': {'current_mark': 0,
   'vmg': 0.0,
   'heading': -24.9999