In [11]:
import gymnasium as gym
import matplotlib.pyplot as plt
import numpy as np
import time
import pprint
from tqdm.notebook import trange
import tensorflow as tf
from sac import Agent
from obs import observation_shape

# File name for an image artefact of this run (it is not used in the visible cells).
# NOTE(review): the name mentions an inverted pendulum although this notebook
# runs 'racetrack-v0' — confirm the intended file name.
filename = 'inverted_pendulum.png'

# Baseline for tracking the best episode score. The original assigned this twice
# in a row (-10000.0, then -1000.0); only the second value ever took effect, so
# the dead first assignment has been removed.
best_score = -1000.0

# Per-episode statistics; each list receives one entry per finished episode.
score_history = []    # total reward of each episode
episode_lens = []     # number of environment steps in each episode
avg_history = []      # mean of all episode scores so far
std_history = []      # standard deviation of all episode scores so far
avg_history_100 = []  # mean of the most recent 100 episode scores

In [12]:
# Report how many GPUs TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


In [13]:
# Create the racetrack environment; frames are rendered off-screen as RGB arrays.
env = gym.make('racetrack-v0', render_mode='rgb_array')

# Environment configuration:
#   - continuous action with both lateral and longitudinal control enabled
#   - observation: a stack of 2 grayscale frames of shape (64, 32)
racetrack_config = {
    "action": {
        "lateral": True,
        "longitudinal": True,
        "type": "ContinuousAction",
    },
    "observation": {
        "type": "GrayscaleObservation",
        "observation_shape": (64, 32),
        "stack_size": 2,
        "weights": [0.2989, 0.5870, 0.1140],  # RGB -> grayscale conversion weights
        "scaling": 1.75,
    },
    "show_trajectories": True,
    "duration": 80,
    "collision_reward": -1,
    "simulation_frequency": 15,
    "policy_frequency": 5,
    "real_time_rendering": True,
}

# `gym.make` returns the environment wrapped in Gymnasium wrappers. Reaching
# `configure` through the wrapper relies on attribute forwarding that Gymnasium
# deprecated and later removed, so call it on the unwrapped environment — the
# same way `env.unwrapped` is used further down in this notebook.
env.unwrapped.configure(racetrack_config)  # type: ignore

# Reset so the environment starts from the new configuration.
(obs, info), done = env.reset(), False

# NOTE(review): `input` shadows the Python builtin. The name is kept because the
# agent-construction cell reads `input.shape`; rename both together.
input = observation_shape(obs)
print(input)


[[[0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]
  ...
  [0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]]

 [[0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]
  ...
  [0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]]

 [[0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]
  ...
  [0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]]

 ...

 [[0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]
  ...
  [0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]]

 [[0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]
  ...
  [0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]]

 [[0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]
  ...
  [0.         0.38823529]
  [0.         0.38823529]
  [0.         0.38823529]]]


In [14]:
# Build the SAC agent. The network input size is taken from the preprocessed
# initial observation; the two actions match the lateral + longitudinal
# controls enabled in the environment configuration.
agent = Agent(
    input_dims=input.shape,
    env=env,
    n_actions=2,
)

In [17]:
# Training loop: run the agent in the environment, store every transition in the
# replay memory, update the networks after each step, and log per-episode
# statistics.
N_TRAIN_EPISODES = 450   # number of training episodes
MAX_TRAIN_STEPS = 200    # hard cap on the number of steps in one episode

# The progress bar was labelled 'Test episodes' although this loop trains.
for episode in trange(N_TRAIN_EPISODES, desc='Training episodes'):

    observation, info = env.reset()
    observation = observation_shape(observation)
    done = False
    truncated = False

    episode_reward = 0
    episode_len = 0
    while not (done or truncated):
        action = agent.choose_action(observation)

        new_observation, reward, done, truncated, new_info = env.step(action=action)
        new_observation = observation_shape(new_observation)

        episode_reward += reward  # type: ignore
        episode_len += 1

        # Leaving the road ends the episode for good, so it is a *terminal*
        # transition. The original only raised `truncated` here and stored the
        # transition with done=False, which lets the critic bootstrap from a
        # state the episode never continues from.
        if not new_info["rewards"]["on_road_reward"]:
            done = True
        # The step cap is an artificial cut-off, not a failure: the episode
        # stops, but the stored transition stays non-terminal.
        if episode_len >= MAX_TRAIN_STEPS:
            truncated = True

        agent.remember(state=observation, action=action, done=done,
                       reward=reward, new_state=new_observation)
        agent.learn()

        observation = new_observation

    episode_lens.append(episode_len)

    # Statistics over all episodes so far, plus a 100-episode moving average.
    score_history.append(episode_reward)
    avg_score = np.mean(score_history)
    avg_history.append(avg_score)
    std_score = np.std(score_history)
    std_history.append(std_score)

    avg_score_100 = np.mean(score_history[-100:])
    avg_history_100.append(avg_score_100)

    agent.tensorboard.update_stats(episode_rew=episode_reward,
                                   average_rew=avg_score,
                                   average_100_reward=avg_score_100,
                                   std_rew=std_score,
                                   episode_len=episode_len)

    print('episode ', episode, 'score %.1f' % episode_reward, 'ep len', episode_len,
          'avg score %.1f' % avg_score, 'avg_score_100 %.1f' % avg_score_100,
          'std score %.1f' % std_score)



Test episodes:   0%|          | 0/30 [00:00<?, ?it/s]

Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-0.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-0.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-0.mp4
episode  0 score 2.2 ep len 7 avg score 2.1 avg_score_100 2.1 std score 0.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-1.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-1.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-1.mp4
episode  1 score 2.3 ep len 7 avg score 2.2 avg_score_100 2.2 std score 0.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-2.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-2.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-2.mp4
episode  2 score 2.1 ep len 7 avg score 2.1 avg_score_100 2.1 std score 0.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-3.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-3.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-3.mp4
episode  3 score 2.0 ep len 7 avg score 2.1 avg_score_100 2.1 std score 0.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-4.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-4.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-4.mp4
episode  4 score 2.3 ep len 7 avg score 2.1 avg_score_100 2.1 std score 0.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-5.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-5.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-5.mp4
episode  5 score 1.9 ep len 7 avg score 2.1 avg_score_100 2.1 std score 0.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-6.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-6.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-6.mp4
episode  6 score 2.0 ep len 7 avg score 2.1 avg_score_100 2.1 std score 0.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-7.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-7.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-7.mp4
episode  7 score 1.9 ep len 7 avg score 2.1 avg_score_100 2.1 std score 0.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-8.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-8.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-8.mp4
episode  8 score 2.2 ep len 7 avg score 2.1 avg_score_100 2.1 std score 0.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-9.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-9.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-9.mp4
episode  9 score 2.1 ep len 7 avg score 2.1 avg_score_100 2.1 std score 0.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-10.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-10.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-10.mp4
episode  10 score 2.0 ep len 7 avg score 2.1 avg_score_100 2.1 std score 0.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-11.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-11.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-11.mp4
episode  11 score 2.4 ep len 8 avg score 2.1 avg_score_100 2.1 std score 0.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-12.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-12.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-12.mp4
episode  12 score 2.4 ep len 7 avg score 2.1 avg_score_100 2.1 std score 0.2


2023-12-22 22:39:13.779353: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55d897179550 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-12-22 22:39:13.779372: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
2023-12-22 22:39:13.784500: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-12-22 22:39:13.863204: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-13.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-13.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-13.mp4
episode  13 score 0.6 ep len 3 avg score 2.0 avg_score_100 2.0 std score 0.4
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-14.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-14.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-14.mp4
episode  14 score -1.4 ep len 10 avg score 1.8 avg_score_100 1.8 std score 0.9
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-15.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-15.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-15.mp4
episode  15 score 0.2 ep len 7 avg score 1.7 avg_score_100 1.7 std score 1.0
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-16.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-16.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-16.mp4
episode  16 score 0.3 ep len 21 avg score 1.6 avg_score_100 1.6 std score 1.0
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-17.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-17.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-17.mp4
episode  17 score -0.2 ep len 2 avg score 1.5 avg_score_100 1.5 std score 1.1
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-18.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-18.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-18.mp4
episode  18 score -1.0 ep len 8 avg score 1.4 avg_score_100 1.4 std score 1.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-19.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-19.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-19.mp4
episode  19 score -0.5 ep len 14 avg score 1.3 avg_score_100 1.3 std score 1.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-20.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-20.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-20.mp4
episode  20 score 0.1 ep len 7 avg score 1.3 avg_score_100 1.3 std score 1.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-21.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-21.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-21.mp4
episode  21 score 1.1 ep len 7 avg score 1.3 avg_score_100 1.3 std score 1.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-22.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-22.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-22.mp4
episode  22 score -0.4 ep len 4 avg score 1.2 avg_score_100 1.2 std score 1.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-23.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-23.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-23.mp4
episode  23 score 1.4 ep len 7 avg score 1.2 avg_score_100 1.2 std score 1.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-24.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-24.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-24.mp4
episode  24 score 0.7 ep len 7 avg score 1.2 avg_score_100 1.2 std score 1.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-25.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-25.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-25.mp4
episode  25 score 0.1 ep len 8 avg score 1.1 avg_score_100 1.1 std score 1.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-26.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-26.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-26.mp4
episode  26 score -1.6 ep len 8 avg score 1.0 avg_score_100 1.0 std score 1.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-27.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-27.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-27.mp4
episode  27 score 1.4 ep len 7 avg score 1.1 avg_score_100 1.1 std score 1.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-28.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-28.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-28.mp4
episode  28 score 1.9 ep len 29 avg score 1.1 avg_score_100 1.1 std score 1.2
Moviepy - Building video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-29.mp4.
Moviepy - Writing video /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-29.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/two_acts_cnn/SAC/videos/rl-video-episode-29.mp4
episode  29 score 13.1 ep len 26 avg score 1.5 avg_score_100 1.5 std score 2.4


In [6]:
# Total number of environment steps taken over all recorded episodes.
n_steps = sum(length for length in episode_lens)
print(n_steps)


26210


In [18]:

# Evaluation: load a saved checkpoint, record a video of every episode, and
# report the score. Unlike the training loop, nothing is stored in the replay
# memory and the networks are not updated, so the loaded policy is evaluated
# as-is (the original cell still called agent.remember / agent.learn here).
from gymnasium.wrappers import RecordVideo

CHECKPOINT_EPISODE = 450  # checkpoint identifier passed to agent.load_models
N_TEST_EPISODES = 1       # number of evaluation episodes
MAX_TEST_STEPS = 200      # hard cap on the number of steps in one episode

# Load the weights BEFORE touching the environment, so a failed load does not
# leave `env` wrapped (re-running the cell would otherwise wrap it again).
# NOTE(review): the recorded output of this cell shows load_models raising
# "Unable to load weights saved in HDF5 format into a subclassed Model which has
# not created its variables yet" — the agent's networks must have been called at
# least once before loading. How to build them depends on sac.Agent, which is
# not visible here; confirm and fix there.
agent.load_models(CHECKPOINT_EPISODE)

# Record every episode into the "videos" folder.
env = RecordVideo(env, video_folder="videos", episode_trigger=lambda e: True)
env.unwrapped.set_record_video_wrapper(env)

try:
    for episode in trange(N_TEST_EPISODES, desc='Test episodes'):

        observation, info = env.reset()
        observation = observation_shape(observation)
        done = False
        truncated = False

        episode_reward = 0
        episode_len = 0
        while not (done or truncated):
            action = agent.choose_action(observation)

            new_observation, reward, done, truncated, new_info = env.step(action=action)
            new_observation = observation_shape(new_observation)

            episode_reward += reward  # type: ignore
            episode_len += 1

            # Stop the episode once the car leaves the road or the step cap is hit.
            if not new_info["rewards"]["on_road_reward"] or episode_len >= MAX_TEST_STEPS:
                truncated = True

            observation = new_observation

        print('episode ', episode, 'score %.1f' % episode_reward, 'ep len', episode_len)
finally:
    # Always close the environment, even if an episode fails part-way through.
    env.close()


... loading models ...


ValueError: Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.