### Importing Dependencies

In [1]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
import os

### Test Environment

In [11]:
# Single CarRacing environment for the manual smoke test below:
# "human" render mode, domain randomization switched off.
env = gym.make(
    "CarRacing-v2",
    render_mode="human",
    domain_randomize=False,
)

In [32]:
# Smoke-test the environment: play a few episodes with uniformly random
# actions and print the total reward collected in each one.
episodes = 5
for episode in range(1, episodes + 1):
    # Gymnasium's reset() returns an (observation, info) pair.
    state, info = env.reset()
    done = False
    score = 0

    while not done:
        # No explicit env.render() call: the env was created with
        # render_mode="human", so each step() already draws the frame.
        action = env.action_space.sample()
        n_state, reward, terminated, truncated, info = env.step(action)
        # An episode ends when it terminates OR is truncated (time limit).
        # Checking only the termination flag keeps stepping a finished episode.
        done = terminated or truncated
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
env.close()

In [31]:
# Redundant with the env.close() that ends the episode-loop cell; presumably kept
# as a manual cleanup for when that loop is interrupted — TODO confirm it is still needed.
env.close()

### Train Model

In [2]:
# Directory passed to PPO as `tensorboard_log`; joined with os.path so the
# separator matches the host OS.
log_dir_parts = ('Training', 'Logs')
log_path = os.path.join(*log_dir_parts)

In [38]:
from stable_baselines3.common.env_util import make_vec_env

# Vectorized wrapper around a single environment instance; this is the env
# the PPO model is constructed with.
# NOTE(review): the test cell above creates "CarRacing-v2" through gymnasium,
# while this id is "CarRacing-v0" — confirm which version/registry is intended.
vec_env = make_vec_env(env_id="CarRacing-v0", n_envs=1)

In [39]:
# PPO agent with a CNN policy on the vectorized env; verbose=1 prints
# progress and TensorBoard logs are written under log_path.
model = PPO(
    policy="CnnPolicy",
    env=vec_env,
    tensorboard_log=log_path,
    verbose=1,
)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [47]:
!pip install pyglet==1.5.27

Collecting pyglet==1.5.27
  Downloading pyglet-1.5.27-py3-none-any.whl (1.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 468.1 kB/s eta 0:00:00
Installing collected packages: pyglet
  Attempting uninstall: pyglet
    Found existing installation: pyglet 2.0.7
    Uninstalling pyglet-2.0.7:
      Successfully uninstalled pyglet-2.0.7
Successfully installed pyglet-1.5.27


In [40]:
# Train for 428k environment steps (the count the saved-model and GIF file
# names refer to). Kept as the last expression so the cell output is unchanged.
model.learn(total_timesteps=428_000)

Track generation: 1131..1418 -> 287-tiles track
Logging to Training/Logs/PPO_10
Track generation: 1111..1399 -> 288-tiles track
Track generation: 1078..1357 -> 279-tiles track
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -58.1    |
| time/              |          |
|    fps             | 129      |
|    iterations      | 1        |
|    time_elapsed    | 15       |
|    total_timesteps | 2048     |
---------------------------------
Track generation: 1198..1512 -> 314-tiles track
Track generation: 1116..1399 -> 283-tiles track
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | -57.8       |
| time/                   |             |
|    fps                  | 79          |
|    iterations           | 2           |
|    time_elapsed         | 51          |
|    total_timesteps      | 4096        |
| train/  

<stable_baselines3.ppo.ppo.PPO at 0x7f23d7208ca0>

### Saving Model

In [41]:
# Location of the saved agent, relative to the working directory.
model_dir = os.path.join('Training', 'Saved_Models')
ppo_path = os.path.join(model_dir, 'PPO_428k_Driving_model')

In [42]:
# Persist the trained agent to the path built in the previous cell.
model.save(path=ppo_path)

### Evaluate and Test

In [45]:
# Run one rendered evaluation episode with the trained policy.
# evaluate_policy returns (mean_reward, std_reward); keep the result so it can
# be reported instead of being silently discarded.
mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=1, render=True)

# Close the environment that was actually evaluated and rendered (vec_env),
# not the unrelated `env` from the random-agent test.
vec_env.close()

print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")

In [44]:
import imageio
import numpy as np

# Roll the trained policy out for 500 steps in a fresh vectorized env and
# save the rendered frames as an animated GIF.
env = make_vec_env(env_id="CarRacing-v0", n_envs=1)
obs = env.reset()

images = []
for _ in range(500):
    # Capture the frame for the current state before acting on it.
    img = env.render(mode="rgb_array")
    images.append(img)
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
# One more render after the last step; this frame is not stored.
img = env.render(mode="rgb_array")

env.close()

# Only every second captured frame goes into the GIF.
frames = [np.array(frame) for frame in images[::2]]
imageio.mimsave("self_driving_car_trained_agent_428k_steps.gif", frames, duration=40)

Track generation: 1314..1647 -> 333-tiles track
