In [1]:
import torch
# Pick the compute device: CUDA when PyTorch reports one available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [2]:


# Install the notebook's third-party dependencies into the kernel's environment.
# NOTE(review): swig is presumably required to build the Box2D bindings pulled
# in by gymnasium[box2d] — confirm. Versions are unpinned, so a rerun may
# resolve to different releases than the ones that produced the logged output.
%pip install swig
%pip install stable-baselines3[extra]
%pip install gymnasium[box2d]




In [3]:
from IPython.display import Javascript

def keep_alive():
    """Inject JavaScript that clicks the Colab connect button once a minute.

    The script is handed to the notebook front end through ``display``; the
    function itself returns nothing. The click is skipped when the button
    cannot be found, so a missing element no longer raises a TypeError in the
    browser console on every tick.
    """
    display(Javascript('''
    function ConnectButton(){
        console.log("Keeping session alive");
        var button = document.querySelector("colab-toolbar-button#connect");
        // querySelector returns null when nothing matches; only click a real element.
        if (button) {
            button.click();
        }
    }
    setInterval(ConnectButton, 60*1000);
    '''))

keep_alive()

<IPython.core.display.Javascript object>

In [4]:

import gymnasium as gym
import numpy as np
import torch
import cv2
from collections import deque
import torch.nn as nn
from gym.wrappers import RecordVideo
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv


In [None]:
def preprocess_frame(frame, resolution=(84, 84), grayscale=True):
    """Optionally grayscale a frame, resize it, and cast it to uint8.

    Args:
        frame: Image array accepted by ``cv2.cvtColor`` / ``cv2.resize``
            (assumed to be an RGB (H, W, 3) array — TODO confirm against the
            environment's observations).
        resolution: Target size as ``(height, width)``, matching how the
            observation space in this notebook indexes ``resolution``.
        grayscale: When True, convert RGB to a single gray channel and append
            a trailing channel axis to the result.

    Returns:
        A ``np.uint8`` array of shape ``(height, width, 1)`` when ``grayscale``
        is True; otherwise the resized frame cast to ``np.uint8``.
    """
    if grayscale:
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    target_height, target_width = resolution
    # cv2.resize expects dsize as (width, height), i.e. the reverse of the
    # (height, width) convention used for array shapes.
    frame = cv2.resize(frame, (target_width, target_height))
    frame = frame.astype(np.uint8)
    if grayscale:
        # Restore an explicit channel axis so frames can be concatenated on axis -1.
        frame = np.expand_dims(frame, axis=-1)
    return frame



In [6]:

class FrameStack:
    """Rolling buffer holding the ``k`` most recent preprocessed frames.

    Frames are produced by ``preprocess_frame`` and concatenated along the last
    axis, so the stacked result has ``k`` times the channels of one frame.
    """

    def __init__(self, k, resolution=(84, 84), grayscale=True):
        """Create an empty buffer.

        Args:
            k: Number of frames to keep; must be at least 1.
            resolution: Forwarded to ``preprocess_frame``.
            grayscale: Forwarded to ``preprocess_frame``.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        # Fail at construction time instead of later inside np.concatenate.
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = k
        self.frames = deque(maxlen=k)
        self.resolution = resolution
        self.grayscale = grayscale

    def _stacked(self):
        """Return the buffered frames concatenated along the last axis."""
        return np.concatenate(self.frames, axis=-1)

    def reset(self, obs):
        """Fill the whole buffer with copies of the first frame of an episode.

        Args:
            obs: Raw observation passed to ``preprocess_frame``.

        Returns:
            The ``k`` identical frames concatenated along the last axis.
        """
        frame = preprocess_frame(obs, self.resolution, self.grayscale)
        self.frames.clear()
        for _ in range(self.k):
            self.frames.append(np.copy(frame))
        return self._stacked()

    def step(self, obs):
        """Push a new frame, dropping the oldest one, and return the stack.

        If the buffer is not full yet (``step`` called before ``reset``), it is
        padded with copies of the new frame so the result always has ``k``
        frames.

        Args:
            obs: Raw observation passed to ``preprocess_frame``.

        Returns:
            The ``k`` buffered frames concatenated along the last axis.
        """
        frame = preprocess_frame(obs, self.resolution, self.grayscale)
        # Pad an unprimed buffer so the output channel count never varies.
        while len(self.frames) < self.k - 1:
            self.frames.append(np.copy(frame))
        self.frames.append(frame)
        return self._stacked()



In [None]:
class PreprocessedCarRacing(gym.Wrapper):
    """Environment wrapper that returns stacked, preprocessed, channel-first frames.

    Each observation from the wrapped env is passed through a ``FrameStack``
    and the stacked result is transposed with axes ``(2, 0, 1)`` before being
    returned.
    """

    def __init__(self, env, frame_stack=4, resolution=(84, 84), grayscale=True):
        """Wrap ``env`` and declare the stacked uint8 observation space.

        Args:
            env: Environment to wrap.
            frame_stack: Number of frames kept by the internal ``FrameStack``.
            resolution: Forwarded to ``FrameStack``; also used as the last two
                dimensions of the declared observation shape.
            grayscale: Forwarded to ``FrameStack``; one channel per frame when
                True, three otherwise.
        """
        super().__init__(env)
        self.frame_stack = FrameStack(frame_stack, resolution, grayscale)
        channels_per_frame = 1 if grayscale else 3
        obs_shape = (channels_per_frame * frame_stack, resolution[0], resolution[1])
        self.observation_space = gym.spaces.Box(
            low=0,
            high=255,
            shape=obs_shape,
            dtype=np.uint8,
        )

    @staticmethod
    def _channels_first(stacked):
        """Move the last (channel) axis of the stacked frames to the front."""
        return np.transpose(stacked, (2, 0, 1))

    def reset(self, **kwargs):
        """Reset the wrapped env and return ``(stacked_observation, info)``."""
        first_obs, info = self.env.reset(**kwargs)
        # The frame stack is built channel-last, so it is transposed to channel-first.
        return self._channels_first(self.frame_stack.reset(first_obs)), info

    def step(self, action):
        """Step the wrapped env, returning the usual 5-tuple with a stacked observation."""
        raw_obs, reward, terminated, truncated, info = self.env.step(action)
        observation = self._channels_first(self.frame_stack.step(raw_obs))
        return observation, reward, terminated, truncated, info


In [None]:
import gymnasium as gym
from gymnasium.wrappers import RecordEpisodeStatistics, RecordVideo, NormalizeObservation

def make_env():
    """Create a CarRacing-v3 env (rgb_array rendering) wrapped to monitor rewards.

    Returns:
        The environment wrapped in ``RecordEpisodeStatistics``.
    """
    base_env = gym.make("CarRacing-v3", render_mode="rgb_array")
    # add any additional wrappers here, e.g. PreprocessedCarRacing
    return RecordEpisodeStatistics(base_env)  # Monitor rewards




In [9]:
# from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
# import torch
# import torch.nn as nn

# class CustomCNNExtractor(BaseFeaturesExtractor):
#     def __init__(self, observation_space, features_dim=512):
#         super().__init__(observation_space, features_dim)
#         # Assuming input is (channels, height, width)
#         n_input_channels = observation_space.shape[0] # Corrected to get the number of channels
#         self.cnn = nn.Sequential(
#             nn.Conv2d(n_input_channels, 32, kernel_size=8, stride=4),
#             nn.ReLU(),
#             nn.Conv2d(32, 64, kernel_size=4, stride=2),
#             nn.ReLU(),
#             nn.Conv2d(64, 64, kernel_size=3, stride=1),
#             nn.ReLU(),
#             nn.Flatten()
#         )

#         # Compute output of CNN to set features_dim
#         with torch.no_grad():
#             # sample = torch.zeros(1, n_input_channels, observation_space.shape[0], observation_space.shape[1])
#             # Corrected sample input shape
#             sample = torch.zeros(1, observation_space.shape[0], observation_space.shape[1], observation_space.shape[2])
#             cnn_out = self.cnn(sample)
#         self._features_dim = cnn_out.shape[1]

#     def forward(self, observations):
#         return self.cnn(observations)

In [None]:
from stable_baselines3.common.callbacks import CheckpointCallback


def train_ppo_agent(
    total_timesteps=1_000_000,
    model_path="ppo_carracing",
    checkpoint_freq=100_000,
    checkpoint_dir="./models/",
    tensorboard_log="./ppo_carracing_tensorboard/",
):
    """Train a PPO ``CnnPolicy`` agent on the env from ``make_env`` and save it.

    Args:
        total_timesteps: Number of environment steps passed to ``model.learn``.
        model_path: Path handed to ``model.save``; also used as the checkpoint
            file-name prefix.
        checkpoint_freq: ``save_freq`` given to ``CheckpointCallback``.
        checkpoint_dir: Directory given to ``CheckpointCallback`` as ``save_path``.
        tensorboard_log: Directory given to ``PPO`` for TensorBoard logs.

    The vectorized environment is closed when the function exits, including
    when training is interrupted or raises.
    """
    env = DummyVecEnv([make_env])
    try:
        checkpoint_callback = CheckpointCallback(
            save_freq=checkpoint_freq,
            save_path=checkpoint_dir,
            name_prefix=model_path,
        )

        # A custom feature extractor could be plugged in here through
        # PPO(..., policy_kwargs=dict(features_extractor_class=...)).
        model = PPO("CnnPolicy", env, verbose=1, tensorboard_log=tensorboard_log)
        model.learn(total_timesteps=total_timesteps, callback=checkpoint_callback)

        model.save(model_path)

        print(f" Model saved at: {model_path}")
    finally:
        # Release the underlying environment(s) even if training fails midway.
        env.close()






In [11]:

# def evaluate(model_path, checkpoint_freq=100_000):
#     env = make_env()
#     model = PPO.load(model_path)

#     obs, _ = env.reset() #observation, info
#     done = False
#     frames = []

#     while not done:
#         action, _ = model.predict(obs)
#         obs, reward, terminated, truncated, _ = env.step(action)
#         done = terminated or truncated
#         frames.append(env.render())

    # # Saving video
    # import imageio
    # imageio.mimsave(output_video_path, frames, fps=fps)
    # print(f" Video saved to: {output_video_path}")


In [12]:

# Start the training run; both arguments are spelled out explicitly even
# though they equal the defaults declared by train_ppo_agent.
train_ppo_agent(total_timesteps=1_000_000, model_path="ppo_carracing")


# evaluate(model_path="ppo_carracing", checkpoint_freq=100_000)




  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
|    n_updates            | 2610       |
|    policy_gradient_loss | 0.0319     |
|    std                  | 0.506      |
|    value_loss           | 5.37       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 998        |
|    ep_rew_mean          | 333        |
| time/                   |            |
|    fps                  | 78         |
|    iterations           | 263        |
|    time_elapsed         | 6897       |
|    total_timesteps      | 538624     |
| train/                  |            |
|    approx_kl            | 0.49525222 |
|    clip_fraction        | 0.702      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.18      |
|    explained_variance   | 0.919      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.286      |
|    n_updates            | 2620 

In [None]:
# Running all of this on Colab, then downloading the saved policy files.

from google.colab import files
import os

# Download every regular file in the models directory, in name order so the
# sequence of browser downloads is predictable.
model_dir = './models/'
if os.path.isdir(model_dir):
    for filename in sorted(os.listdir(model_dir)):
        file_path = os.path.join(model_dir, filename)
        if os.path.isfile(file_path):
            files.download(file_path)
else:
    # The directory does not exist (for example, no checkpoint was written yet),
    # so report that instead of failing inside os.listdir.
    print(f"No model directory found at {model_dir}; nothing to download.")



# files.download('./models/ppo_carracing_100000_steps.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>