In [2]:
import sys
import torch
from Agent import TD3Agent
from Networks import Critic, Actor

# Use the CUDA device when PyTorch reports one is available; otherwise use the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
if "google.colab" in sys.modules:
    from google.colab import drive
    drive.mount("/content/drive")
    %cd "/content/drive/MyDrive/Python/Bath University/RL1_CW/Louie/TD3"
    !pip install swig
    !pip install gymnasium[box2d]

# Training - Stage 1 - Complete Normal Mode

In [3]:
# Stage 1: build a new actor and two new critics, wrap them in an agent with
# hardcore=False, and train with the settings below.
actor, critic1, critic2 = Actor(), Critic(), Critic()

agent = TD3Agent(
    critic_network1=critic1,
    critic_network2=critic2,
    actor_network=actor,
    device=device,
    hardcore=False,
    max_buffer_length=1_000_000,
)

# Keyword arguments for TD3Agent.learn; their exact semantics are defined by
# the agent implementation in Agent.py.
stage1_learn_kwargs = dict(
    # -- run / schedule settings --
    n_episodes=2000,
    discount_factor=0.99,
    minibatch_size=256,
    tau=0.005,
    random_exploration_steps=10_000,
    actor_exploration_steps=1000,
    vid_every=50,
    stop_after=1,
    reset_optim=True,
    reset_buffer=True,
    # -- learning-rate, clipping, noise and update settings --
    critic_lr=3e-4,
    actor_lr=3e-4,
    critic_grad_clip=1.0,
    actor_grad_clip=1.0,
    exploratory_noise_start=0.3,
    exploratory_noise_min=0.05,
    exploratory_noise_decay=1e-6,
    exploratory_noise_clip=0.3,
    policy_noise=0.2,
    policy_noise_clip=0.5,
    policy_delay=2,
    updates_per_step=1,
)

agent.learn(**stage1_learn_kwargs)

  from pkg_resources import resource_stream, resource_exists


Performing Random Exploration...
Performing Actor Exploration...
Running Episode 1...
Reward: -119.06 - Step Count: 98 - Run Time: 0.55s - Total Step Count: 98 - Exploratory Noise: 0.29990

Reward: -120.07 - Step Count: 41 - Run Time: 0.13s

Running Episode 2...
Reward: -118.38 - Step Count: 42 - Run Time: 0.29s - Total Step Count: 140 - Exploratory Noise: 0.29986
Running Episode 3...
Reward: -121.93 - Step Count: 77 - Run Time: 0.43s - Total Step Count: 217 - Exploratory Noise: 0.29978
Running Episode 4...
Reward: -110.63 - Step Count: 84 - Run Time: 0.48s - Total Step Count: 301 - Exploratory Noise: 0.29970
Running Episode 5...
Reward: -105.50 - Step Count: 58 - Run Time: 0.32s - Total Step Count: 359 - Exploratory Noise: 0.29964
Running Episode 6...
Reward: -104.43 - Step Count: 76 - Run Time: 0.46s - Total Step Count: 435 - Exploratory Noise: 0.29957
Running Episode 7...
Reward: -116.98 - Step Count: 70 - Run Time: 0.41s - Total Step Count: 505 - Exploratory Noise: 0.29950
Running 

KeyboardInterrupt: 

# Training - Stage 2 - Go Faster

In [4]:
# Stage 2: keep training the `agent` object created by an earlier cell (it is
# not rebuilt here). Both exploration step counts are 0, reset_optim and
# reset_buffer are False, and exploratory noise start/min are both 0.1 with a
# decay of 0. Exact semantics of each argument are defined by TD3Agent.learn.
stage2_learn_kwargs = dict(
    # -- run / schedule settings --
    n_episodes=500,
    discount_factor=0.99,
    minibatch_size=256,
    tau=0.005,
    random_exploration_steps=0,
    actor_exploration_steps=0,
    vid_every=50,
    stop_after=None,
    reset_optim=False,
    reset_buffer=False,
    # -- learning-rate, clipping, noise and update settings --
    critic_lr=3e-4,
    actor_lr=3e-4,
    critic_grad_clip=1.0,
    actor_grad_clip=1.0,
    exploratory_noise_start=0.1,
    exploratory_noise_min=0.1,
    exploratory_noise_decay=0,
    exploratory_noise_clip=0.3,
    policy_noise=0.2,
    policy_noise_clip=0.5,
    policy_delay=2,
    updates_per_step=1,
)

agent.learn(**stage2_learn_kwargs)

Performing Random Exploration...
Performing Actor Exploration...
Running Episode 1...
Reward: 296.40 - Step Count: 1010 - Run Time: 7.92s - Total Step Count: 1010 - Exploratory Noise: 0.10000

Reward: 299.57 - Step Count: 984 - Run Time: 15.09s

Running Episode 2...
Reward: 296.23 - Step Count: 1009 - Run Time: 7.48s - Total Step Count: 2019 - Exploratory Noise: 0.10000
Running Episode 3...
Reward: 296.08 - Step Count: 1027 - Run Time: 7.97s - Total Step Count: 3046 - Exploratory Noise: 0.10000
Running Episode 4...
Reward: 296.41 - Step Count: 998 - Run Time: 7.09s - Total Step Count: 4044 - Exploratory Noise: 0.10000
Running Episode 5...
Reward: 297.26 - Step Count: 1010 - Run Time: 7.77s - Total Step Count: 5054 - Exploratory Noise: 0.10000
Running Episode 6...
Reward: 299.37 - Step Count: 1010 - Run Time: 7.04s - Total Step Count: 6064 - Exploratory Noise: 0.10000
Running Episode 7...
Reward: 299.53 - Step Count: 987 - Run Time: 7.65s - Total Step Count: 7051 - Exploratory Noise: 0.

# Training - Stage 3 - Complete Hardcore Mode



In [6]:
# Stage 3: two new critics plus an actor whose weights are loaded from the
# saved stage 2 checkpoint, wrapped in a new agent with hardcore=True.
critic1, critic2 = Critic(), Critic()

stage2_actor = Actor()
stage2_actor.load_state_dict(
    torch.load("outputs/stage 2/actor_network.pth", map_location=device)
)

agent = TD3Agent(
    critic_network1=critic1,
    critic_network2=critic2,
    actor_network=stage2_actor,
    device=device,
    hardcore=True,
    max_buffer_length=1_000_000,
)

# Keyword arguments for TD3Agent.learn; their exact semantics are defined by
# the agent implementation in Agent.py.
stage3_learn_kwargs = dict(
    # -- run / schedule settings --
    n_episodes=2000,
    discount_factor=0.99,
    minibatch_size=256,
    tau=0.005,
    random_exploration_steps=10_000,
    actor_exploration_steps=1000,
    vid_every=50,
    stop_after=1,
    reset_optim=True,
    reset_buffer=True,
    # -- learning-rate, clipping, noise and update settings --
    critic_lr=3e-4,
    actor_lr=3e-4,
    critic_grad_clip=1.0,
    actor_grad_clip=1.0,
    exploratory_noise_start=0.1,
    exploratory_noise_min=0.1,
    exploratory_noise_decay=0,
    exploratory_noise_clip=0.3,
    policy_noise=0.2,
    policy_noise_clip=0.5,
    policy_delay=2,
    updates_per_step=1,
)

agent.learn(**stage3_learn_kwargs)

Performing Random Exploration...
Performing Actor Exploration...
Running Episode 1...
Reward: -84.52 - Step Count: 150 - Run Time: 1.03s - Total Step Count: 150 - Exploratory Noise: 0.10000

Reward: -117.39 - Step Count: 72 - Run Time: 0.58s

Running Episode 2...
Reward: -127.20 - Step Count: 113 - Run Time: 0.80s - Total Step Count: 263 - Exploratory Noise: 0.10000
Running Episode 3...
Reward: -119.49 - Step Count: 110 - Run Time: 0.75s - Total Step Count: 373 - Exploratory Noise: 0.10000
Running Episode 4...
Reward: -117.09 - Step Count: 94 - Run Time: 0.66s - Total Step Count: 467 - Exploratory Noise: 0.10000
Running Episode 5...
Reward: -114.67 - Step Count: 66 - Run Time: 0.47s - Total Step Count: 533 - Exploratory Noise: 0.10000
Running Episode 6...
Reward: -137.07 - Step Count: 138 - Run Time: 1.21s - Total Step Count: 671 - Exploratory Noise: 0.10000
Running Episode 7...
Reward: -133.45 - Step Count: 159 - Run Time: 1.42s - Total Step Count: 830 - Exploratory Noise: 0.10000
Run