In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import gymnasium as gym

In [None]:
from runner import Runner
from rl_lib.algorithms.actor_critic import ActorCritic
from rl_lib.utils.env_wrapper import EnvWrapper

In [121]:
# Training environment: the Gymnasium "MountainCarContinuous-v0" task wrapped in
# the project's EnvWrapper. No render_mode is passed to gym.make here.
env = EnvWrapper(gym.make("MountainCarContinuous-v0"))
class ActorCriticNetwork(nn.Module):
    """Small MLP with independent actor and critic heads.

    Each head is a two-layer perceptron with its own parameters (nothing is
    shared between them). The actor produces a mean and a strictly positive
    standard deviation per action dimension; the critic produces one scalar
    per input state.

    Args:
        state_dim: Size of the last dimension of the state tensor.
        action_dim: Number of action dimensions; the actor emits
            ``2 * action_dim`` raw outputs that are split into ``mu``/``std``.
        hidden_dim: Width of the single hidden layer of each head.
            Defaults to 64.
    """

    # Floor added to the softplus output so ``std`` can never reach exactly 0.
    _MIN_STD = 1e-5

    def __init__(self, state_dim, action_dim, hidden_dim=64):
        # ---------------------- DO NOT MODIFY ----------------------
        super(ActorCriticNetwork, self).__init__()
        self._state_dim = state_dim
        self._action_dim = action_dim
        # ------------------------------------------------------------

        # Layers are created in a fixed order (actor first, then critic) so the
        # random initialisation consumes the RNG stream deterministically.
        self._actor_fc1 = nn.Linear(state_dim, hidden_dim)
        self._actor_fc2 = nn.Linear(hidden_dim, action_dim * 2)

        self._critic_fc1 = nn.Linear(state_dim, hidden_dim)
        self._critic_fc2 = nn.Linear(hidden_dim, 1)

    def actor(self, x):
        """Return ``(mu, std)`` for the given state(s).

        Args:
            x: Tensor whose last dimension has size ``state_dim``.

        Returns:
            A tuple ``(mu, std)``; both tensors have ``action_dim`` as their
            last dimension. ``mu`` is unbounded, ``std`` is ``softplus(raw)``
            plus a small floor and therefore always positive.
            NOTE(review): presumably consumed as Gaussian policy parameters by
            the ActorCritic agent - confirm against rl_lib.
        """
        # torch.tanh is numerically identical to F.tanh but does not trigger the
        # deprecation warning that F.tanh emits on older PyTorch releases.
        hidden = torch.tanh(self._actor_fc1(x))
        # The final layer packs [mu | raw_std] along the last dimension.
        mu, raw_std = self._actor_fc2(hidden).chunk(2, dim=-1)
        std = F.softplus(raw_std) + self._MIN_STD
        return mu, std

    def critic(self, x):
        """Return the critic's scalar output for the given state(s).

        Args:
            x: Tensor whose last dimension has size ``state_dim``.

        Returns:
            Tensor with a trailing dimension of size 1.
        """
        hidden = F.relu(self._critic_fc1(x))
        return self._critic_fc2(hidden)

# Seed PyTorch's global RNG before the network is built so weight initialisation
# (and any sampling done through torch afterwards) repeats across kernel restarts.
# NOTE(review): this does not seed the environment; if Runner/EnvWrapper reset the
# env without a seed, episode start states will still vary between runs.
SEED = 0
torch.manual_seed(SEED)

# Both spaces are read as 1-D: the first entry of `.shape` is used as the size.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]

network = ActorCriticNetwork(state_dim, action_dim)
# A single Adam optimizer updates the actor and critic parameters together.
optimizer = torch.optim.Adam(network.parameters(), lr=0.005)
# Passed positionally to ActorCritic; presumably the reward discount factor - confirm.
gamma = 0.95
# Options forwarded to ActorCritic. Their exact semantics live in rl_lib; the notes
# below are the reviewer's reading of the key names - confirm against the library.
configs = {
    "is_continuous": True,   # presumably: continuous action space
    "n_step": 256,           # presumably: rollout length between updates
    "entropy_coef": 0.05,    # presumably: weight of the entropy bonus in the loss
}

In [122]:
# Build the learner from the network, its optimizer, `gamma` and the config dict,
# then hand it to a Runner together with the training environment.
# NOTE(review): re-running this cell creates a fresh Runner but reuses whatever
# `network`/`optimizer` state currently lives in the kernel.
agent = ActorCritic(network, optimizer, gamma, configs)
runner = Runner(env, agent)

In [124]:
# NOTE(review): the positional arguments look like (number of episodes, per-episode
# step cap) - the captured log shows episodes stopping at exactly 1000 steps - but
# confirm against Runner.train_by_episode. verbose=True prints one line per episode.
runner.train_by_episode(500, 1000, verbose=True)

Episode 0 rewards: -110.16462790486361 steps: 469
Episode 1 rewards: -58.17883675476415 steps: 367
Episode 2 rewards: -30.031724760062303 steps: 291
Episode 3 rewards: -396.9098666408165 steps: 1000
Episode 4 rewards: -87.07029362374121 steps: 467
Episode 5 rewards: 25.238258148924984 steps: 223
Episode 6 rewards: 28.79693842866287 steps: 225
Episode 7 rewards: -338.5412089571348 steps: 1000
Episode 8 rewards: 3.0743494154581015 steps: 305
Episode 9 rewards: -41.80390060260146 steps: 471
Episode 10 rewards: -99.43428833848021 steps: 730
Episode 11 rewards: -65.509126743101 steps: 565
Episode 12 rewards: -12.712365981091551 steps: 321
Episode 13 rewards: -4.87905611258148 steps: 318
Episode 14 rewards: -97.56950222632264 steps: 628
Episode 15 rewards: 41.23490668373686 steps: 219
Episode 16 rewards: -14.160735653322206 steps: 390
Episode 17 rewards: -36.65263323535041 steps: 471
Episode 18 rewards: -58.904935067310646 steps: 636
Episode 19 rewards: 34.095280486137185 steps: 275
Episode 

In [125]:
# Playback uses its own human-rendered environment under a distinct name, so the
# training `env` bound earlier is not silently replaced if earlier cells are re-run.
render_base_env = gym.make("MountainCarContinuous-v0", render_mode="human")
render_env = EnvWrapper(render_base_env)
try:
    runner.visualize(render_env, 1500)
finally:
    # Close the underlying Gymnasium env (and with it the render window) even when
    # playback is interrupted. The base env is closed directly because SOURCE does
    # not show whether EnvWrapper forwards close().
    render_base_env.close()

Step: 1, rewards: -0.17524018527297472
Step: 2, rewards: -0.49075836637662895
Step: 3, rewards: -0.4907881448444006
Step: 4, rewards: -0.5993809535140703
Step: 5, rewards: -0.678897153683473
Step: 6, rewards: -1.2210030138957415
Step: 7, rewards: -1.4045652683463823
Step: 8, rewards: -1.4045687593004428
Step: 9, rewards: -1.4973842210049328
Step: 10, rewards: -2.1009113267284705
Step: 11, rewards: -2.2889422267066792
Step: 12, rewards: -2.3644075053038565
Step: 13, rewards: -2.37823009674541
Step: 14, rewards: -2.383833326034149
Step: 15, rewards: -2.383965859717557
Step: 16, rewards: -2.3884901894283077
Step: 17, rewards: -2.8110475293935737
Step: 18, rewards: -3.0505318292892185
Step: 19, rewards: -3.0668477166761425
Step: 20, rewards: -3.2170006879859563
Step: 21, rewards: -3.296300574464431
Step: 22, rewards: -3.3394110204284124
Step: 23, rewards: -3.400401984111337
Step: 24, rewards: -3.4330851436125704
Step: 25, rewards: -3.5789492116197072
Step: 26, rewards: -3.578949977329203
S

In [126]:
# Shut pygame down manually.
# NOTE(review): presumably a workaround to dismiss the window left open by the
# human-rendered environment in the previous cell - confirm. The import sits here
# rather than with the other imports at the top of the notebook.
import pygame
pygame.quit()