## Train an Agent on the Osu Environment Using PPO with LSTM Networks

In [1]:
from environment import OsuEnvironment
from ppo_agent import PPO_Agent
from model import LSTM_Actor, LSTM_Critic
import torch
import torch.nn as nn
import torch.optim as optim
import os
import warnings
import logging

if not os.path.exists('yolov5'):
    !git clone https://github.com/ultralytics/yolov5
    !pip install -r yolov5/requirements.txt

# Suppress FutureWarning messages and restrict the 'ultralytics' logger to errors.
warnings.simplefilter(action="ignore", category=FutureWarning)
ultralytics_logger = logging.getLogger('ultralytics')
ultralytics_logger.setLevel(logging.ERROR)

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [2]:
# Configuration for the PPO agent. Every tunable value is named here so that
# changing it in one place is guaranteed to reach the object that uses it.
env = OsuEnvironment(num_frame=15)
behavior_cloning = True  # forwarded to PPO_Agent below

# Settings passed to both the actor and the critic network.
hidden_size = 128
num_layer = 2
dropout = 0.1

# Settings passed to PPO_Agent.
batch_size = 64
n_epoch = 5

# Adam learning rates, kept separate so each optimizer can be tuned on its own.
actor_lr = 0.001
critic_lr = 0.001

actor_net = LSTM_Actor(env.observation_space, env.action_space.nvec, hidden_size=hidden_size, num_layer=num_layer, dropout=dropout).to(device)
critic_net = LSTM_Critic(env.observation_space, hidden_size=hidden_size, num_layer=num_layer, dropout=dropout).to(device)

actor_optimizer = optim.Adam(actor_net.parameters(), lr=actor_lr)
critic_optimizer = optim.Adam(critic_net.parameters(), lr=critic_lr)

# Pass the behavior_cloning variable (not a literal) so the setting defined
# above actually controls the agent.
agent = PPO_Agent(env=env, 
                  actor_net=actor_net, 
                  critic_net=critic_net,
                  actor_optimizer=actor_optimizer, 
                  critic_optimizer=critic_optimizer, 
                  batch_size=batch_size,
                  n_epoch=n_epoch, 
                  behavior_cloning=behavior_cloning)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\bohui/.cache\torch\hub\master.zip
YOLOv5  2024-12-2 Python-3.11.5 torch-2.5.0+cu118 CUDA:0 (NVIDIA GeForce RTX 3050 Ti Laptop GPU, 4096MiB)

Fusing layers... 
Model summary: 157 layers, 7018216 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 


Listening on 127.0.0.1:5555


In [3]:
# Run the agent's pre-training phase for a fixed number of episodes.
pretrain_episodes = 200
agent.pretrain(total_episode=pretrain_episodes)

Episod 10, pre-training loss: 0.048103951528801696
Episod 20, pre-training loss: 0.02689887400025566
Episod 30, pre-training loss: 0.017904549141292286
Episod 40, pre-training loss: 0.013196834974952934
Episod 50, pre-training loss: 0.010603529787507891
Episod 60, pre-training loss: 0.00870633464921312
Episod 70, pre-training loss: 0.007744373369864627
Episod 80, pre-training loss: 0.007191756078796447
Episod 90, pre-training loss: 0.006561091016136256
Episod 100, pre-training loss: 0.006152456134986574
Episod 110, pre-training loss: 0.005998616138883966
Episod 120, pre-training loss: 0.005685568765204132
Episod 130, pre-training loss: 0.005225965949524465
Episod 140, pre-training loss: 0.005127025554863127
Episod 150, pre-training loss: 0.004753652918235845
Episod 160, pre-training loss: 0.004893571155197405
Episod 170, pre-training loss: 0.004738511629535483
Episod 180, pre-training loss: 0.004520072712417267
Episod 190, pre-training loss: 0.004507530136719221
Episod 200, pre-trainin

In [4]:
# Evaluate with the pretrain flag set; the call reports an accuracy figure.
eval_options = {"pretrain": True}
agent.eval(**eval_options)

Accuracy:  0.9879792355431819


In [None]:
# Training run; the settings below are forwarded unchanged as keyword
# arguments to agent.train.
train_kwargs = {
    "total_episode": 100,
    "c": 0.02,
    "expert_weight": 0.2,
    "margin": 0.8,
}
agent.train(**train_kwargs)

Episode 10: Total Reward: -208, Loss: 0.12204598023090511
Episode 20: Total Reward: -151, Loss: 0.16716272179037334
Episode 30: Total Reward: -129, Loss: 0.18378035463392733


In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Draw the agent's plot (presumably the training curves; confirm in ppo_agent).
agent.plot()

In [None]:
# Evaluate the agent over a small number of episodes.
eval_episodes = 5
agent.eval(total_episode=eval_episodes)