# Train Model

In [1]:
import gymnasium as gym
from stable_baselines3 import A2C #or PPO
import os

In [2]:
models_dir = "models/A2C" # or PPO
logdir = "logs"

if not os.path.exists(models_dir):
    os.makedirs(models_dir)

if not os.path.exists(logdir):
    os.makedirs(logdir)

env = gym.make("LunarLander-v3", render_mode='rgb_array')
env.reset()

model = A2C('MlpPolicy', env, device="cpu", verbose=1, tensorboard_log=logdir) # or PPO

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [3]:
TIMESTEPS = 10000
iters = 0
for i in range(30):
    model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="A2C") #or PPO
    model.save(f"{models_dir}/{TIMESTEPS*i}")

Logging to logs/A2C_0
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 303      |
|    ep_rew_mean        | -342     |
| time/                 |          |
|    fps                | 414      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.38    |
|    explained_variance | -0.215   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 2.25     |
|    value_loss         | 3.87     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 164      |
|    ep_rew_mean        | -365     |
| time/                 |          |
|    fps                | 598      |
|    iterations         | 200      |
|    time_elapsed       | 1        |
|    total_timesteps    | 1000     |
| train/        

KeyboardInterrupt: 

# Load Model

In [3]:
import gymnasium as gym
from stable_baselines3 import PPO

models_dir = "/home/prh/Desktop/Local_Prgm_Projects/TetoML/models/PPO"

env = gym.make('LunarLander-v3', render_mode='human')
env.reset()

model_path = f"{models_dir}/230000.zip"
model = PPO.load(model_path, env=env)

episodes = 5

for ep in range(episodes):
    obs, info = env.reset()
    done = False
    while not done:
        action, _states = model.predict(obs)
        obs, rewards, terminated, truncated, info = env.step(action)
        env.render()
        done = truncated or terminated
        print(rewards)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
1.0263583767774094
1.115293096675515
1.139814170195507
1.1531597055966927
1.1590351428073973
1.155881252337764
1.1418831627942723
1.1149160315662527
1.072532626004687
1.0119085309334253
0.9302314178996767
0.8246269280489855
0.692800834267473
0.5337359424891019
0.34830736002882645
0.14015161256753572
-0.08425595740689573
0.5722053443414279
0.6686269940404099
-0.45802571602513353
0.33571592418616203
-0.7017058679174966
-0.10676553641528017
0.9339245901806634
0.6344064808581333
0.28277767032807444
-1.5643368425535573
2.543254623923315
0.8782465581222538
-2.5224859462492177
-0.35086069175746387
-1.737692518661845
-2.426232774239422
3.5083090605213103
1.1036218560769668
-2.0432364311953095
0.7755574473599267
-1.8859445281279068
2.788570992037461
2.2385390739399442
-0.23971923490353106
0.6852834044777409
-1.6579660287525815
-0.49944444773336616
-0.567751161060744
0.7966410716991106
-0.7760722812244569
3.811563474942