In [1]:
import os
import gymnasium as gym
from stable_baselines3 import PPO, A2C, DQN
from tqdm import tqdm

In [None]:
epochs = 500 

# Set path
logsPath = os.path.join(os.getcwd(), 'logs')
ppoModelsPath = os.path.join(os.getcwd(), f'models_{epochs}', 'PPO')
a2cModelsPath = os.path.join(os.getcwd(), f'models_{epochs}', 'A2C')
dqnModelsPath = os.path.join(os.getcwd(), f'models_{epochs}', 'DQN')

# Make Dir
if not os.path.exists(logsPath):    
    os.makedirs(logsPath)
if not os.path.exists(ppoModelsPath):   
    os.makedirs(ppoModelsPath)
if not os.path.exists(a2cModelsPath):   
    os.makedirs(a2cModelsPath)
if not os.path.exists(dqnModelsPath):   
    os.makedirs(dqnModelsPath)

# Set Enviroment
env = gym.make('CartPole-v1')
env.reset()

# Reset Model
ppoModel = PPO('MlpPolicy', env, verbose=0, tensorboard_log=logsPath)  
a2cModel = A2C('MlpPolicy', env, verbose=0, tensorboard_log=logsPath)  
dqnModel = DQN('MlpPolicy', env, verbose=0, tensorboard_log=logsPath) 

timesteps = 10000

for i in tqdm(range(1, epochs), desc="Training Progress"):
    ppoModel.learn(total_timesteps=timesteps, reset_num_timesteps=False, tb_log_name="PPO")
    ppoModel.save("%s/%s" % (ppoModelsPath, timesteps * i))
    
    a2cModel.learn(total_timesteps=timesteps, reset_num_timesteps=False, tb_log_name="A2C")
    a2cModel.save("%s/%s" % (a2cModelsPath, timesteps * i))
    
    dqnModel.learn(total_timesteps=timesteps, reset_num_timesteps=False, tb_log_name="DQN")
    dqnModel.save("%s/%s" % (dqnModelsPath, timesteps * i))

env.close()

# Check Result
print("\nFinal results:")
print("PPO Model saved at: %s" % ppoModelsPath)
print("A2C Model saved at: %s" % a2cModelsPath)
print("DQN Model saved at: %s" % dqnModelsPath)


Training Progress: 100%|██████████| 499/499 [5:41:50<00:00, 41.10s/it]    


Final results:
PPO Model saved at: d:\Project\CartPoleProblem\models_500\PPO
A2C Model saved at: d:\Project\CartPoleProblem\models_500\A2C
DQN Model saved at: d:\Project\CartPoleProblem\models_500\DQN



