In [1]:
import sys
import os
sys.path.append(os.getcwd())
from gym_env_wrapper import create_env
#import gym
import gymnasium as gym
from stable_baselines3.common.vec_env import SubprocVecEnv
import stable_baselines3 as sb3
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3.common.callbacks import BaseCallback
import time


# INI configuration file handed to create_env() by make_env().
config_path = "/home/chaofan/Documents/pyhyflex/hhrl/configs/fir_discrete.ini"
# Results location handed to create_env() by make_env().
save_path = "/home/chaofan/Documents/pyhyflex/hhrl/results/"


class ProgressCallback(BaseCallback):
    """Callback that prints training progress and collects episode rewards.

    A progress line with the elapsed wall-clock time is printed each time the
    total timestep counter crosses a multiple of ``log_interval``.  Episode
    rewards are read from ``info['episode']['r']`` for every env ``info`` dict
    that carries an ``'episode'`` entry, and are plotted when training ends.

    Args:
        verbose: Verbosity level forwarded to ``BaseCallback``.
        log_interval: Number of timesteps between two progress lines.

    Raises:
        ValueError: If ``log_interval`` is not strictly positive.
    """

    def __init__(self, verbose=0, log_interval=1000):
        super().__init__(verbose)
        if log_interval <= 0:
            raise ValueError("log_interval must be a positive integer")
        self.log_interval = log_interval
        self.start_time = None          # set when training starts
        self.episode_rewards = []       # one entry per finished episode
        self._last_log_bucket = 0       # num_timesteps // log_interval at last log

    def _on_training_start(self):
        """Start the wall-clock timer and anchor the progress-log counter."""
        self.start_time = time.time()
        # Anchor on the current counter so a resumed run does not log at once.
        self._last_log_bucket = self.num_timesteps // self.log_interval
        print("Training started!")

    def _on_step(self):
        """Record finished-episode rewards and print periodic progress.

        Returns:
            Always ``True`` so that training continues.
        """
        # 'infos' holds one dict per parallel env; tolerate it being absent.
        for info in self.locals.get('infos') or ():
            if 'episode' in info:
                self.episode_rewards.append(info['episode']['r'])

        # num_timesteps advances by the number of parallel envs on every call,
        # so an exact ``% log_interval == 0`` test is skipped whenever that
        # number does not divide the interval.  Detect the crossing instead.
        bucket = self.num_timesteps // self.log_interval
        if bucket > self._last_log_bucket:
            self._last_log_bucket = bucket
            elapsed_time = time.time() - self.start_time
            print(f"Step: {self.num_timesteps}, Elapsed Time: {elapsed_time:.2f}s")
        return True

    def _on_training_end(self):
        """Print the total training time and plot the collected rewards."""
        elapsed_time = time.time() - self.start_time
        print(f"Training ended! Total time: {elapsed_time:.2f}s")

        # Plot rewards only if there are any
        if self.episode_rewards:
            plt.plot(np.arange(len(self.episode_rewards)), self.episode_rewards)
            plt.xlabel('Episode')
            plt.ylabel('Reward')
            plt.title('Training Rewards')
            plt.show()
        else:
            print("No episode rewards recorded.")


###make_env###

def make_env(problem, instance_id, run_id, iteration_limit, overwrite, seed=7):
    """Build a zero-argument factory that creates one environment.

    The returned callable defers the actual ``create_env`` call until it is
    invoked, which is the form ``SubprocVecEnv`` is given in this file.

    Args:
        problem: Forwarded to ``create_env`` as its first argument.
        instance_id: Forwarded to ``create_env``.
        run_id: Forwarded to ``create_env``.
        iteration_limit: Forwarded to ``create_env``.
        overwrite: Forwarded to ``create_env``.
        seed: Seed forwarded to ``create_env``. Defaults to 7, the value that
            was previously hard-coded, so existing calls behave the same.

    Returns:
        A callable taking no arguments that returns
        ``create_env(problem, instance_id, seed, run_id, iteration_limit,
        config_path, save_path, overwrite)``.
    """
    def _init():
        # config_path / save_path are the module-level settings of this file.
        return create_env(problem, instance_id, seed, run_id, iteration_limit,
                          config_path, save_path, overwrite)

    return _init


if __name__ == "__main__":
    num_cpu = 10  # number of parallel environments (one subprocess each)

    # Create the parallel environments; each worker gets its own run_id.
    env = SubprocVecEnv([make_env('BP', 0, i, 10000, True) for i in range(num_cpu)])

    # Always shut the worker subprocesses down, even when training fails or
    # is interrupted; otherwise they are left running.
    try:
        # Create the model
        model = sb3.DQN('MlpPolicy', env, verbose=1)

        # Create the progress callback
        progress_callback = ProgressCallback(verbose=1)

        # Train, passing the callback to learn()
        model.learn(total_timesteps=1000000, callback=progress_callback)

        # Save the trained model
        model.save("dqn_hyflex")
    finally:
        # Close the environments
        env.close()


Using cpu device
Training started!
Step: 1000, Elapsed Time: 3.80s
Step: 3000, Elapsed Time: 132.10s
Step: 4000, Elapsed Time: 135.18s
Step: 5000, Elapsed Time: 138.25s
Step: 6000, Elapsed Time: 141.36s
Step: 7000, Elapsed Time: 144.45s
Step: 8000, Elapsed Time: 147.46s
Step: 9000, Elapsed Time: 150.48s
Step: 10000, Elapsed Time: 153.52s
Step: 11000, Elapsed Time: 156.60s
Step: 12000, Elapsed Time: 159.62s
Step: 13000, Elapsed Time: 162.65s
Step: 14000, Elapsed Time: 165.69s
Step: 15000, Elapsed Time: 168.72s
Step: 16000, Elapsed Time: 171.76s
Step: 17000, Elapsed Time: 174.83s
Step: 18000, Elapsed Time: 177.88s
Step: 19000, Elapsed Time: 180.89s
Step: 20000, Elapsed Time: 183.97s
Step: 21000, Elapsed Time: 187.00s
Step: 22000, Elapsed Time: 190.04s
Step: 23000, Elapsed Time: 193.12s
Step: 24000, Elapsed Time: 196.19s
Step: 25000, Elapsed Time: 199.19s
Step: 26000, Elapsed Time: 202.23s
Step: 27000, Elapsed Time: 205.28s
Step: 28000, Elapsed Time: 208.32s
Step: 29000, Elapsed Time: 211

In [1]:
import os

# Print the CPU count reported by os.cpu_count() (None if it cannot be determined).
cpu_count = os.cpu_count()
print(f"Number of CPUs: {cpu_count}")


Number of CPUs: 14
