In [1]:
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines3.common.utils import set_random_seed
import globals as gl
import classes as cl
import time
import os
from environment_continous import ContinousPricingGame

In [2]:
# Populate the shared settings module before any gl.* constant is read below.
gl.initialize()

# --- Experiment configuration (read by run()) ---
num_procs = 8                 # only recorded in the results row by run()
model = A2C                   # algorithm class; run() instantiates it
model_name = "CoA2C"          # prefix for the run / checkpoint / log names
timesteps = 1_000_000         # steps per learn() call
num_timesteps = 10            # number of learn() calls per run (not a step count)
state_onehot = False
costs = [gl.LOW_COST, gl.HIGH_COST]

# The adversary always plays the single "myopic" static strategy (probability 1).
myopic_strategy = cl.Strategy(
    cl.StrategyType.static,
    NNorFunc=cl.myopic,
    name="myopic",
)
adv_mixed_strategy = cl.MixedStrategy(
    strategiesList=[myopic_strategy],
    probablitiesArray=[1],
)

In [3]:
# Learning rates swept by the driver loop at the bottom of the notebook.
lrs = [
    8e-5,
    3e-4,
    9e-4,
]

In [4]:
# def make_env(rank, seed=0):
#     """
#     Utility function for multiprocessed env.
#     :param seed: (int) the initial seed for RNG
#     :param rank: (int) index of the subprocess
#     """

#     def _init():
#         env = ContinousPricingGame(tuple_costs=costs, adversary_mixed_strategy=adv_mixed_strategy, state_onehot=state_onehot)
#         env.reset(seed=seed + rank)
#         # use a seed for reproducibility
#         # Important: use a different seed for each environment
#         # otherwise they would generate the same experiences
#         return env

#     set_random_seed(seed)
#     return _init

In [5]:
def run(lr=None):
    """Train one model on the continuous pricing game, then evaluate and log it.

    The run is identified by ``"{model_name}-{seed}"``, where ``seed`` is the
    current time in whole seconds. Checkpoints are written under
    ``models/<run id>/`` and TensorBoard logs under ``logs/<run id>/``.
    Training happens in ``num_timesteps`` chunks of ``timesteps`` steps each,
    with a checkpoint saved after every chunk. Afterwards the trained model
    plays ``gl.NUM_STOCHASTIC_ITER`` episodes in a fresh environment and one
    result row per episode is handed to ``cl.write_to_excel``.

    :param lr: learning rate for the model; ``gl.LR`` is used when ``None``.
    :return: None
    """
    # NOTE(review): the seed only makes the run name unique; it is never passed
    # to the model or the environment, so training itself is not seeded and the
    # "seed" column of the results row is left blank.
    seed = int(time.time())

    iter_name = f"{model_name}-{seed}"
    models_dir = os.path.join("models", iter_name)
    log_dir = os.path.join("logs", iter_name)

    os.makedirs(models_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)

    # Disabled multiprocessing variant, kept for reference:
#     if num_procs == 1:
#         # if there is only one process, there is no need to use multiprocessing
#         train_env = DummyVecEnv([make_env(0,seed=seed)])
#     else:
#         # Here we use the "fork" method for launching the processes, more information is available in the doc
#         # This is equivalent to make_vec_env(env_id, n_envs=n_procs, vec_env_cls=SubprocVecEnv, vec_env_kwargs=dict(start_method='fork'))
#         train_env = SubprocVecEnv(
#             [make_env( i,seed=seed) for i in range(num_procs)],
#             start_method="fork"
#         )
#         # it is quicker if start_method="fork", but my Python version gives me an error

    lr_ = gl.LR if lr is None else lr
    train_env = ContinousPricingGame(
        tuple_costs=costs,
        adversary_mixed_strategy=adv_mixed_strategy,
        state_onehot=state_onehot,
    )
    train_env.reset()

    model_ = model('MlpPolicy', train_env, learning_rate=lr_, verbose=0,
                   tensorboard_log=log_dir, gamma=gl.GAMMA)

    start = time.time()
    for chunk in range(num_timesteps):
        # reset_num_timesteps=False continues the same training run / log curve.
        model_.learn(total_timesteps=timesteps,
                     reset_num_timesteps=False, tb_log_name=iter_name)
        # Name the checkpoint after the steps trained so far: the first file is
        # `timesteps`, the last is `timesteps * num_timesteps` (the "ep" value
        # written to the results row below).
        model_.save(os.path.join(models_dir, str(timesteps * (chunk + 1))))
    running_time = time.time() - start

    # test and write results
    env = ContinousPricingGame(
        tuple_costs=costs,
        adversary_mixed_strategy=adv_mixed_strategy,
        state_onehot=state_onehot,
    )
    for _ in range(gl.NUM_STOCHASTIC_ITER):
        obs = env.reset()
        done = False

        actions = []
        while not done:
            # predict() is called with its default settings here.
            action, _states = model_.predict(obs)
            obs, reward, done, info = env.step(action)

            actions.append(int(action))

        # Column order of the results row:
        #   name	ep	costs	adversary	agent_return	adv_return	agent_rewards	actions	agent_prices	adv_prices	agent_demands	adv_demands	lr	hist	total_stages	action_step	num_actions	gamma	stae_onehot	seed	num_procs	running_time
        data = [
            iter_name,
            timesteps * num_timesteps,
            ("L" if (costs[0] < costs[1]) else "H"),
            env.adversary_strategy.name,
            sum(env.profit[0]),
            sum(env.profit[1]),
            str(env.profit[0]),
            str(actions),
            str(env.prices[0]),
            str(env.prices[1]),
            str(env.demand_potential[0]),
            str(env.demand_potential[1]),
            lr_,
            gl.NUM_ADV_HISTORY,
            gl.TOTAL_STAGES,
            gl.ACTION_STEP,
            gl.NUM_ACTIONS,
            gl.GAMMA,
            env.state_onehot,
            " ",  # seed column intentionally blank (see note at top)
            num_procs,
            running_time,
        ]
        cl.write_to_excel(data)


In [None]:
# Sweep the learning rates: two independent train-and-evaluate runs per value.
# TODO: seed should be set in multiprocessing
for lr in lrs:
    for _repeat in range(2):
        run(lr=lr)
        

