In [None]:
def objective(trial):
    """Train a PPO agent with trial-sampled hyperparameters and score it.

    The trial samples the optimiser, rollout, clipping, discount and network
    hyperparameters, trains PPO for 1,000,000 timesteps on parallel
    ``TrainEnvironment`` workers, then runs 1000 deterministic steps on the
    module-level ``eval_env`` and sums the rewards.

    Args:
        trial: Optuna trial object used to sample hyperparameters.

    Returns:
        float: Sum of the rewards collected over the 1000 evaluation steps.
    """
    # Define hyperparameters using Optuna's trial object
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    n_steps = trial.suggest_int('n_steps', 1024, 8192, step=1024)

    # Single source of truth for the number of parallel environments: it is
    # used both to size the rollout buffer and to build the vectorised env.
    n_envs = 16
    buffer_size = n_steps * n_envs

    # Predefined potential batch sizes
    potential_batch_sizes = [64, 128, 256]
    # Select the largest possible batch size that is a divisor of buffer_size
    batch_size = max(
        size for size in potential_batch_sizes if buffer_size % size == 0
    )

    n_epochs = trial.suggest_int('n_epochs', 5, 32)
    n_layers = trial.suggest_int('n_layers', 2, 3)
    layer_size = trial.suggest_categorical('layer_size', [64, 128, 256])

    clip_range = trial.suggest_float('clip_range', 0.15, 0.25)

    ent_coef = trial.suggest_float('ent_coef', 0.0, 0.02)

    gamma = trial.suggest_float('gamma', 0.95, 0.99)
    gae_lambda = trial.suggest_float('gae_lambda', 0.90, 0.95)

    # Network architecture setup: n_layers hidden layers of equal width
    net_arch = [layer_size] * n_layers

    # Activation function handling: sample a name, map it to the torch class
    activation_functions = {
        'ReLU': torch.nn.ReLU,
        'Tanh': torch.nn.Tanh
    }
    activation_class = activation_functions[
        trial.suggest_categorical('activation_fn', ['ReLU', 'Tanh'])
    ]

    policy_kwargs = dict(
        net_arch=net_arch,
        activation_fn=activation_class
    )

    def make_env(rank: int, seed: int = 0) -> Callable:
        """Return a factory that builds one environment seeded with seed + rank."""
        def _init() -> gym.Env:
            random.seed(seed + rank)
            np.random.seed(seed + rank)
            env = TrainEnvironment(AC_OUTPUT_arr, elec_consum_arr, import_price_rate, import_price_arr, Eff_arr, CAPEX_JA_arr)
            env.reset(seed=seed + rank)
            return env

        return _init

    # Environments run in parallel worker processes
    env = SubprocVecEnv([make_env(i) for i in range(n_envs)])
    try:
        # Model initialization
        model = PPO('MlpPolicy', env, verbose=0, learning_rate=learning_rate,
                    n_steps=n_steps, batch_size=batch_size, n_epochs=n_epochs,
                    policy_kwargs=policy_kwargs, clip_range=clip_range,
                    gamma=gamma, gae_lambda=gae_lambda, ent_coef=ent_coef)

        # Training the model
        model.learn(total_timesteps=1000000)
    finally:
        # Always shut the worker processes down; otherwise every trial
        # (including failed ones) leaves its subprocesses running.
        env.close()

    # Model evaluation
    obs = eval_env.reset()

    total_rewards = 0.0
    for _ in range(1000):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, info = eval_env.step(action)
        total_rewards += reward
        if done:
            obs = eval_env.reset()

    # eval_env is defined outside this function and reused by every trial,
    # so it is intentionally NOT closed here; closing it would leave later
    # trials stepping a closed environment.

    # Collapse a possibly array-valued reward sum into the plain float that
    # Optuna expects as an objective value.
    return float(np.sum(total_rewards))

In [None]:
# Median-based pruner: 5 start-up trials, a 350k-step warm-up, then a check
# every 50k steps.
# NOTE(review): `objective` as written never calls trial.report() or
# trial.should_prune(), so this pruner presumably has no effect — confirm.
pruner = optuna.pruners.MedianPruner(
    n_startup_trials=5,
    n_warmup_steps=350000,
    interval_steps=50000,
)

# Maximise the value returned by `objective` over 200 trials.
study = optuna.create_study(direction='maximize', pruner=pruner)
study.optimize(objective, n_trials=200)

# Report the best trial's value and its sampled hyperparameters.
print("Best trial:")
trial = study.best_trial

print(f" Value: {trial.value}")
print(" Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")