In [6]:
from stable_baselines3 import PPO
from stable_baselines3.ppo.policies import MlpPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_checker import check_env
import gym
import numpy as np
from RL.helpers import energy_price_env
from Hack import load
from matplotlib import pyplot as plt
%matplotlib qt5

In [7]:
def evaluate(model, new_env=None, num_episodes=100, index=None):
    """
    Evaluate a RL agent and plot the trace of the first evaluated episode.

    The agent is rolled out for ``num_episodes`` episodes. Prices, energies,
    time stamps, earnings and rewards of the *first* episode are recorded and
    drawn on a four-panel figure; the total rewards of all episodes are
    aggregated into the printed / returned statistics.

    Observations are indexed as ``obs[0, k]``, so the environment has to be a
    vectorized env wrapping a single environment. The first four observation
    entries are read as (current price, mean price, current energy,
    current time).

    :param model: (BaseRLModel object) the RL Agent
    :param new_env: (VecEnv or None) environment to evaluate on; when None the
        environment attached to the model (``model.get_env()``) is used
    :param num_episodes: (int) number of episodes to evaluate it (at least 1)
    :param index: (indexable or None) optional x-axis values, looked up with
        the integer-cast time stamps taken from the observations; when None
        the step number is used instead
    :return: (float) Mean reward for the last num_episodes
    :raises ValueError: if ``num_episodes`` is smaller than 1
    """
    if num_episodes < 1:
        # Without at least one episode there is nothing to average or plot.
        raise ValueError("num_episodes must be at least 1")

    # This function will only work for a single Environment
    env = model.get_env() if new_env is None else new_env

    all_episode_rewards = []

    # Traces of the first episode only -- these are what gets plotted, so the
    # rewards shown on the figure must come from that same episode.
    first_episode_rewards = []
    current_prices = []
    mean_prices = []
    current_energies = []
    all_earnings = [0]  # no trade has happened before the first step
    current_times = []

    for i in range(num_episodes):
        episode_rewards = []

        done = False
        obs = env.reset()
        while not done:
            # _states are only useful when using LSTM policies
            action, _states = model.predict(obs)
            # here, action, rewards and dones are arrays
            # because we are using vectorized env
            obs, reward, done, info = env.step(action)
            done = bool(np.asarray(done).all())
            # Single environment: unwrap the size-1 reward array to a scalar.
            step_reward = np.asarray(reward).item()
            episode_rewards.append(step_reward)

            if i == 0:
                current_price, mean_price, current_energy, current_time = (
                    obs[0, 0],
                    obs[0, 1],
                    obs[0, 2],
                    obs[0, 3],
                )
                if current_energies:
                    # Earnings of a step: energy change since the previous
                    # step, valued at the current price (sign flipped).
                    all_earnings.append(
                        -current_price * (current_energy - current_energies[-1])
                    )

                first_episode_rewards.append(step_reward)
                current_prices.append(current_price)
                mean_prices.append(mean_price)
                current_energies.append(current_energy)
                current_times.append(current_time)

        all_episode_rewards.append(sum(episode_rewards))

    # The last recorded sample is dropped from every series below.
    # NOTE(review): presumably because a vectorized env auto-resets on done,
    # so the final observation already belongs to the next episode -- confirm.
    fig, axs = plt.subplots(4, 1, sharex=True)
    if index is None:
        index = np.arange(0, len(current_times))[:-1]
    else:
        index = index[np.asarray(current_times, dtype=int)][:-1]
    cum_rewards = np.cumsum(first_episode_rewards)
    bank_total = np.cumsum(all_earnings)
    axs[0].plot(index, cum_rewards[:-1], color="red", label="Cumalative rewards")
    axs[0].plot(index, bank_total[:-1], color="blue", label="Bank total")
    axs[0].legend()
    axs[1].plot(index, current_prices[:-1], color="blue", label="Current prices")
    axs[1].plot(index, mean_prices[:-1], color="red", label="Mean prices")
    axs[1].legend()

    axs[2].plot(index, first_episode_rewards[:-1], color="black", label="Reward")
    axs[2].legend()

    axs[3].plot(index, current_energies[:-1], color="blue", label="Current energies")
    axs[3].legend()

    mean_episode_reward = np.mean(all_episode_rewards)
    std_episode_reward = np.std(all_episode_rewards)
    print(
        "Mean reward:",
        mean_episode_reward,
        "+/-",
        std_episode_reward,
        "\t Num episodes:",
        num_episodes,
    )

    return mean_episode_reward

In [8]:
# --- data ---------------------------------------------------------------
# Load the EPEX table and keep the "apx_da_hourly" column as a plain array.
epex = load.epex().load()
price_array = epex["apx_da_hourly"].values

# --- environment --------------------------------------------------------
#! window_size is sort of a free parameter
max_time = 30769
env = energy_price_env(
    price_array,
    max_time=max_time,
    window_size=2 * 24,
)

# Sanity-check the custom env against the gym interface before training on it.
check_env(env, warn=True)

# --- model --------------------------------------------------------------
# PPO with its default hyper-parameters.
model1 = PPO(policy=MlpPolicy, env=env, verbose=1)

# Baseline: score the still-untrained agent on a single episode.
print("BEFORE")
mean_reward_before_train = evaluate(
    model=model1,
    num_episodes=1,
    index=epex.index,
)



Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
BEFORE


NameError: name 'plt' is not defined

In [None]:
# Train the PPO agent on the environment it was constructed with.
model1.learn(total_timesteps=10_000)

In [None]:
periods = 7 * 48


def _make_eval_env():
    """Build the evaluation env: same prices, start_time=max_time, max_time=periods."""
    # NOTE(review): presumably this window lies after the training data -- confirm
    # against how energy_price_env interprets start_time / max_time.
    return energy_price_env(price_array, start_time=max_time, max_time=periods)


# evaluate() indexes observations as obs[0, k], so wrap the env in a VecEnv.
new_env = DummyVecEnv([_make_eval_env])

print("AFTER")
mean_reward_after_train = evaluate(
    model=model1,
    new_env=new_env,
    num_episodes=1,
    index=epex.index,
)
# A saved model can be restored with: loaded_model = PPO.load("path_to_model")