In [1]:
from stable_baselines3 import PPO
from stable_baselines3.ppo.policies import MlpPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_checker import check_env
import gym
import numpy as np
from RL import helpers
from Hack import load

In [2]:
# Pull the EPEX dataset through the project-local loader, then keep the one
# column this notebook works with via its `.values` attribute.
# NOTE(review): `epex` is presumably a pandas DataFrame (it is indexed by column
# name here and its `.index` is used further down) — confirm against Hack.load.
PRICE_COLUMN = 'apx_da_hourly'
epex = load.epex().load()
price_array = epex[PRICE_COLUMN].values

In [3]:
import matplotlib.pyplot as plt

def get_mode(arr, bin_number = 10):
    """Estimate the mode of a sample as the centre of its fullest histogram bin.

    Parameters
    ----------
    arr : array_like
        Numeric values. Any array-like is accepted (ndarray, list, scalar);
        the input is flattened and NaN entries are ignored.
    bin_number : int, optional
        Forwarded to ``np.histogram`` as ``bins`` (default 10).

    Returns
    -------
    float
        Centre of the most populated bin. When several bins tie, the first
        (lowest) one wins because ``np.argmax`` returns the first maximum.
        ``np.nan`` if there are no non-NaN values.
    """
    # Normalise to a flat float array so lists and scalars behave like ndarrays.
    values = np.asarray(arr, dtype=float).ravel()
    values = values[~np.isnan(values)]  # `~` inverts the mask: keep non-NaN entries

    if values.size == 0:
        # Nothing but NaNs (or empty input): no mode can be estimated.
        return np.nan

    hist, bin_edges = np.histogram(values, bins=bin_number)
    # Midpoint of every bin, from consecutive edge pairs.
    centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])
    return centers[np.argmax(hist)]

def get_expected_price(price_array, idx, window_size = 2*24, mode='mode'):
    """Summarise the prices observed just before position ``idx``.

    The window holds the ``window_size`` samples immediately preceding ``idx``
    (``idx`` itself is excluded). Near the start of the series the window
    shrinks to whatever history exists. At ``idx == 0`` there is no history,
    so the first price itself is used as a one-element window.

    Parameters
    ----------
    price_array : sequence of float
        Price series, sliced positionally.
    idx : int or int-like
        Position for which the estimate is produced; converted with ``int()``.
        Expected to be non-negative.
    window_size : int, optional
        Maximum number of preceding samples to summarise (default ``2*24``).
    mode : {'mode', 'mean', 'median'}, optional
        Statistic to compute over the window. ``'mode'`` uses ``get_mode``
        with 5 bins and therefore ignores NaNs; ``'mean'`` and ``'median'``
        use ``np.mean`` / ``np.median``, which propagate NaNs.

    Returns
    -------
    float
        The requested statistic of the window.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported names (previously this
        silently returned ``None``).
    """
    if mode not in ('mean', 'mode', 'median'):
        raise ValueError(
            "mode must be 'mean', 'mode' or 'median', got {!r}".format(mode)
        )

    idx = int(idx)

    if idx == 0:
        # No history yet: fall back to the first price, kept as a length-1
        # slice so every branch hands an array (not a scalar) to the reducer.
        window = price_array[:1]
    else:
        # max(..., 0) covers the warm-up phase where fewer than window_size
        # samples precede idx.
        window = price_array[max(idx - window_size, 0):idx]

    if mode == 'mean':
        return np.mean(window)
    if mode == 'median':
        return np.median(window)
    # mode == 'mode': histogram-based estimate with a fixed 5 bins.
    return get_mode(window, 5)


# Compare three trailing-window estimates of the "expected" price against the
# raw series. All three estimators share one explicit window length.
EXPECTED_PRICE_WINDOW = 2 * 24  # look-back length, in samples

positions = range(len(price_array))
mean_prices = [
    get_expected_price(price_array, i, window_size=EXPECTED_PRICE_WINDOW, mode='mean')
    for i in positions
]
mode_prices = [
    get_expected_price(price_array, i, window_size=EXPECTED_PRICE_WINDOW, mode='mode')
    for i in positions
]
median_prices = [
    get_expected_price(price_array, i, window_size=EXPECTED_PRICE_WINDOW, mode='median')
    for i in positions
]

fig, axs = plt.subplots(1, 1)
axs.plot(epex.index, price_array, color='black', label='price')
axs.plot(epex.index, mean_prices, color='red', label='trailing mean')
axs.plot(epex.index, mode_prices, color='blue', label='trailing mode')
axs.plot(epex.index, median_prices, color='green', label='trailing median')

# Label the figure so it can be read on its own when the notebook is skimmed.
axs.set_title('Price vs. trailing-window estimates (window = {} samples)'.format(EXPECTED_PRICE_WINDOW))
axs.set_ylabel('apx_da_hourly')
axs.legend();  # trailing ';' keeps the Legend repr out of the cell output

[<matplotlib.lines.Line2D at 0x1a29aadca90>]

In [12]:
# Training split, environment and untrained PPO model.
SEED = 0  # fixed seed so model initialisation and training are repeatable across runs

start_idx = 0
end_idx = 30769 # 2019->2020 (presumably the year boundary — TODO confirm); 2*24*7 is a shorter alternative
obs_price_array = price_array[start_idx:end_idx]

env = helpers.energy_price_env(obs_price_array, window_size=24*2)
# Validate the custom environment *before* handing it to PPO, so interface
# problems surface here rather than inside model construction or training.
check_env(env, warn=True)
model = PPO(MlpPolicy, env, verbose=0, seed=SEED)



In [13]:
# Baseline score of the model before any call to `learn`, evaluated over the
# slice of the index that matches the current start_idx/end_idx split.
# NOTE: start_idx/end_idx are reassigned by a later cell, so this cell's result
# depends on which of the two cells ran last.
train_index = epex.index[start_idx:end_idx]
mean_reward_before_train = helpers.evaluate(
    model,
    num_episodes=1,
    index=train_index,
)

Mean reward: -103.983986 Num episodes: 3


In [20]:
# Fit the PPO agent for a fixed budget of environment steps.
TRAIN_TIMESTEPS = 10_000
model.learn(total_timesteps=TRAIN_TIMESTEPS)

<stable_baselines3.ppo.ppo.PPO at 0x1a29c9e8d90>

In [21]:
# Trained agent, after training: evaluate on the remaining tail of the series.
start_idx = 30770 # 2*24*7
# Run through the final sample. A slice end of -1 would silently drop the last
# price. Shorter alternative for quick runs: 30770 + 2*24*7
end_idx = len(price_array)
test_price_array = price_array[start_idx:end_idx]

# Build the test environment with the same explicit window_size that the
# training environment was created with, so both are constructed identically.
new_env = DummyVecEnv([lambda: helpers.energy_price_env(test_price_array, window_size=24*2)])
# Slice the index with the same bounds as the prices so the two stay aligned
# (the baseline evaluation slices it the same way).
mean_reward_after_train = helpers.evaluate(
    model,
    new_env=new_env,
    num_episodes=100,
    index=epex.index[start_idx:end_idx],
)

