In [None]:
import os

import gym
import gym_electricitymarket
import gym_gridworld
import numpy as np
import matplotlib.pyplot as plt

from stable_baselines import *
from stable_baselines.bench import Monitor
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines import results_plotter

from stable_baselines.ddpg.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

# TRPO

In [None]:
from stable_baselines.common.policies import MlpPolicy, FeedForwardPolicy

# Create a log dir dedicated to the TRPO run. The Monitor wrapper built below
# writes wherever log_dir points, and every other experiment in this notebook
# creates its own Monitor too; a shared folder would let a later run overwrite
# this run's log before it can be inspected.
log_dir = "tmp/trpo/"
os.makedirs(log_dir, exist_ok=True)

class CustomPolicy(FeedForwardPolicy):
    """Feed-forward policy with a customized value-function branch.

    All positional and keyword arguments are forwarded unchanged to
    ``FeedForwardPolicy``. On top of them this class always passes
    ``net_arch=[{"vf": [10, 7, 5]}]`` (only a ``vf`` entry is given, no
    ``pi`` entry) and ``feature_extraction="mlp"``.

    NOTE(review): the models in the visible cells are built with
    ``MlpPolicy``; nothing visible instantiates this class.
    """

    def __init__(self, *args, **kwargs):
        value_branch = {"vf": [10, 7, 5]}
        super().__init__(
            *args,
            **kwargs,
            net_arch=[value_branch],
            feature_extraction="mlp",
        )
        
# Create and wrap the environment: the factory given to DummyVecEnv builds the
# gym environment and wraps it in a Monitor that is pointed at log_dir.
env = DummyVecEnv([
    lambda: Monitor(
        gym.make('ElectricityMarket-v0'),
        log_dir,
        allow_early_resets=True,
    )
])

# TRPO agent using the stock MlpPolicy imported at the top of this cell.
model = TRPO(MlpPolicy, env, verbose=1)

# Train the agent for a fixed number of environment steps.
time_steps = 20000
model.learn(total_timesteps=time_steps)

# PPO

In [None]:
from stable_baselines.common.policies import MlpPolicy, FeedForwardPolicy

# Create a log dir dedicated to the PPO2 run. The Monitor wrapper built below
# writes wherever log_dir points, and every other experiment in this notebook
# creates its own Monitor too; a shared folder would let a later run overwrite
# this run's log before it can be inspected.
log_dir = "tmp/ppo2/"
os.makedirs(log_dir, exist_ok=True)

class CustomPolicy(FeedForwardPolicy):
    """Feed-forward policy with a customized value-function branch.

    All positional and keyword arguments are forwarded unchanged to
    ``FeedForwardPolicy``. On top of them this class always passes
    ``net_arch=[{"vf": [10, 7, 5]}]`` (only a ``vf`` entry is given, no
    ``pi`` entry) and ``feature_extraction="mlp"``.

    NOTE(review): the TRPO cell defines a class with this same name and
    body, and the models in the visible cells are built with ``MlpPolicy``;
    nothing visible instantiates this class.
    """

    def __init__(self, *args, **kwargs):
        value_branch = {"vf": [10, 7, 5]}
        super().__init__(
            *args,
            **kwargs,
            net_arch=[value_branch],
            feature_extraction="mlp",
        )
        
# Create and wrap the environment: the factory given to DummyVecEnv builds the
# gym environment and wraps it in a Monitor that is pointed at log_dir.
env = DummyVecEnv([
    lambda: Monitor(
        gym.make('ElectricityMarket-v0'),
        log_dir,
        allow_early_resets=True,
    )
])

# PPO2 agent using the stock MlpPolicy imported at the top of this cell.
model = PPO2(MlpPolicy, env, verbose=1)

# Train the agent for a fixed number of environment steps.
time_steps = 20000
model.learn(total_timesteps=time_steps)

# DDPG

In [None]:
from stable_baselines.ddpg.policies import MlpPolicy

# Create a log dir dedicated to the DDPG run. The Monitor wrapper built below
# writes wherever log_dir points, and every other experiment in this notebook
# creates its own Monitor too; a shared folder would let another run's log be
# mistaken for (or overwrite) this one when the results are plotted.
log_dir = "tmp/ddpg/"
os.makedirs(log_dir, exist_ok=True)

# Environment: the factory given to DummyVecEnv builds the gym environment and
# wraps it in a Monitor that is pointed at log_dir.
env = DummyVecEnv([
    lambda: Monitor(
        gym.make('ElectricityMarket-v0'),
        log_dir,
        allow_early_resets=True,
    )
])

# Action noise handed to DDPG below: a NormalActionNoise with zero mean and a
# sigma of 1 for each of the n_actions entries (last axis of the action space).
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(
    mean=np.zeros(n_actions),
    sigma=1 * np.ones(n_actions),
)

# DDPG agent using the DDPG-specific MlpPolicy imported at the top of this cell.
model = DDPG(MlpPolicy, env, action_noise=action_noise, verbose=1)

# Train the agent; time_steps is also read by the plotting cell that follows.
time_steps = 30000
model.learn(total_timesteps=time_steps)

In [None]:
# Plot the logged results of the run above. Arguments, in order: the list of
# log directories to read, time_steps, the timesteps x-axis selector, and the
# task label "DDPG".
results_plotter.plot_results(
    [log_dir],
    time_steps,
    results_plotter.X_TIMESTEPS,
    "DDPG",
)
plt.show()

# Q-learning (DQN)

In [None]:
from stable_baselines.deepq.policies import MlpPolicy

# Create a log dir dedicated to the DQN run. The Monitor wrapper built below
# writes wherever log_dir points, and every other experiment in this notebook
# creates its own Monitor too; a shared folder would let another run's log be
# mistaken for (or overwrite) this one when the results are plotted.
log_dir = "tmp/dqn/"
os.makedirs(log_dir, exist_ok=True)

# Environment: the factory given to DummyVecEnv builds the
# 'ElectricityMarketDiscreteDQN-v0' gym environment and wraps it in a Monitor
# that is pointed at log_dir.
env = DummyVecEnv([
    lambda: Monitor(
        gym.make('ElectricityMarketDiscreteDQN-v0'),
        log_dir,
        allow_early_resets=True,
    )
])

# DQN agent using the deepq-specific MlpPolicy imported at the top of this cell.
model = DQN(MlpPolicy, env, verbose=1)

# Train the agent; time_steps is also read by the plotting cell that follows.
time_steps = 300000
model.learn(total_timesteps=time_steps)

In [None]:
# Plot the logged results of the run above. Arguments, in order: the list of
# log directories to read, time_steps, the timesteps x-axis selector, and the
# task label "Q-learning".
results_plotter.plot_results(
    [log_dir],
    time_steps,
    results_plotter.X_TIMESTEPS,
    "Q-learning",
)
plt.show()

# A2C

In [None]:
from stable_baselines.common.policies import MlpPolicy

# Create a log dir dedicated to the A2C run. The Monitor wrapper built below
# writes wherever log_dir points, and every other experiment in this notebook
# creates its own Monitor too; a shared folder would let another run's log be
# mistaken for (or overwrite) this one when the results are plotted.
log_dir = "tmp/a2c/"
os.makedirs(log_dir, exist_ok=True)

# Environment: the factory given to DummyVecEnv builds the gym environment and
# wraps it in a Monitor that is pointed at log_dir.
env = DummyVecEnv([
    lambda: Monitor(
        gym.make('ElectricityMarket-v0'),
        log_dir,
        allow_early_resets=True,
    )
])

# A2C agent using the stock MlpPolicy imported at the top of this cell.
model = A2C(MlpPolicy, env, verbose=1)

# Train the agent; time_steps is also read by the plotting cell that follows.
time_steps = 20000
model.learn(total_timesteps=time_steps)

In [None]:
# Plot the logged results of the run above. Arguments, in order: the list of
# log directories to read, time_steps, the timesteps x-axis selector, and the
# task label "A2C".
results_plotter.plot_results(
    [log_dir],
    time_steps,
    results_plotter.X_TIMESTEPS,
    "A2C",
)
plt.show()