# Imports

In [None]:
import numpy as np

from matplotlib import pyplot as plt
from src.environments.mg_simple import MGSimple
from src.rl.a2c.c_mg_simple import set_all_seeds

# Utils

In [None]:
def _plot_snapshots(ax, data, snapshots, axis):
    """Draw one line per snapshot epoch, averaging each epoch slice over `axis`."""

    for label, index in snapshots:
        ax.plot(data[index, :, :].mean(axis=axis), label=label)


def plot_results(env, states, rewards, actions, net_energy, title, save=False, filename='results.png'):
    """Plot a 5x2 summary grid of a run and optionally save it to disk.

    The left column overlays five snapshot epochs (0%, 25%, 50%, 75% and 100%
    of the run); the right column shows one aggregated value per epoch.
    The last row of the left column also shows ``env.mg.demand - env.mg.pv_gen``.

    Args:
        env: Environment whose ``env.mg.pv_gen``, ``env.mg.demand``,
            ``env.mg.price`` and ``env.mg.emission`` series are plotted.
        states: Sequence with one array per epoch. Must be 4-D once stacked;
            index 19 of the last axis is plotted as the SOC.
            Presumably (epoch, step, batch, feature) -- TODO confirm.
        rewards: Sequence with one array per epoch, 3-D once stacked.
            Presumably (epoch, step, batch) -- TODO confirm.
        actions: Sequence with one array per epoch, 3-D once stacked.
        net_energy: Sequence with one array per epoch, 3-D once stacked.
            Its snapshots are averaged over the first axis of the epoch slice,
            unlike the other series -- presumably (epoch, batch, step), TODO confirm.
        title: Figure super-title.
        save: When true, the figure is written to ``filename`` before showing.
        filename: Output path used when ``save`` is true.

    Raises:
        ValueError: If ``states`` holds no epochs.
    """

    n_epochs = len(states)

    if n_epochs == 0:
        raise ValueError('plot_results needs at least one epoch of data')

    # Epoch indexes of the snapshots drawn in the left column

    snapshots = [
        ('0%', 0),
        ('25%', n_epochs // 4),
        ('50%', n_epochs // 2),
        ('75%', n_epochs * 3 // 4),
        ('100%', n_epochs - 1),
    ]

    # Parse list of arrays

    rewards = np.stack(rewards, axis=0)
    actions = np.stack(actions, axis=0)
    states = np.stack(states, axis=0)
    net_energy = np.stack(net_energy, axis=0)

    all_socs = states[:, :, :, 19]

    # Build the figure, with major and minor grid lines on every subplot

    fig = plt.figure(figsize=(10, 15))
    axs = fig.subplots(5, 2)

    for ax in axs.flat:
        ax.minorticks_on()
        ax.grid(True, which='both', axis='both', alpha=0.5)

    # Rewards

    _plot_snapshots(axs[0][0], rewards, snapshots, axis=1)
    axs[0][0].set_title('Mean reward through time')
    axs[0][0].legend()

    axs[0][1].plot(rewards.sum(axis=1).mean(axis=1), label='Reward')
    axs[0][1].set_title('Mean reward through epochs')
    axs[0][1].legend()

    # Actions

    _plot_snapshots(axs[1][0], actions, snapshots, axis=1)
    axs[1][0].set_title('Mean action through time')
    axs[1][0].legend()

    axs[1][1].plot(actions.mean(axis=1).mean(axis=-1), label='Action')
    axs[1][1].set_title('Mean action through epochs')
    axs[1][1].legend()

    # State of charge

    _plot_snapshots(axs[2][0], all_socs, snapshots, axis=1)
    axs[2][0].set_title('Mean SOC through time')
    axs[2][0].legend()

    axs[2][1].plot(all_socs.mean(axis=1).mean(axis=-1), label='SOC')
    axs[2][1].set_title('Mean SOC through epochs')
    axs[2][1].legend()

    # Environment profiles

    axs[3][0].plot(env.mg.pv_gen, label='PV')
    axs[3][0].plot(env.mg.demand, label='Demand')
    axs[3][0].set_title('PV and Demand')
    axs[3][0].legend()

    axs[3][1].plot(env.mg.price, label='Price')
    axs[3][1].plot(env.mg.emission, label='Emission factor')
    axs[3][1].set_title('Price and Emission factor')
    axs[3][1].legend()

    # Net energy (the legend is created after the reference line so it is listed too)

    _plot_snapshots(axs[4][0], net_energy, snapshots, axis=0)
    axs[4][0].plot((env.mg.demand - env.mg.pv_gen), label='Remaining', linestyle='--', linewidth=2)
    axs[4][0].set_title('Mean net energy through time')
    axs[4][0].legend()

    axs[4][1].plot(net_energy.mean(axis=1).sum(axis=-1), label='Net energy')
    axs[4][1].set_title('Mean net energy through epochs')
    axs[4][1].legend()

    fig.suptitle(title)
    fig.tight_layout()

    # Save through the figure handle so the right figure is written even if
    # another figure has become the current one

    if save:
        fig.savefig(filename)

    # No argument-less plt.grid() here: it would toggle the major grid of the
    # last subplot off again after it was enabled above

    plt.show()


# Zero agent

In [None]:
# Baseline: an agent that outputs a zero action at every step

set_all_seeds(0)
batch_size = 1
env = MGSimple(
    batch_size=batch_size,
    steps=24,
    min_temp=29,
    max_temp=31,
    peak_pv_gen=1,
    peak_grid_gen=1,
    peak_load=1,
    disable_noise=True,
)

# Per-episode histories, one entry per episode

all_states_za = []
all_rewards_za = []
all_actions_za = []
all_net_energy_za = []

# Two episodes are enough for plot_results to draw its per-epoch lines

for _ in range(2):

    # Start a new episode; the values returned by reset open the histories

    state_0, r_0, done, _ = env.reset()

    states_za = [state_0]
    rewards_za = [r_0]
    actions_za = []

    while not done:

        # The zero agent ignores the state entirely

        action = np.zeros((batch_size, 1))

        s_t, r_t, done, _ = env.step(action)

        states_za.append(s_t)
        rewards_za.append(r_t)
        actions_za.append(action)

    # Store the finished episode

    all_states_za.append(np.array(states_za))
    all_rewards_za.append(np.array(rewards_za))
    all_actions_za.append(np.array(actions_za))
    all_net_energy_za.append(env.mg.net_energy)

plot_results(
    env,
    all_states_za,
    all_rewards_za,
    all_actions_za,
    all_net_energy_za,
    'Zero Agent',
    save=False,
    filename='Test',
)

# Random Agent

In [None]:
# Baseline: an agent that samples its action uniformly in [-1, 1) at every step

set_all_seeds(0)
batch_size = 1
env = MGSimple(
    batch_size=batch_size,
    steps=24,
    min_temp=29,
    max_temp=31,
    peak_pv_gen=1,
    peak_grid_gen=1,
    peak_load=1,
    disable_noise=True,
)

# Start the episode; the values returned by reset are not recorded in the histories

state_0, r_0, done, _ = env.reset()

states_ra = []
rewards_ra = []
actions_ra = []

while not done:

    action = np.random.uniform(-1, 1, (batch_size, 1))

    s_t, r_t, done, _ = env.step(action)

    states_ra.append(s_t)
    rewards_ra.append(r_t)
    actions_ra.append(action)

# Stack the per-step arrays along a new axis 1

states_ra = np.stack(states_ra, axis=1)
rewards_ra = np.stack(rewards_ra, axis=1)
actions_ra = np.stack(actions_ra, axis=1)

# Total reward of the episode, displayed as the cell output

rewards_ra.sum()

# CVX Agent

In [None]:
# Replays a fixed, precomputed action schedule (presumably the output of a CVX
# optimisation, going by the section name -- TODO confirm)

set_all_seeds(0)

batch_size = 1
env = MGSimple(batch_size=batch_size, steps=24, min_temp=29, max_temp=31, peak_pv_gen=1, peak_grid_gen=1, peak_load=1, disable_noise=True)

# One action per entry, looked up with the first feature of the current state

perf_actions = [
    0.28193477,  0.11254492,  0.0573918 ,  0.05726008,  0.11338788,
    0.26636945, -0.03817355, -0.70927286, -0.14144247,  0.14789326,
    0.14227884,  0.1623696 ,  0.17143305,  0.12490222,  0.1400119 ,
    -0.21581801, -0.48669766, -0.18637322,  0.27016387,  0.09638493,
    0.02096857, -0.00388604, -0.13798126, -0.24565006
]

all_states_opt, all_rewards_opt, all_actions_opt, all_net_energy_opt = [], [], [], []

# Just a couple iterations to generate the line

for _ in range(2):

    states_opt, rewards_opt, actions_opt = [], [], []

    # Initialize states and rewards

    state_0, r_0, done, _ = env.reset()

    states_opt.append(state_0)
    rewards_opt.append(r_0)

    # The first action must be chosen from the freshly reset state, not from a
    # state left over by a previous episode or notebook cell

    s_t = state_0

    while not done:

        # assumes s_t[0, 0] is a valid integer index into perf_actions -- TODO confirm

        action = np.ones((batch_size, 1)) * perf_actions[int(s_t[0, 0])]

        s_t, r_t, done, _ = env.step(action)

        states_opt.append(s_t)
        rewards_opt.append(r_t)
        actions_opt.append(action)

    # Store this agent's episodes in its own lists so the figure below does not
    # mix them with the zero-agent results

    all_states_opt.append(np.array(states_opt))
    all_rewards_opt.append(np.array(rewards_opt))
    all_actions_opt.append(np.array(actions_opt))
    all_net_energy_opt.append(env.mg.net_energy)

plot_results(env, all_states_opt, all_rewards_opt, all_actions_opt, all_net_energy_opt, 'CVX Agent', save=False, filename='Test')

# A2C Causality cont.

In [None]:
import traceback

from src.environments.mg_simple import MGSimple
from src.rl.a2c.c_mg_simple import Agent

# Run settings (hard-coded in this notebook instead of being read from the command line)

disable_logging = False
disable_noise = True
batch_size = 8
training_steps = 1500
rollout_steps = 24
actor_lr = 0.0001
critic_lr = 0.001
actor_nn = 64
critic_nn = 64
gamma = 1
enable_gpu = True
central_agent = False # Not needed yet
random_soc_0 = False
encoding = False # Not needed yet
extended_observation = False # Not needed yet
epsilon = 0.001

# Configuration handed to the agent for the Wandb logger

wdb_config = {
    "training_steps": training_steps,
    "batch_size": batch_size,
    "rollout_steps": rollout_steps,
    "agent_actor_lr": actor_lr,
    "agent_critic_lr": critic_lr,
    "agent_actor_nn": actor_nn,
    "agent_critic_nn": critic_nn,
    "gamma": gamma,
    "central_agent": central_agent,
    "random_soc_0": random_soc_0,
    "encoding": encoding,
    "extended_observation": extended_observation,
    "epsilon": epsilon,
}

# Bound before the try block so the cleanup can tell whether the agent was created

agent = None

try:

    # Run the simulator

    set_all_seeds(0)

    # Instantiate the environment

    my_env = MGSimple(
        batch_size=batch_size, steps=rollout_steps, min_temp=29, max_temp=31, peak_pv_gen=1, peak_grid_gen=1, peak_load=1,
        random_soc_0=random_soc_0, disable_noise=disable_noise
    )

    # Instantiate the agent

    agent = Agent(
        env=my_env, critic_lr=critic_lr, actor_lr=actor_lr, actor_nn=actor_nn, critic_nn=critic_nn, batch_size=batch_size, gamma=gamma,
        extended_obs=extended_observation, wandb_dict=wdb_config, enable_gpu=enable_gpu, disable_wandb=disable_logging,
    )

    # Launch the training

    all_states, all_rewards, all_actions = agent.train(training_steps=training_steps, epsilon=epsilon)

except (RuntimeError, KeyboardInterrupt):

    # Report the failure but keep the notebook session alive

    traceback.print_exc()

finally:

    # Finish the Wandb execution even when training failed or was interrupted,
    # so the run is not left open

    if agent is not None:
        agent.wdb_logger.finish()

# A2C Causality disc.

In [None]:
import traceback

from src.environments.mg_simple import MGSimple
from src.rl.a2c.d_mg_simple import Agent, set_all_seeds

# Run settings (hard-coded in this notebook instead of being read from the command line)

disable_logging = True
disable_noise = True
batch_size = 64
training_steps = 8000
rollout_steps = 24
actor_lr = 0.000025
critic_lr = 0.0001
actor_nn = 128
critic_nn = 64
gamma = 1
enable_gpu = True
central_agent = False # Not needed yet
random_soc_0 = False
encoding = False # Not needed yet
extended_observation = False # Not needed yet
epsilon = 0.9

# Configuration handed to the agent for the Wandb logger

wdb_config = {
    "training_steps": training_steps,
    "batch_size": batch_size,
    "rollout_steps": rollout_steps,
    "agent_actor_lr": actor_lr,
    "agent_critic_lr": critic_lr,
    "agent_actor_nn": actor_nn,
    "agent_critic_nn": critic_nn,
    "gamma": gamma,
    "central_agent": central_agent,
    "random_soc_0": random_soc_0,
    "encoding": encoding,
    "extended_observation": extended_observation,
    "epsilon": epsilon,
}

# Bound before the try block so the cleanup can tell whether the agent was created

agent = None

try:

    # Run the simulator

    set_all_seeds(0)

    # Instantiate the environment

    my_env = MGSimple(
        batch_size=batch_size, steps=rollout_steps, min_temp=29, max_temp=31, peak_pv_gen=1, peak_grid_gen=1, peak_load=1,
        random_soc_0=random_soc_0, disable_noise=disable_noise
    )

    # Instantiate the agent

    agent = Agent(
        env=my_env, critic_lr=critic_lr, actor_lr=actor_lr, actor_nn=actor_nn, critic_nn=critic_nn, batch_size=batch_size, gamma=gamma,
        extended_obs=extended_observation, wandb_dict=wdb_config, enable_gpu=enable_gpu, disable_wandb=disable_logging,
    )

    # Launch the training

    all_states, all_rewards, all_actions, all_net_energy = agent.train(training_steps=training_steps, epsilon=epsilon)

except (RuntimeError, KeyboardInterrupt):

    # Report the failure but keep the notebook session alive

    traceback.print_exc()

else:

    # Only announce completion when training really ran to the end

    print('A2C training completed')

finally:

    # Finish the Wandb execution even when training failed or was interrupted

    if agent is not None:
        agent.wdb_logger.finish()

In [None]:
# Show the run configuration, then plot the histories returned by the last training

print(wdb_config)

plot_results(
    env=my_env,
    states=all_states,
    rewards=all_rewards,
    actions=all_actions,
    net_energy=all_net_energy,
    title='A2C D results',
    save=False,
    filename='a2c_results.png',
)
