In [None]:
import numpy as np

from matplotlib import pyplot as plt
from src.environments.mg_simple import MGSimple

# Zero Agent

In [None]:
# Baseline rollout: one episode in which the agent always submits an all-zero action.
batch_size = 1
env = MGSimple(
    batch_size=batch_size,
    steps=24,
    min_temp=29,
    max_temp=31,
    peak_pv_gen=1,
    peak_conv_gen=1,
    peak_load=1,
)

# Reset the environment; only the `done` flag returned here drives the loop below.
state_0, r_0, done, _ = env.reset()

# Per-step histories collected during the episode.
states_za = []
rewards_za = []
actions_za = []

while not done:
    # The "zero agent" action: a (batch_size, 1) array of zeros.
    action = np.zeros((batch_size, 1))
    s_t, r_t, done, _ = env.step(action)

    actions_za.append(action)
    rewards_za.append(r_t)
    states_za.append(s_t)

# Turn each list of per-step values into a single array, inserting the step axis at position 1.
states_za, rewards_za, actions_za = (
    np.stack(history, axis=1) for history in (states_za, rewards_za, actions_za)
)

# Random Agent

In [None]:
# Baseline rollout: one episode in which the agent samples its action uniformly from [-1, 1).
batch_size = 1
env = MGSimple(batch_size=batch_size, steps=24, min_temp=29, max_temp=31, peak_pv_gen=1, peak_conv_gen=1, peak_load=1)

# Dedicated, seeded generator so the sampled actions are identical on every
# Restart & Run All. It only controls the actions drawn in this cell; it does
# not touch NumPy's global random state.
action_rng = np.random.default_rng(0)

# Reset the environment; only the `done` flag returned here drives the loop below.

state_0, r_0, done, _ = env.reset()

# Per-step histories collected during the episode.

states_ra, rewards_ra, actions_ra = [], [], []

while not done:

    action = action_rng.uniform(low=-1, high=1, size=(batch_size, 1))

    s_t, r_t, done, _ = env.step(action)

    states_ra.append(s_t)
    rewards_ra.append(r_t)
    actions_ra.append(action)

# Turn each list of per-step values into a single array, inserting the step axis at position 1.

states_ra = np.stack(states_ra, axis=1)
rewards_ra = np.stack(rewards_ra, axis=1)
actions_ra = np.stack(actions_ra, axis=1)

# A2C Causality (continuous actions)

In [None]:
import traceback

from src.environments.mg_simple import MGSimple
from src.rl.a2c.c_mg_simple import Agent, set_all_seeds

# Hyper-parameters and run options (hard-coded in this cell, not read from a command line)

disable_logging = False
disable_noise = True
batch_size = 8
training_steps = 1500
rollout_steps = 24
actor_lr = 0.0001
critic_lr = 0.001
actor_nn = 64
critic_nn = 64
gamma = 1
enable_gpu = True
central_agent = False # Not needed yet
random_soc_0 = False
encoding = False # Not needed yet
extended_observation = False # Not needed yet
epsilon = 0.001

# NOTE(review): the discrete-action training cell further down assigns the same
# names (my_env, agent, all_states, all_rewards, all_actions), so the plot cells
# show the results of whichever training cell ran last.

# Reset any agent left over from a previous cell so the `finally` clause below
# never closes a logger that belongs to an earlier run.

agent = None

# Train the agent. RuntimeError and KeyboardInterrupt are reported with a
# traceback but do not abort the notebook.

try:

    # Configuration dictionary handed to the agent as `wandb_dict`

    wdb_config={
        "training_steps": training_steps,
        "batch_size": batch_size,
        "rollout_steps": rollout_steps,
        "agent_actor_lr": actor_lr,
        "agent_critic_lr": critic_lr,
        "agent_actor_nn": actor_nn,
        "agent_critic_nn": critic_nn,
        "gamma": gamma,
        "central_agent": central_agent,
        "random_soc_0": random_soc_0,
        "encoding": encoding,
        "extended_observation": extended_observation,
        "epsilon": epsilon,
    }

    # Run the simulator

    set_all_seeds(0)

    # Instantiate the environment

    my_env = MGSimple(
        batch_size=batch_size, steps = rollout_steps, min_temp = 29, max_temp = 31, peak_pv_gen = 1, peak_conv_gen = 1, peak_load = 1,
        random_soc_0=random_soc_0, disable_noise=disable_noise
    )

    # Instantiate the agent

    agent = Agent(
        env=my_env, critic_lr=critic_lr, actor_lr=actor_lr, actor_nn=actor_nn, critic_nn=critic_nn, batch_size=batch_size, gamma=gamma,
        extended_obs=extended_observation, wandb_dict=wdb_config, enable_gpu=enable_gpu, disable_wandb=disable_logging,
    )

    # Launch the training

    all_states, all_rewards, all_actions = agent.train(training_steps=training_steps, epsilon=epsilon)

except (RuntimeError, KeyboardInterrupt):

    traceback.print_exc()

finally:

    # Finish Wandb execution, also when training failed or was interrupted,
    # so the logger is not left open. Skipped if the agent was never created.

    if agent is not None:
        agent.wdb_logger.finish()

# A2C Causality (discrete actions)

In [None]:
import traceback

from src.environments.mg_simple import MGSimple
from src.rl.a2c.d_mg_simple import Agent, set_all_seeds

# Hyper-parameters and run options (hard-coded in this cell, not read from a command line)

disable_logging = False
disable_noise = True
batch_size = 8
training_steps = 1500
rollout_steps = 24
actor_lr = 0.0009
critic_lr = 0.001
actor_nn = 128
critic_nn = 64
gamma = 1
enable_gpu = True
central_agent = False # Not needed yet
random_soc_0 = False
encoding = False # Not needed yet
extended_observation = False # Not needed yet
epsilon = 0.001

# NOTE(review): the continuous-action training cell further up assigns the same
# names (my_env, agent, all_states, all_rewards, all_actions); running this cell
# replaces them, so the plot cells show the results of whichever cell ran last.

# Reset any agent left over from a previous cell so the `finally` clause below
# never closes a logger that belongs to an earlier run.

agent = None

# Train the agent. RuntimeError and KeyboardInterrupt are reported with a
# traceback but do not abort the notebook.

try:

    # Configuration dictionary handed to the agent as `wandb_dict`

    wdb_config={
        "training_steps": training_steps,
        "batch_size": batch_size,
        "rollout_steps": rollout_steps,
        "agent_actor_lr": actor_lr,
        "agent_critic_lr": critic_lr,
        "agent_actor_nn": actor_nn,
        "agent_critic_nn": critic_nn,
        "gamma": gamma,
        "central_agent": central_agent,
        "random_soc_0": random_soc_0,
        "encoding": encoding,
        "extended_observation": extended_observation,
        "epsilon": epsilon,
    }

    # Run the simulator

    set_all_seeds(0)

    # Instantiate the environment

    my_env = MGSimple(
        batch_size=batch_size, steps = rollout_steps, min_temp = 29, max_temp = 31, peak_pv_gen = 1, peak_conv_gen = 1, peak_load = 1,
        random_soc_0=random_soc_0, disable_noise=disable_noise
    )

    # Instantiate the agent

    agent = Agent(
        env=my_env, critic_lr=critic_lr, actor_lr=actor_lr, actor_nn=actor_nn, critic_nn=critic_nn, batch_size=batch_size, gamma=gamma,
        extended_obs=extended_observation, wandb_dict=wdb_config, enable_gpu=enable_gpu, disable_wandb=disable_logging,
    )

    # Launch the training

    all_states, all_rewards, all_actions = agent.train(training_steps=training_steps, epsilon=epsilon)

except (RuntimeError, KeyboardInterrupt):

    traceback.print_exc()

else:

    # Only announce completion when training actually ran to the end.

    print('A2C training completed')

finally:

    # Finish Wandb execution, also when training failed or was interrupted,
    # so the logger is not left open. Skipped if the agent was never created.

    if agent is not None:
        agent.wdb_logger.finish()

# Plots

In [None]:
# Stack the training outputs along a new leading axis and drop singleton axes.
all_rewards = np.stack(all_rewards, axis=0).squeeze()
all_actions = np.stack(all_actions, axis=0).squeeze()
all_states = np.stack(all_states, axis=0).squeeze()

# Reward traces at the first, an intermediate and the last training step, for
# index 0 on the last axis (presumably the batch element — TODO confirm).
# The legend text is built from the plotted index so the two cannot drift apart.
# Assumes agent.train returned one entry per training step — TODO confirm.
for step in (0, 250, training_steps - 1):
    plt.plot(all_rewards[step, :, 0], label=f'reward {step} 0')
plt.legend()
plt.show()

In [None]:
# Sum the rewards along axis 1, then average that sum along the remaining
# axis 1 (presumably: episode return averaged over the batch — TODO confirm).
summed_rewards = all_rewards.sum(axis=1)
avg_reward = summed_rewards.mean(axis=1)

# One point per entry along the leading axis of all_rewards.
plt.plot(avg_reward)
plt.show()

In [None]:
# Action traces at the first, an intermediate and the last training step, for
# index 0 on the last axis (presumably the batch element — TODO confirm).
# The legend text is built from the plotted index so the two cannot drift apart.
# Assumes agent.train returned one entry per training step — TODO confirm.
for step in (0, 250, training_steps - 1):
    plt.plot(all_actions[step, :, 0], label=f'actions {step}')
plt.legend()
plt.show()

In [None]:
# Average the actions along axis 1, then average the result along the
# remaining axis 1, leaving one value per entry along the leading axis.
mean_over_axis1 = all_actions.mean(axis=1)
avg_action = mean_over_axis1.mean(axis=1)

plt.plot(avg_action)
plt.show()

In [None]:
# Component 8 of the state vector (presumably the state of charge — confirm
# against the MGSimple observation layout).
all_socs = all_states[:,:,:,8]

# Traces at the first, an intermediate and the last training step, for index 0
# on the last axis (presumably the batch element — TODO confirm).
# The legend text is built from the plotted index so the two cannot drift apart.
# Assumes agent.train returned one entry per training step — TODO confirm.
for step in (0, 250, training_steps - 1):
    plt.plot(all_socs[step, :, 0], label=f'soc {step}')
plt.legend()
plt.show()

In [None]:
# Component 4 of the state vector (presumably the net energy — confirm against
# the MGSimple observation layout).
all_net_energy = all_states[:,:,:,4]
remaining_energy = my_env.mg.remaining_energy

plt.plot(remaining_energy, label='remaining energy')

# Same training steps as the reward / action / SoC plots (first, intermediate,
# last) so the figures are comparable; the legend text follows the plotted index.
# Assumes agent.train returned one entry per training step — TODO confirm.
for step in (0, 250, training_steps - 1):
    plt.plot(all_net_energy[step, :, 0], label=f'net energy {step}')
plt.legend()
plt.show()

In [None]:
# PV generation profile read from the environment's microgrid object.
# Stored under its own name so it is not confused with (and does not overwrite)
# the `remaining_energy` series used by the net-energy plot.
pv_gen = my_env.mg.pv_gen

plt.plot(pv_gen, label='pv generation')
plt.legend()
plt.show()