# Imports

In [None]:
import numpy as np

from matplotlib import pyplot as plt
from src.environments.simple_microgrid import SimpleMicrogrid
from src.utils.tools import set_all_seeds, load_config, plot_metrics

# Zero Agent

## Train

In [None]:
# Zero-action baseline on a freshly built environment (config "zero_mg").
set_all_seeds(0)
config = load_config("zero_mg")
env = SimpleMicrogrid(config=config['env'])

# One entry per rollout is appended to each of these histories.
all_states_za_train = []
all_rewards_za_train = []
all_actions_za_train = []
all_net_energy_za_train = []

num_houses = len(env.mg.houses)
num_batches = config['env']['batch_size']

for _ in range(2):
    states_za_train = []
    rewards_za_train = []
    actions_za_train = []

    # reset() also returns a reward (r_0); it is unpacked but never recorded,
    # so the state list ends up one entry longer than the reward/action lists.
    state_0, r_0, done, _ = env.reset()
    states_za_train.append(state_0)

    while not done:
        # The zero agent always submits an all-zeros action of shape
        # (num_houses, num_batches, 1); a fresh array is built every step.
        action = np.zeros((num_houses, num_batches, 1))
        s_t, r_t, done, _ = env.step(action)

        states_za_train.append(s_t)
        rewards_za_train.append(r_t)
        actions_za_train.append(action)

    all_states_za_train.append(np.array(states_za_train))
    all_rewards_za_train.append(np.array(rewards_za_train))
    all_actions_za_train.append(np.array(actions_za_train))
    # NOTE(review): net_energy is stored by reference, not copied - confirm the
    # environment does not reuse the same object across resets.
    all_net_energy_za_train.append(env.mg.net_energy)

# The summary below uses rewards_za_train, i.e. the LAST rollout only.
# Axis 0 indexes the env.step calls; axis 1 of the summed array is presumably
# the batch axis - TODO confirm against the environment's reward shape.
summed_rewards_za_train = np.array(rewards_za_train, dtype=float).sum(axis=0).mean(axis=1)
print("Total average", summed_rewards_za_train.mean())
print(summed_rewards_za_train)

## Eval

In [None]:
# Zero-action baseline in 'eval' mode. Reuses `env` and `config` created by an
# earlier cell, so that cell must have been run first.
set_all_seeds(0)
env.change_mode('eval')

# One entry per rollout is appended to each of these histories.
all_states_za_eval = []
all_rewards_za_eval = []
all_actions_za_eval = []
all_net_energy_za_eval = []

num_houses = len(env.mg.houses)
num_batches = config['env']['batch_size']

for _ in range(2):
    states_za_eval = []
    rewards_za_eval = []
    actions_za_eval = []

    # reset() also returns a reward (r_0); it is unpacked but never recorded,
    # so the state list ends up one entry longer than the reward/action lists.
    state_0, r_0, done, _ = env.reset()
    states_za_eval.append(state_0)

    while not done:
        # The zero agent always submits an all-zeros action of shape
        # (num_houses, num_batches, 1); a fresh array is built every step.
        action = np.zeros((num_houses, num_batches, 1))
        s_t, r_t, done, _ = env.step(action)

        states_za_eval.append(s_t)
        rewards_za_eval.append(r_t)
        actions_za_eval.append(action)

    all_states_za_eval.append(np.array(states_za_eval))
    all_rewards_za_eval.append(np.array(rewards_za_eval))
    all_actions_za_eval.append(np.array(actions_za_eval))
    # NOTE(review): net_energy is stored by reference, not copied - confirm the
    # environment does not reuse the same object across resets.
    all_net_energy_za_eval.append(env.mg.net_energy)

# The summary below uses rewards_za_eval, i.e. the LAST rollout only.
# Axis 0 indexes the env.step calls; axis 1 of the summed array is presumably
# the batch axis - TODO confirm against the environment's reward shape.
summed_rewards_za_eval = np.array(rewards_za_eval, dtype=float).sum(axis=0).mean(axis=1)
print("Total average", summed_rewards_za_eval.mean())
print(summed_rewards_za_eval)

## Test

In [None]:
# Zero-action baseline in 'test' mode. Reuses `env` and `config` created by an
# earlier cell, so that cell must have been run first.
set_all_seeds(0)
env.change_mode('test')

# One entry per rollout is appended to each of these histories.
all_states_za_test = []
all_rewards_za_test = []
all_actions_za_test = []
all_net_energy_za_test = []

num_houses = len(env.mg.houses)
num_batches = config['env']['batch_size']

for _ in range(2):
    states_za_test = []
    rewards_za_test = []
    actions_za_test = []

    # reset() also returns a reward (r_0); it is unpacked but never recorded,
    # so the state list ends up one entry longer than the reward/action lists.
    state_0, r_0, done, _ = env.reset()
    states_za_test.append(state_0)

    while not done:
        # The zero agent always submits an all-zeros action of shape
        # (num_houses, num_batches, 1); a fresh array is built every step.
        action = np.zeros((num_houses, num_batches, 1))
        s_t, r_t, done, _ = env.step(action)

        states_za_test.append(s_t)
        rewards_za_test.append(r_t)
        actions_za_test.append(action)

    all_states_za_test.append(np.array(states_za_test))
    all_rewards_za_test.append(np.array(rewards_za_test))
    all_actions_za_test.append(np.array(actions_za_test))
    # NOTE(review): net_energy is stored by reference, not copied - confirm the
    # environment does not reuse the same object across resets.
    all_net_energy_za_test.append(env.mg.net_energy)

# The summary below uses rewards_za_test, i.e. the LAST rollout only.
# Axis 0 indexes the env.step calls; axis 1 of the summed array is presumably
# the batch axis - TODO confirm against the environment's reward shape.
summed_rewards_za_test = np.array(rewards_za_test, dtype=float).sum(axis=0).mean(axis=1)
print("Total average", summed_rewards_za_test.mean())
print(summed_rewards_za_test)

# Random Agent

## Train

In [None]:
# Random-action baseline on a freshly built environment (config "zero_mg").
# NOTE(review): set_all_seeds presumably seeds NumPy's global RNG, which is
# what np.random.uniform draws from below - verify in src.utils.tools.
set_all_seeds(0)
config = load_config("zero_mg")
env = SimpleMicrogrid(config=config['env'])

# One entry per rollout is appended to each of these histories.
all_states_ra_train = []
all_rewards_ra_train = []
all_actions_ra_train = []
all_net_energy_ra_train = []

num_houses = len(env.mg.houses)
num_batches = config['env']['batch_size']

for _ in range(2):
    states_ra_train = []
    rewards_ra_train = []
    actions_ra_train = []

    # reset() also returns a reward (r_0); it is unpacked but never recorded,
    # so the state list ends up one entry longer than the reward/action lists.
    state_0, r_0, done, _ = env.reset()
    states_ra_train.append(state_0)

    while not done:
        # Draw every action component uniformly from [-1, 1).
        action = np.random.uniform(-1, 1, size=(num_houses, num_batches, 1))
        s_t, r_t, done, _ = env.step(action)

        states_ra_train.append(s_t)
        rewards_ra_train.append(r_t)
        actions_ra_train.append(action)

    all_states_ra_train.append(np.array(states_ra_train))
    all_rewards_ra_train.append(np.array(rewards_ra_train))
    all_actions_ra_train.append(np.array(actions_ra_train))
    # NOTE(review): net_energy is stored by reference, not copied - confirm the
    # environment does not reuse the same object across resets.
    all_net_energy_ra_train.append(env.mg.net_energy)

# The summary below uses rewards_ra_train, i.e. the LAST rollout only.
# Axis 0 indexes the env.step calls; axis 1 of the summed array is presumably
# the batch axis - TODO confirm against the environment's reward shape.
summed_rewards_ra_train = np.array(rewards_ra_train, dtype=float).sum(axis=0).mean(axis=1)
print("Total average", summed_rewards_ra_train.mean())
print(summed_rewards_ra_train)

## Eval

In [None]:
# Random-action baseline in 'eval' mode. Reuses `env` and `config` created by
# an earlier cell, so that cell must have been run first.
set_all_seeds(0)
env.change_mode('eval')

# One entry per rollout is appended to each of these histories.
all_states_ra_eval = []
all_rewards_ra_eval = []
all_actions_ra_eval = []
all_net_energy_ra_eval = []

num_houses = len(env.mg.houses)
num_batches = config['env']['batch_size']

for _ in range(2):
    states_ra_eval = []
    rewards_ra_eval = []
    actions_ra_eval = []

    # reset() also returns a reward (r_0); it is unpacked but never recorded,
    # so the state list ends up one entry longer than the reward/action lists.
    state_0, r_0, done, _ = env.reset()
    states_ra_eval.append(state_0)

    while not done:
        # Draw every action component uniformly from [-1, 1).
        action = np.random.uniform(-1, 1, size=(num_houses, num_batches, 1))
        s_t, r_t, done, _ = env.step(action)

        states_ra_eval.append(s_t)
        rewards_ra_eval.append(r_t)
        actions_ra_eval.append(action)

    all_states_ra_eval.append(np.array(states_ra_eval))
    all_rewards_ra_eval.append(np.array(rewards_ra_eval))
    all_actions_ra_eval.append(np.array(actions_ra_eval))
    # NOTE(review): net_energy is stored by reference, not copied - confirm the
    # environment does not reuse the same object across resets.
    all_net_energy_ra_eval.append(env.mg.net_energy)

# The summary below uses rewards_ra_eval, i.e. the LAST rollout only.
# Axis 0 indexes the env.step calls; axis 1 of the summed array is presumably
# the batch axis - TODO confirm against the environment's reward shape.
summed_rewards_ra_eval = np.array(rewards_ra_eval, dtype=float).sum(axis=0).mean(axis=1)
print("Total average", summed_rewards_ra_eval.mean())
print(summed_rewards_ra_eval)

## Test

In [None]:
# Random-action baseline in 'test' mode. Reuses `env` and `config` created by
# an earlier cell, so that cell must have been run first.
set_all_seeds(0)
env.change_mode('test')

# One entry per rollout is appended to each of these histories.
all_states_ra_test = []
all_rewards_ra_test = []
all_actions_ra_test = []
all_net_energy_ra_test = []

num_houses = len(env.mg.houses)
num_batches = config['env']['batch_size']

for _ in range(2):
    states_ra_test = []
    rewards_ra_test = []
    actions_ra_test = []

    # reset() also returns a reward (r_0); it is unpacked but never recorded,
    # so the state list ends up one entry longer than the reward/action lists.
    state_0, r_0, done, _ = env.reset()
    states_ra_test.append(state_0)

    while not done:
        # Draw every action component uniformly from [-1, 1).
        action = np.random.uniform(-1, 1, size=(num_houses, num_batches, 1))
        s_t, r_t, done, _ = env.step(action)

        states_ra_test.append(s_t)
        rewards_ra_test.append(r_t)
        actions_ra_test.append(action)

    all_states_ra_test.append(np.array(states_ra_test))
    all_rewards_ra_test.append(np.array(rewards_ra_test))
    all_actions_ra_test.append(np.array(actions_ra_test))
    # NOTE(review): net_energy is stored by reference, not copied - confirm the
    # environment does not reuse the same object across resets.
    all_net_energy_ra_test.append(env.mg.net_energy)

# The summary below uses rewards_ra_test, i.e. the LAST rollout only.
# Axis 0 indexes the env.step calls; axis 1 of the summed array is presumably
# the batch axis - TODO confirm against the environment's reward shape.
summed_rewards_ra_test = np.array(rewards_ra_test, dtype=float).sum(axis=0).mean(axis=1)
print("Total average", summed_rewards_ra_test.mean())
print(summed_rewards_ra_test)

# A2C

In [None]:
import traceback

from src.rl.a2c.d_simple_microgrid import Agent

# Train the A2C agent on its own SimpleMicrogrid instance. RuntimeError and
# KeyboardInterrupt are caught and only their traceback is printed, so the
# cell itself does not fail in those cases.
# NOTE(review): when that happens all_states / all_rewards / all_actions /
# all_net_energy are not (re)assigned by this cell.
try:
    # Only the 'train' section of the "c_a2c" configuration is used.
    config_eval = load_config("c_a2c")['train']

    # Run the simulator
    set_all_seeds(0)

    # Environment and agent are built from the same config section.
    my_env = SimpleMicrogrid(config=config_eval['env'])
    agent = Agent(env=my_env, config=config_eval)

    # Launch the training and keep the four histories it returns.
    all_states, all_rewards, all_actions, all_net_energy = agent.train()

    # Finish the Wandb execution.
    # NOTE(review): this line is only reached when train() returns normally;
    # if train() raises, the logger is never finished - confirm that is OK.
    agent.wdb_logger.finish()

except (RuntimeError, KeyboardInterrupt):
    traceback.print_exc()


In [None]:
# Convert the reward history returned by agent.train() into an ndarray
# (rebinding the same name) and display its dimensions as the cell output.
all_rewards = np.array(all_rewards)
np.shape(all_rewards)

In [None]:
# Take the last entry along the first axis of all_rewards, then average away
# its axes 3, 2 and 1 one after another; what remains is indexed by axis 0 of
# that entry and is drawn as a line.
# NOTE(review): the meaning of each axis is not visible here - confirm against
# what Agent.train() returns before labelling the plot.
rew = all_rewards[-1, :, :, :]
for axis in (3, 2, 1):
    rew = rew.mean(axis=axis)
plt.plot(rew)

In [None]:
import urllib.request

# Query the datastore_search endpoint on data.bayanat.ae (limit=5,
# q=title:jones) and print the raw response body.
url = 'http://data.bayanat.ae/api/action/datastore_search?resource_id=5a8ee0ef-dfc9-4ffd-9fd8-338588137313&limit=5&q=title:jones'

# Python 3 exposes urlopen in urllib.request; urllib.urlopen existed only in
# Python 2 and raises AttributeError here. The context manager closes the
# HTTP response once the body has been read.
with urllib.request.urlopen(url) as fileobj:
    print(fileobj.read())