# Imports

In [None]:
import numpy as np

from matplotlib import pyplot as plt
from src.environments.mg_simple import MGSimple
from src.utils.tools import set_all_seeds, load_config,plot_results

# Zero agent

In [None]:
# Baseline rollout: an agent that sends an all-zero action at every step.
set_all_seeds(0)

config = load_config("zero")['train']
env = MGSimple(config=config['env'])

# Each list receives one entry per episode.
all_states_za, all_rewards_za, all_actions_za, all_net_energy_za = [], [], [], []

# Two episodes are run ("just a couple", enough to generate the line in the plot).
for _ in range(2):

    # The values returned by reset() open the state and reward histories.
    state_0, r_0, done, _ = env.reset()
    states_za, rewards_za, actions_za = [state_0], [r_0], []

    while not done:
        # A fresh (batch_size, 1) array of zeros is sent as the action.
        action = np.zeros((config['env']['batch_size'], 1))
        s_t, r_t, done, _ = env.step(action)

        states_za.append(s_t)
        rewards_za.append(r_t)
        actions_za.append(action)

    # Store the finished episode as arrays, plus the net energy read from the env.
    all_states_za.append(np.array(states_za))
    all_rewards_za.append(np.array(rewards_za))
    all_actions_za.append(np.array(actions_za))
    all_net_energy_za.append(env.mg.net_energy)

plot_results(
    env,
    all_states_za,
    all_rewards_za,
    all_actions_za,
    all_net_energy_za,
    'Zero Agent (Family)',
    save=True,
    filename='imgs/za_family.png',
)

# Random Agent

In [None]:
# Baseline rollout: an agent that samples every action uniformly from [-1, 1).
set_all_seeds(0)

config = load_config("zero")['train']
env = MGSimple(config=config['env'])

# Each list receives one entry per episode.
all_states_ra, all_rewards_ra, all_actions_ra, all_net_energy_ra = [], [], [], []

# 100 episodes are run for the random agent.
for _ in range(100):

    # The values returned by reset() open the state and reward histories.
    state_0, r_0, done, _ = env.reset()
    states_ra, rewards_ra, actions_ra = [state_0], [r_0], []

    while not done:
        # Draw a new (batch_size, 1) action at every step.
        action = np.random.uniform(
            low=-1,
            high=1,
            size=(config['env']['batch_size'], 1),
        )
        s_t, r_t, done, _ = env.step(action)

        states_ra.append(s_t)
        rewards_ra.append(r_t)
        actions_ra.append(action)

    # Store the finished episode as arrays, plus the net energy read from the env.
    all_states_ra.append(np.array(states_ra))
    all_rewards_ra.append(np.array(rewards_ra))
    all_actions_ra.append(np.array(actions_ra))
    all_net_energy_ra.append(env.mg.net_energy)

plot_results(
    env,
    all_states_ra,
    all_rewards_ra,
    all_actions_ra,
    all_net_energy_ra,
    'Random Agent (Family)',
    save=True,
    filename='imgs/ra_family.png',
)

# CVX Agent

In [None]:
# CVX baseline: replay a fixed, hard-coded schedule of actions.
set_all_seeds(0)

config = load_config("zero")
config = config['train']
env = MGSimple(config=config['env'])

# One action value per environment step (24 entries), looked up below with
# env.mg.current_step.
# NOTE(review): an episode longer than 24 steps would raise IndexError - confirm
# the episode length configured for the environment.
perf_actions_family = [
    0.26258278,  0.11611298,  0.07428618,  0.07643896,  0.12529734,
    0.23417065, -0.18023179, -0.7086571 ,  0.18634816,  0.60601339,
    -0.24662803, -0.08707155, -0.07357717, -0.18069343,  0.58562932,
    0.09886819, -0.20698431, -0.48973657,  0.14986115,  0.01168165,
    -0.05987804, -0.05891896, -0.08977367, -0.14514013
]

perf_actions = perf_actions_family

# Each list receives one entry per episode.
all_states_opt, all_rewards_opt, all_actions_opt, all_net_energy_opt = [], [], [], []

# Two episodes are run ("just a couple", enough to generate the line in the plot).
for _ in range(2):

    states_opt, rewards_opt, actions_opt = [], [], []

    # The values returned by reset() open the state and reward histories.
    state_0, r_0, done, _ = env.reset()

    states_opt.append(state_0)
    rewards_opt.append(r_0)

    while not done:

        # Broadcast the scheduled value for the current step over the whole batch.
        action = np.full(
            (config['env']['batch_size'], 1),
            perf_actions[env.mg.current_step],
        )

        s_t, r_t, done, _ = env.step(action)

        states_opt.append(s_t)
        rewards_opt.append(r_t)
        actions_opt.append(action)

    # Convert the per-episode histories to arrays, as the zero-agent and
    # random-agent cells do, so plot_results receives the same kind of input
    # from every baseline.
    all_states_opt.append(np.array(states_opt))
    all_rewards_opt.append(np.array(rewards_opt))
    all_actions_opt.append(np.array(actions_opt))
    all_net_energy_opt.append(env.mg.net_energy)

plot_results(env, all_states_opt, all_rewards_opt, all_actions_opt, all_net_energy_opt, 'CVX Agent (Family)', save=True, filename='imgs/cvx_family.png')

# A2C Causality cont.

In [None]:
import traceback

from src.environments.mg_simple import MGSimple
from src.rl.a2c.c_mg_simple import Agent

# Bound before the try block so the cleanup can tell whether the agent was created.
agent = None

try:
    config = load_config("c_a2c")
    config = config['train']

    # Run the simulator

    set_all_seeds(0)

    # Instantiate the environment

    my_env = MGSimple(config=config['env'])

    # Instantiate the agent

    agent = Agent(
        env=my_env, config=config
    )

    # Launch the training

    all_states, all_rewards, all_actions, all_net_energy = agent.train()

except (RuntimeError, KeyboardInterrupt):

    # Report the failure or manual interruption without aborting the notebook.
    traceback.print_exc()

finally:

    # Finish Wandb execution. Done here so the logger is also closed when
    # training fails or is interrupted, not only after a successful run.
    if agent is not None:
        agent.wdb_logger.finish()

In [None]:
# Plot results (Family)

# Print the configuration loaded by the training cell. The previous
# `print(wdb_config)` used a name that is never assigned in this notebook and
# raised NameError before the plot could be drawn.
print(config)

plot_results(my_env, all_states, all_rewards, all_actions, all_net_energy, 'A2C C (Family) results', save=False, filename='a2c_results.png')


In [None]:
# Plot results (Teenagers)

# Print the configuration loaded by the training cell. The previous
# `print(wdb_config)` used a name that is never assigned in this notebook and
# raised NameError before the plot could be drawn.
print(config)

plot_results(my_env, all_states, all_rewards, all_actions, all_net_energy, 'A2C C (Teenagers) results', save=False, filename='a2c_results.png')


# PG disc.

In [None]:
import traceback

from src.environments.mg_simple import MGSimple
from src.rl.pg.d_mg_simple import Agent

# Bound before the try block so the cleanup can tell whether the agent was created.
agent = None

try:

    config = load_config("d_pg")
    config = config['train']

    # Run the simulator

    set_all_seeds(0)

    # Instantiate the environment

    my_env = MGSimple(config=config['env'])

    # Instantiate the agent

    agent = Agent(
        env=my_env, config=config
    )

    # Launch the training

    all_states, all_rewards, all_actions, all_net_energy = agent.train()

except (RuntimeError, KeyboardInterrupt):

    # Report the failure or manual interruption without aborting the notebook.
    traceback.print_exc()

finally:

    # Finish Wandb execution. Done here so the logger is also closed when
    # training fails or is interrupted, not only after a successful run.
    if agent is not None:
        agent.wdb_logger.finish()

In [None]:
# Plot the PG discrete run (Family) and save the figure under imgs/.
plot_results(
    my_env,
    all_states,
    all_rewards,
    all_actions,
    all_net_energy,
    'PG D results (Family) with noise',
    save=True,
    filename='imgs/d_pg_results_family_n.png',
)


In [None]:
# Plot results (Teenagers)

# Print the configuration loaded by the training cell. The previous
# `print(wdb_config)` used a name that is never assigned in this notebook and
# raised NameError before the plot could be drawn.
print(config)

plot_results(my_env, all_states, all_rewards, all_actions, all_net_energy, 'PG D results', save=False, filename='a2c_results.png')

In [None]:
# Plot results (Family)

# Print the configuration loaded by the training cell. The previous
# `print(wdb_config)` used a name that is never assigned in this notebook and
# raised NameError before the plot could be drawn.
print(config)

plot_results(my_env, all_states, all_rewards, all_actions, all_net_energy, 'PG D results', save=False, filename='a2c_results.png')

# A2C Causality disc.

In [None]:
import traceback

from src.environments.mg_simple import MGSimple
from src.rl.a2c.d_mg_simple import Agent

# Bound before the try block so the cleanup can tell whether the agent was created.
agent = None

try:
    config = load_config("d_a2c")
    config = config['train']

    # Run the simulator

    set_all_seeds(0)

    # Instantiate the environment

    my_env = MGSimple(config=config['env'])

    # Instantiate the agent

    agent = Agent(
        env=my_env, config=config
    )

    # Launch the training

    all_states, all_rewards, all_actions, all_net_energy = agent.train()

except (RuntimeError, KeyboardInterrupt):

    # Report the failure or manual interruption without aborting the notebook.
    traceback.print_exc()

finally:

    # Finish Wandb execution. Done here so the logger is also closed when
    # training fails or is interrupted, not only after a successful run.
    if agent is not None:
        agent.wdb_logger.finish()

In [None]:
# Plot results (Family)

# Print the configuration loaded by the training cell. The previous
# `print(wdb_config)` used a name that is never assigned in this notebook and
# raised NameError before the plot could be drawn.
print(config)

plot_results(my_env, all_states, all_rewards, all_actions, all_net_energy, 'A2C D results (Family) without noise', save=True, filename='imgs/d_a2c_results_family_nn.png')


In [None]:
# Plot results (Home Business)

# Print the configuration loaded by the training cell. The previous
# `print(wdb_config)` used a name that is never assigned in this notebook and
# raised NameError before the plot could be drawn.
print(config)

plot_results(my_env, all_states, all_rewards, all_actions, all_net_energy, 'A2C D Home Business results', save=False, filename='a2c_results.png')


In [None]:
# Plot the A2C discrete run (Teenagers); the figure is displayed but not saved.
plot_results(
    my_env,
    all_states,
    all_rewards,
    all_actions,
    all_net_energy,
    'A2C D results',
    save=False,
    filename='a2c_results.png',
)
