In [13]:
import cvxpy
from matplotlib import pyplot as plt
import numpy as np
from src.components.synthetic_house import SyntheticHouse
from src.components.synthetic_microgrid import SyntheticMicrogrid
from src.environments.simple_microgrid import SimpleMicrogrid
from src.rl.a2c.d_simple_microgrid import Agent
from src.components.battery import Battery, BatteryParameters
from src.utils.tools import set_all_seeds, load_config

# Microgrid

## Functions

In [14]:
def solver(house: SyntheticHouse, n: int = 24):

    battery = cvxpy.Variable(n+1)
    action = cvxpy.Variable(n)
    consumption = cvxpy.Variable(n)

    constraints = []

    # Battery
        # Starts in 0.1
    constraints.append(battery[0] == house.battery.soc_min)
        # Max and min batteries
    for i in range(n+1):
        constraints.append(battery[i] <= house.battery.soc_max)
        constraints.append(battery[i] >= house.battery.soc_min)


    # Action / Batteryn't

    for i in range(n):
        constraints.append(action[i] <= 1)
        constraints.append(action[i] >= -1)


    # Transition
    obj = 0

    for i in range(n):
        
        constraints.append(action[i] <= house.battery.p_charge_max)
        constraints.append(action[i] <= house.battery.p_discharge_max)
        # Update battery
        constraints.append(battery[i+1] == battery[i] + action[i] * house.battery.efficiency)
        # Update net 
        constraints.append(consumption[i] == house.demand[i]-house.pv_gen[i] + action[i] * house.battery.efficiency)


        obj += cvxpy.maximum(consumption[i] * (house.price[i] + house.emission[i]),0) 
        obj += cvxpy.maximum(-consumption[i] * house.price[i] * house.grid_sell_rate,0)  


    objective = cvxpy.Minimize(obj)
    prob = cvxpy.Problem(objective, constraints)
    res = prob.solve()

    return res, battery.value, action.value

In [15]:
def get_all_actions(env: SimpleMicrogrid, mode : str = 'train') -> np.ndarray:
    # Set mode, train, eval, test
    env.mg.change_mode(mode)

    # Create arrays to hold score, battery SOC and Action for all houses
    rewards, battery_values, action_values = [],[],[]
    
    # Same for all houses
    for house in env.mg.houses:
        reward, batt, action = solver(house)
        
        rewards.append(reward)
        battery_values.append(batt)
        action_values.append(action)
    battery_values = np.array(battery_values)
    action_values = np.array(action_values)
    rewards = np.array(rewards)

    return rewards, battery_values, action_values
    # print("Mean", scores.mean(), "\n Scores",rewards)    

In [16]:
def loop_env(env: SimpleMicrogrid, action_values: np.ndarray, mode : str = 'train') -> np.ndarray:
    done = 0
    env.mg.change_mode(mode)

    env.reset()

    # Cycle the entire episode with already computed actions by solver
    while not done:
        time_step = env.mg.current_step
        _,_,done,_ = env.step(action_values[:,time_step])
        time_step += 1

    # Calculate Score with actions
    return env.mg.get_houses_metrics() # output price, emissions 

## Run this

In [17]:
set_all_seeds(0)
# create environment,m save array of houses
config = load_config("zero_mg")
env = SimpleMicrogrid(config=config['env'])

# Train
mode = 'train'
rewards, battery_values, action_values = get_all_actions(env, mode)
train_metrics = loop_env(env, action_values, mode)
print(rewards[0].mean())
print('train ', train_metrics)

# Eval
mode = 'eval'
rewards, battery_values, action_values = get_all_actions(env, mode)
print(rewards[0].mean())
eval_metrics = loop_env(env, action_values, mode)

# Test
mode = 'test'
rewards, battery_values, action_values = get_all_actions(env, mode)
print(rewards[0].mean())
test_metrics = loop_env(env, action_values, mode)


1.0029444192063173
train  (array([-0.09929886, -0.16271739, -0.04808466, -0.15198558,  0.03388161,
        0.03724827]), array([-0.23935303, -0.31534628, -0.13570712, -0.28165016, -0.0298125 ,
        0.00033534]))
1.2039406659431469
2.5285933769868048


In [18]:
np.array(train_metrics).mean(axis=1)

array([-0.06515943, -0.16692229])

In [19]:
train_metrics, eval_metrics, test_metrics

((array([-0.09929886, -0.16271739, -0.04808466, -0.15198558,  0.03388161,
          0.03724827]),
  array([-0.23935303, -0.31534628, -0.13570712, -0.28165016, -0.0298125 ,
          0.00033534])),
 (array([-0.18073769, -0.13861451, -0.13705395,  0.01801452, -0.08770782,
          0.02134148]),
  array([-0.35173296, -0.27818922, -0.25554218, -0.02177083, -0.17006995,
         -0.02798345])),
 (array([-0.17577385, -0.25198997, -0.18895732,  0.04679109, -0.04659103,
          0.03022902,  0.00812369, -0.0376431 ,  0.06172703, -0.03975418]),
  array([-3.22765870e-01, -4.61615321e-01, -3.43667000e-01, -2.69293021e-04,
         -1.39544784e-01, -6.55449237e-03, -6.60324787e-02, -7.36377648e-02,
          8.35868715e-03, -7.45623736e-02])))

In [21]:
set_all_seeds(0)

model = "d_a2c_mg"
config = load_config(model)
config = config['train']
my_env = SimpleMicrogrid(config=config['env'])

agent = Agent(env=my_env, config = config)
results_ag = agent.train()
results_ag['test'] = agent.test()
agent.wdb_logger.finish()

Running on GPU


  0%|          | 0/2000 [00:00<?, ?it/s]

rollout_avg_reward: -1.624745704484815 
actor_loss: -70.49291229248047 
critic_loss: 1.0898540019989014 
avg_action: 0.0038962339743590013 


  0%|          | 1/2000 [00:13<7:28:20, 13.46s/it]

Saving model on step: 0


  2%|▎         | 50/2000 [01:15<35:52,  1.10s/it] 

rollout_avg_reward: -1.5532283402107208 
actor_loss: 3.397155284881592 
critic_loss: 0.3137674033641815 
avg_action: -0.005068108974358944 


  5%|▌         | 100/2000 [02:24<48:53,  1.54s/it]

rollout_avg_reward: -1.4260773960447182 
actor_loss: 1.6605123281478882 
critic_loss: 0.20972318947315216 
avg_action: -0.08238181089743588 


  8%|▊         | 150/2000 [03:28<46:48,  1.52s/it]

rollout_avg_reward: -1.2496904645828117 
actor_loss: 0.8407720327377319 
critic_loss: 0.18547984957695007 
avg_action: -0.1583934294871795 


 10%|█         | 200/2000 [04:28<20:25,  1.47it/s]

rollout_avg_reward: -1.2379203368694796 
actor_loss: 0.3684002757072449 
critic_loss: 0.18030185997486115 
avg_action: -0.20706129807692303 


 12%|█▎        | 250/2000 [05:34<44:44,  1.53s/it]

rollout_avg_reward: -1.24209425880464 
actor_loss: 0.047536373138427734 
critic_loss: 0.18447108566761017 
avg_action: -0.24201722756410257 


 13%|█▎        | 251/2000 [05:36<46:00,  1.58s/it]

Saving model on step: 250


 15%|█▌        | 300/2000 [06:37<35:23,  1.25s/it]

rollout_avg_reward: -1.2394334850886484 
actor_loss: -0.3622734546661377 
critic_loss: 0.18374039232730865 
avg_action: -0.24490184294871792 


 18%|█▊        | 350/2000 [07:45<40:09,  1.46s/it]

rollout_avg_reward: -1.2375378491461315 
actor_loss: -0.044324081391096115 
critic_loss: 0.18275056779384613 
avg_action: -0.24604366987179482 


 20%|██        | 400/2000 [08:49<40:36,  1.52s/it]

rollout_avg_reward: -1.2316793145691836 
actor_loss: 0.11012029647827148 
critic_loss: 0.18317486345767975 
avg_action: -0.24718549679487178 


 22%|██▎       | 450/2000 [09:53<27:48,  1.08s/it]

rollout_avg_reward: -1.2327308014499174 
actor_loss: 0.07841388881206512 
critic_loss: 0.1814463585615158 
avg_action: -0.25026041666666665 


 25%|██▌       | 500/2000 [11:02<39:28,  1.58s/it]

rollout_avg_reward: -1.2367186024546564 
actor_loss: 0.2383328378200531 
critic_loss: 0.18390949070453644 
avg_action: -0.25089142628205124 


 25%|██▌       | 501/2000 [11:02<30:31,  1.22s/it]

Saving model on step: 500


 28%|██▊       | 550/2000 [12:04<35:45,  1.48s/it]

rollout_avg_reward: -1.2288207683512902 
actor_loss: -0.03653176873922348 
critic_loss: 0.17840643227100372 
avg_action: -0.2455528846153846 


 30%|███       | 600/2000 [13:07<15:50,  1.47it/s]

rollout_avg_reward: -1.2189109345487037 
actor_loss: 0.1566474437713623 
critic_loss: 0.17799891531467438 
avg_action: -0.23858173076923073 


 32%|███▎      | 650/2000 [14:17<35:02,  1.56s/it]

rollout_avg_reward: -1.200436893693592 
actor_loss: -0.006015460006892681 
critic_loss: 0.1741563230752945 
avg_action: -0.21232972756410254 


 35%|███▌      | 700/2000 [15:23<33:42,  1.56s/it]

rollout_avg_reward: -1.167828086481874 
actor_loss: 0.050843238830566406 
critic_loss: 0.1663559079170227 
avg_action: -0.1838842147435897 


 38%|███▊      | 750/2000 [16:29<26:41,  1.28s/it]

rollout_avg_reward: -1.1340446416889665 
actor_loss: -0.14431476593017578 
critic_loss: 0.16194774210453033 
avg_action: -0.1581630608974359 


 38%|███▊      | 751/2000 [16:31<28:57,  1.39s/it]

Saving model on step: 750


 40%|████      | 800/2000 [17:37<30:38,  1.53s/it]

rollout_avg_reward: -1.1047244304092223 
actor_loss: 0.2175191342830658 
critic_loss: 0.1434435099363327 
avg_action: -0.1113782051282051 


 42%|████▎     | 850/2000 [18:43<29:16,  1.53s/it]

rollout_avg_reward: -1.0771175052413158 
actor_loss: -0.13802513480186462 
critic_loss: 0.12832482159137726 
avg_action: -0.07295673076923076 


 45%|████▌     | 900/2000 [19:44<16:52,  1.09it/s]

rollout_avg_reward: -1.068829676208544 
actor_loss: -0.07835801690816879 
critic_loss: 0.12083522230386734 
avg_action: -0.05322516025641022 


 48%|████▊     | 950/2000 [20:54<27:51,  1.59s/it]

rollout_avg_reward: -1.0693956147798092 
actor_loss: -0.21780380606651306 
critic_loss: 0.12319767475128174 
avg_action: -0.03231169871794868 


 50%|█████     | 1000/2000 [22:00<26:10,  1.57s/it]

rollout_avg_reward: -1.0562847045228936 
actor_loss: -0.10657215118408203 
critic_loss: 0.12012384086847305 
avg_action: -0.029226762820512793 


 50%|█████     | 1001/2000 [22:02<26:24,  1.59s/it]

Saving model on step: 1000


 52%|█████▎    | 1050/2000 [23:07<22:31,  1.42s/it]

rollout_avg_reward: -1.0480665945063896 
actor_loss: -0.15339970588684082 
critic_loss: 0.11602097004652023 
avg_action: -0.021524439102564057 


 55%|█████▌    | 1100/2000 [24:13<10:50,  1.38it/s]

rollout_avg_reward: -1.055013767851665 
actor_loss: -0.1118261069059372 
critic_loss: 0.12011851370334625 
avg_action: -0.04081530448717948 


 57%|█████▊    | 1150/2000 [25:21<21:46,  1.54s/it]

rollout_avg_reward: -1.0425444750360093 
actor_loss: 0.10492189973592758 
critic_loss: 0.11870673298835754 
avg_action: -0.05197315705128204 


 60%|██████    | 1200/2000 [26:24<18:39,  1.40s/it]

rollout_avg_reward: -1.0448066093496642 
actor_loss: -0.191390722990036 
critic_loss: 0.11882983148097992 
avg_action: -0.04299879807692306 


 62%|██████▎   | 1250/2000 [27:30<07:54,  1.58it/s]

rollout_avg_reward: -1.0504180952677709 
actor_loss: 0.004160960670560598 
critic_loss: 0.11975815892219543 
avg_action: -0.04725560897435896 


 63%|██████▎   | 1251/2000 [27:31<07:21,  1.69it/s]

Saving model on step: 1250


 65%|██████▌   | 1300/2000 [28:06<09:31,  1.23it/s]

rollout_avg_reward: -1.0461851393166066 
actor_loss: 0.009436528198421001 
critic_loss: 0.11490220576524734 
avg_action: -0.0016025641025640784 


 68%|██████▊   | 1350/2000 [29:23<12:12,  1.13s/it]

rollout_avg_reward: -1.0468539040710363 
actor_loss: -0.06203901767730713 
critic_loss: 0.12170282751321793 
avg_action: -0.06256009615384613 


 70%|███████   | 1400/2000 [30:33<15:34,  1.56s/it]

rollout_avg_reward: -1.045444766884058 
actor_loss: -0.18992218375205994 
critic_loss: 0.11984584480524063 
avg_action: -0.05132211538461537 


 72%|███████▎  | 1450/2000 [31:35<13:38,  1.49s/it]

rollout_avg_reward: -1.0426815369001239 
actor_loss: -0.19644109904766083 
critic_loss: 0.11704018712043762 
avg_action: -0.040114182692307675 


 75%|███████▌  | 1500/2000 [32:39<07:17,  1.14it/s]

rollout_avg_reward: -1.0457299431664686 
actor_loss: 0.05046912282705307 
critic_loss: 0.11812761425971985 
avg_action: -0.060386618589743576 


 75%|███████▌  | 1501/2000 [32:41<08:58,  1.08s/it]

Saving model on step: 1500


 78%|███████▊  | 1550/2000 [33:46<10:50,  1.45s/it]

rollout_avg_reward: -1.040044533450617 
actor_loss: -0.10384460538625717 
critic_loss: 0.11651379615068436 
avg_action: -0.054266826923076904 


 80%|████████  | 1600/2000 [34:51<10:12,  1.53s/it]

rollout_avg_reward: -1.0415271362285743 
actor_loss: -0.03689539432525635 
critic_loss: 0.11617105454206467 
avg_action: -0.02927684294871794 


 82%|████████▎ | 1650/2000 [35:55<04:58,  1.17it/s]

rollout_avg_reward: -1.0425746815325534 
actor_loss: 0.1707237958908081 
critic_loss: 0.11619456857442856 
avg_action: -0.04266826923076923 


 85%|████████▌ | 1700/2000 [37:02<07:52,  1.57s/it]

rollout_avg_reward: -1.042318727325167 
actor_loss: -0.04353861138224602 
critic_loss: 0.11736660450696945 
avg_action: -0.05578926282051281 


 88%|████████▊ | 1750/2000 [38:06<05:45,  1.38s/it]

rollout_avg_reward: -1.041681400730242 
actor_loss: 0.0341775044798851 
critic_loss: 0.1159469410777092 
avg_action: -0.04683493589743589 


 88%|████████▊ | 1751/2000 [38:08<05:57,  1.44s/it]

Saving model on step: 1750


 90%|█████████ | 1800/2000 [39:12<02:29,  1.34it/s]

rollout_avg_reward: -1.0426161918475347 
actor_loss: 0.19836434721946716 
critic_loss: 0.11662819981575012 
avg_action: -0.057291666666666664 


 92%|█████████▎| 1850/2000 [40:20<03:59,  1.60s/it]

rollout_avg_reward: -1.0396302791555938 
actor_loss: 0.08351850509643555 
critic_loss: 0.11544070392847061 
avg_action: -0.03260216346153845 


 95%|█████████▌| 1900/2000 [41:24<02:23,  1.43s/it]

rollout_avg_reward: -1.0384493449349834 
actor_loss: 0.04444877430796623 
critic_loss: 0.11411681771278381 
avg_action: -0.021965144230769215 


 98%|█████████▊| 1950/2000 [42:29<00:44,  1.12it/s]

rollout_avg_reward: -1.0382919570008882 
actor_loss: 0.21426928043365479 
critic_loss: 0.11401542276144028 
avg_action: -0.056500400641025635 


100%|██████████| 2000/2000 [43:38<00:00,  1.31s/it]
100%|██████████| 2000/2000 [11:27<00:00,  2.91it/s]


# House

### Set env vars

In [47]:
set_all_seeds(0)

# Same env obs as test simple env

config = load_config("c_a2c")
config = config['train']

mg = SyntheticHouse(config=config['env'])
demand = mg.demand
pv = mg.pv_gen
price_s = mg.price
emission = mg.emission
price_b = price_s / 4

In [48]:
battery_params = {
    "soc_0": 0.1,
    "soc_max":0.9,
    "soc_min":0.1,
    "p_charge_max":0.8,
    "p_discharge_max":0.8,
    "efficiency":0.9,
    "capacity":1,
    "sell_price":0.0,
    "buy_price":0.0
    
}

real_battery = Battery(random_soc_0=False, params = BatteryParameters(battery_params))

p_charge, p_discharge, _ = real_battery.check_battery_constraints(power_rate=0.8)
real_battery.apply_action(p_charge=p_charge, p_discharge=p_discharge)

real_battery.soc.item()

# battery.reset()

0.8200000000000001

### Adv battery calc (not working)

In [49]:
set_all_seeds(0)
battery = cvxpy.Variable(n+1)
action = cvxpy.Variable(n)
consumption = cvxpy.Variable(n)

constraints = []
# Battery
    # Starts in 0.1
constraints.append(battery[0] == battery_params["soc_min"])
    # Max and min batteries
for i in range(n+1):
    constraints.append(battery[i] <= battery_params["soc_max"])
    constraints.append(battery[i] >= battery_params["soc_min"])


# Action / Batteryn't
# for i in range(n):
#     constraints.append(action[i] <= 0.9)
#     constraints.append(action[i] >= -0.9)

# Transition
obj = 0

for i in range(n):
    # Max and min battery charge 
    constraints.append(action[i] <= battery_params["p_charge_max"])
    constraints.append(action[i] <= battery_params["p_discharge_max"])
    # Update battery SOC
    # self.soc = self.soc + (p_charge * self.efficiency - p_discharge / self.efficiency) / self.capacity

    constraints.append(battery[i+1] == battery[i] + ((action[i] * battery_params["efficiency"])/battery_params["efficiency"])/battery_params["capacity"] )
    # Update net 
    constraints.append(consumption[i] == demand[i]-pv[i] + action[i] * battery_params["efficiency"])


    obj += cvxpy.maximum(consumption[i] * (price_s[i] + emission[i]),0) 
    obj += cvxpy.maximum(-consumption[i] * price_b[i],0)  


objective = cvxpy.Minimize(obj)
prob = cvxpy.Problem(objective, constraints)
res = prob.solve()

res, battery.value, action.value, consumption.value, price_s

NameError: name 'n' is not defined

### Real dataset

In [None]:
n = 24

battery = cvxpy.Variable(n+1)
action = cvxpy.Variable(n)
consumption = cvxpy.Variable(n)

constraints = []
# Battery
    # Starts in 0.1
constraints.append(battery[0] == battery_params["soc_min"])
    # Max and min batteries
for i in range(n+1):
    constraints.append(battery[i] <= battery_params["soc_max"])
    constraints.append(battery[i] >= battery_params["soc_min"])


# Action / Batteryn't

for i in range(n):
    constraints.append(action[i] <= 1)
    constraints.append(action[i] >= -1)


# Transition
obj = 0

for i in range(n):
    
    constraints.append(action[i] <= battery_params["p_charge_max"])
    constraints.append(action[i] <= battery_params["p_discharge_max"])
    # Update battery
    constraints.append(battery[i+1] == battery[i] + action[i] * battery_params["efficiency"])
    # Update net 
    constraints.append(consumption[i] == demand[i]-pv[i] + action[i] * battery_params["efficiency"])


    obj += cvxpy.maximum(consumption[i] * (price_s[i] + emission[i]),0) 
    obj += cvxpy.maximum(-consumption[i] * price_b[i],0)  


objective = cvxpy.Minimize(obj)
prob = cvxpy.Problem(objective, constraints)
res = prob.solve()

res, battery.value, action.value

### Test Dataset

In [None]:
n = 4
d = np.random.rand(n)
g = np.random.rand(n)*0.5
pb = np.random.rand(n)
ps = pb/4

In [None]:

battery = cvxpy.Variable(n+1)
action = cvxpy.Variable(n)
consumption = cvxpy.Variable(n)


constraints = []
for i in range(n+1):
    constraints.append(battery[i] <= 1)
    constraints.append(battery[i] >= 0)
constraints.append(battery[0] == 0)

for i in range(n):
    constraints.append(action[i] <= .3)
    constraints.append(action[i] >= -1)
    

obj = 0
for i in range(n):
    constraints.append(battery[i+1] == battery[i] + action[i]  )

    constraints.append(consumption[i] == d[i]-g[i] + action[i] )


    obj += cvxpy.maximum(consumption[i]* pb[i],0) 
    obj += cvxpy.maximum(-consumption[i]*ps[i],0)  


objective = cvxpy.Minimize(obj)
prob = cvxpy.Problem(objective, constraints)
res = prob.solve()

res, battery.value, action.value