In [2]:
import pandas as pd
import numpy as np
import random  
import matplotlib.pyplot as plt
import time
import math
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO, DQN, A2C
from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback, EvalCallback
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from sb3_contrib import RecurrentPPO
import gymnasium as gym
from gymnasium import spaces
import torch
import torch as th
from torch import nn
import torch.nn as nn
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler

from typing import Callable




In [3]:
file_path = r"C:\Users\kubaw\Desktop\DELFT\THESIS\CH5"

# Use pandas to read the CSV file
JA_OUTPUT = pd.read_csv(file_path + "/AC_OUTPUT_JA")

elec_df = pd.read_csv(file_path + "/hourly_consumption2.csv")
import_price = pd.read_csv(file_path + "/electricity_tariff.csv")

elec_df = elec_df * 1000
elec_df = elec_df.drop('HourOfYear', axis=1)

elec_df['hour_of_day'] = np.arange(8760) % 24
elec_df['day_of_week'] = np.arange(8760) // 24 % 7  # 0 is Monday, 6 is Sunday

# Define rates
peak_rate = 1.45
normal_rate = 1
off_peak_rate = 0.85

# Function to determine rate based on hour and day
def determine_rate(hour, day):
    if day < 5:  # Monday to Friday
        if 16 <= hour < 21:  # 4pm to 9pm
            return peak_rate
        elif 6 <= hour < 10:  # 7am to 9am and 10am to 3pm
            return normal_rate
        else:  # Off-peak times
            return off_peak_rate
    else:  # Weekend
        if 16 <= hour < 21:  # 4pm to 9pm
            return normal_rate
        else:  # Off-peak times
            return off_peak_rate
    
# Apply the function to each row to determine the rate
elec_df['rate'] = elec_df.apply(lambda row: determine_rate(row['hour_of_day'], row['day_of_week']), axis=1)
import_price_df = import_price.drop(columns=['x'])
import_price_df = import_price_df[:-26]

train_cols = random.sample(list(import_price_df.columns), 7000)
import_price_train = import_price_df[train_cols]
test_cols = [col for col in import_price_df.columns if col not in train_cols]
import_price_test = import_price_df[test_cols]

JA_OUTPUT_arr = (np.array(JA_OUTPUT.T)).flatten()

elec_consum_arr = np.array(elec_df["Consumption"])
import_price_rate = np.array(elec_df["rate"])
hour = np.array(elec_df["hour_of_day"])

import_price_train_arr = np.array(import_price_train.T)
import_price_test_arr = np.array(import_price_train.T)

In [4]:
import_price_train_arr.max()

0.0020975665988171

In [6]:
if len(elec_consum_arr) % 168 != 0:
    # Reshape the array to (52, 168) but handle the last week specially
    weekly_elec_consum_arr = np.split(elec_consum_arr[:-24], 52)
    last_week_extended = np.concatenate((weekly_elec_consum_arr[-1], elec_consum_arr[-24:]))
    weekly_elec_consum_arr[-1] = last_week_extended

if len(JA_OUTPUT_arr) % 168 != 0:
    # Reshape the array to (52, 168) but handle the last week specially
    weekly_JA_OUTPUT_arr = np.split(JA_OUTPUT_arr[:-24], 52)
    last_week_extended = np.concatenate((weekly_JA_OUTPUT_arr[-1], JA_OUTPUT_arr[-24:]))
    weekly_JA_OUTPUT_arr[-1] = last_week_extended
    
weekly_JA_OUTPUT_arr = weekly_JA_OUTPUT_arr[:51]
weekly_elec_consum_arr = weekly_elec_consum_arr[:51]

In [7]:
class BES_env(gym.Env):
    def __init__(self, weekly_JA_OUTPUT_arr, weekly_elec_consum_arr, import_price_rate, import_tariff, hour):
        
        #PV
        self.solar_panels_df = weekly_JA_OUTPUT_arr
        
        # ELEC CONSUM
        self.load_df = weekly_elec_consum_arr
        self.rate = import_price_rate
        
        # Import Tariff
        self.import_df = import_tariff
        
        self.episode_len = 168
        self.hour_of_day = hour
        
        self.max_charge = 1500
        self.dis_charge_eff = 0.95
        
        self.max_export = 4000
        
        # action[0] - charge / discharge, # action[1] - charge from grid / charge from PV
        self.action_space = spaces.Box(-1, 1, shape=(1,), dtype=np.float32)
        
        # SOC, SOC - 1, pv - load, pv - load + 1, pv - load + 2, import, week of year, hour_of_day, battery kw, pv size, elec price
        self.observation_space = spaces.Box(0, 1, shape=(11,), dtype=np.float32)
        
    def reset(self, seed=None):
        
        self.current_step = 0
        
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
            
        row_import = np.random.randint(self.import_df.shape[0])
        col_import = np.random.randint(self.import_df.shape[1])
            
        self.ep_import_price = self.import_df[row_import, col_import]
        self.import_price_at_zero = self.ep_import_price * 0.85 

        self.ep_export_price = self.ep_import_price * 0.3
        
        self.week_of_year = np.random.randint(0, 51)
        
        self.solar_panels = self.solar_panels_df[self.week_of_year]
        self.load = self.load_df[self.week_of_year]
        
        self.batery_float = np.random.uniform(0.8, 1.0)
        self.base_batery_cap = 5000  # 5kW
        self.batery_cap = self.base_batery_cap * self.batery_float
        
        self.min_SOC = 0.05 * self.batery_cap
        self.max_SOC = self.batery_cap 
        
        self.leak = self.batery_cap * 0.0001

        self.initial_SOC = np.random.randint(self.min_SOC, self.max_SOC)
        self.current_SOC = self.initial_SOC
        self.previous_SOC = self.initial_SOC
        self.OBS_initial_SOC = self.initial_SOC / self.batery_cap
                        
        self.pv_size = np.random.uniform(6, 12)
        self.pv = self.solar_panels * self.pv_size
        
        self.pv_at_zero = self.pv[self.current_step]
        self.load_at_zero = self.load[self.current_step]
        self.pv_load_at_zero = self.pv_at_zero - self.load_at_zero
        self.OBS_pv_load_at_zero = (self.pv_load_at_zero  - (-1665)) / (3756 - (-1667))
        
        self.next_step_pv = self.pv[self.current_step + 1]
        self.next_step_load = self.load[self.current_step + 1]
        self.next_step_pv_load = self.next_step_pv - self.next_step_load
        self.OBS_next_step_pv_load = (self.next_step_pv_load  - (-1665)) / (3756 - (-1665))
        
        self.two_next_step_pv = self.pv[self.current_step + 2]
        self.two_next_step_load = self.load[self.current_step + 2]
        self.two_next_step_pv_load = self.next_step_pv - self.next_step_load
        self.OBS_two_next_step_pv_load = (self.two_next_step_pv_load  - (-1665)) / (3756 - (-1665))
        
        self.OBS_ep_import_price_at_zero = self.import_price_at_zero / 1.45
                                    
        self.OBS_current_step = self.current_step / self.episode_len
        
        self.OBS_batery_cap = (self.batery_cap - 3500) / (self.base_batery_cap - 3500)
        
        self.OBS_week = self.week_of_year / 51
        
        self.OBS_pv_size = (self.pv_size - 6) / (12 - 6)
        
        self.OBS_import_price = (self.ep_import_price - 0.000162) / (0.0025724 - 0.000162)
                
        self.observation = np.array([self.OBS_initial_SOC, self.OBS_initial_SOC, self.OBS_pv_load_at_zero, 
                                     self.OBS_next_step_pv_load, self.OBS_two_next_step_pv_load, 
                                     self.OBS_ep_import_price_at_zero, self.OBS_week, 0, self.OBS_batery_cap,
                                    self.OBS_pv_size, self.OBS_import_price], dtype=np.float32)
        
        info = {}

        
        return self.observation, info
        
    def step(self, action):
        
        # ASSIGN VARIABLES
        self.step_import_price = self.ep_import_price * self.rate[self.current_step]
        self.step_pv = self.pv[self.current_step]
        self.step_load = self.load[self.current_step]
        to_grid = 0
        from_grid = 0
        too_low = 0
        import_cost = 0
        minimised = 0
        too_high = False
        
        
        # A C T I O N S - Chargins and Discharging
        action = action * self.max_charge * self.dis_charge_eff
        
        if action > 0: 
            discharge_request = 0
            charge_request = action
        else: 
            discharge_request = (- action)
            charge_request = 0
            
        if self.current_SOC >= discharge_request:
            actual_discharge = discharge_request
        else:
            actual_discharge = self.current_SOC  # Battery cannot discharge more than it has
            
        if charge_request + self.current_SOC <= self.max_SOC:
            actual_charge = charge_request
        else:
            actual_charge = self.max_SOC - self.current_SOC  # Battery cannot charge more than it has        
        
        self.step_pv_load = self.step_pv - self.step_load
        # A C T I O N S  L O G I C
        
        # CONDITION A - load is HIGHER than pv production (there is a shortge)
        if self.step_pv_load < 0:
            if action < 0: # CONDITION A1 - the batery Discharges

                self.step_pv_load_plus_batery_discharge = self.step_pv_load + actual_discharge
                self.current_SOC -= actual_discharge 

                if self.step_pv_load_plus_batery_discharge > 0: # CONDITION A1A - the energy shortage is satisfied by batery,
                    to_grid += min(self.step_pv_load_plus_batery_discharge, self.max_export) # then any surplus to grid.

                else: # CONDITION A1B - the energy shortage is NOT satisfied by batery, 
                    from_grid += (- self.step_pv_load_plus_batery_discharge) # then shortage supplemented from grid

            elif action > 0: # CONDITION A2 - despite the shortage the batery charges

                self.current_SOC += actual_charge
                from_grid += (- self.step_pv_load) # then shortage is imported from the grid, 
                from_grid += actual_charge # and energy is imported for charging. 

        else: # CONDITION B the load is LOWER than pv production (there is a surplus)

            if action >= 0: # CONDITION B1 - the batery Charges
                self.step_pv_load_minus_batery_charge = self.step_pv_load - actual_charge
                self.current_SOC += actual_charge # charge the batery

                if self.step_pv_load_minus_batery_charge > 0: # CONDITION B1A - the surplus persists after charging batery,
                    to_grid += min(self.step_pv_load_minus_batery_charge, self.max_export) # then it is sold to the grid

                else:   # CONDITION B1B - the charge is higher than surplus
                    from_grid += ( - self.step_pv_load_minus_batery_charge)

            if action < 0: # CONDITION B2 - Despite there being a surplus the batery discharges
                self.current_SOC -= actual_discharge
                to_grid += self.step_pv_load + actual_discharge        
        

        # R E W A R D
        export_revenue = to_grid * self.ep_export_price
        import_cost = from_grid * self.step_import_price
        
        import_no_battery_cost = 0
        if self.step_pv_load < 0:
            import_no_battery_cost = (- self.step_pv_load) * self.step_import_price
        
        minimised = import_no_battery_cost - import_cost
        
        
        reward = float(- import_cost + export_revenue)
        
        self.previous_SOC = self.observation[0] * self.batery_cap
        
        self.observation[0] = self.current_SOC / self.batery_cap
        self.observation[1] = self.previous_SOC / self.batery_cap
        
        self.current_step += 1
        
        if self.current_step < 166:
            self.step_pv = self.pv[self.current_step]
            self.next_step_pv = self.pv[self.current_step + 1]
            self.two_next_step_pv = self.pv[self.current_step + 2]

            self.step_load = self.load[self.current_step]
            self.next_step_load = self.load[self.current_step + 1]
            self.two_next_step_load = self.load[self.current_step + 2]

            self.step_pv_load = self.step_pv - self.step_load
            self.OBS_step_pv_load = (self.step_pv_load  - (-1665)) / (3756 - (-1665))
            self.observation[2] = self.OBS_step_pv_load
                                    
            self.next_step_pv_load = self.next_step_pv - self.next_step_load
            self.OBS_next_step_pv_load = (self.next_step_pv_load  - (-1665)) / (3756 - (-1665))
            self.observation[3] = self.OBS_next_step_pv_load
                                    
            self.two_next_step_pv_load = self.two_next_step_pv - self.two_next_step_load
            self.OBS_two_next_step_pv_load = (self.two_next_step_pv_load  - (-1665)) / (3756 - (-1665))                
            self.observation[4] = self.OBS_two_next_step_pv_load
            
        elif self.current_step == 166:
            self.step_pv = self.pv[self.current_step]
            self.next_step_pv = self.pv[self.current_step + 1]

            self.step_load = self.load[self.current_step]
            self.next_step_load = self.load[self.current_step + 1]

            self.step_pv_load = self.step_pv - self.step_load
            self.OBS_step_pv_load = (self.step_pv_load  - (-1665)) / (3756 - (-1665))
            self.observation[2] = self.OBS_step_pv_load
                                    
            self.next_step_pv_load = self.next_step_pv - self.next_step_load
            self.OBS_next_step_pv_load = (self.next_step_pv_load  - (-1665)) / (3756 - (-1665))            
            self.observation[3] = self.OBS_next_step_pv_load
            self.observation[4] = 0
            
        else: 
            self.step_pv = self.pv[self.current_step]
            self.step_load = self.load[self.current_step]
            self.step_pv_load = self.step_pv - self.step_load
            self.OBS_step_pv_load = (self.step_pv_load  - (-1665)) / (3756 - (-1665))   
            self.observation[2] = self.OBS_step_pv_load
            self.observation[3] = 0
            self.observation[4] = 0
            
        self.OBS_step_import_price = self.rate[self.current_step] / 1.45
        
        self.observation[5] = self.OBS_step_import_price
                
        self.observation[7] = self.hour_of_day[self.current_step] / 24
        
        self.current_SOC -= self.leak
        
        
        info = {"check_1": self.step_pv_load,
               "check_2": self.current_step}
        
        
        truncated = False
        done = self.current_step == 167
        
        return self.observation, reward, done, False, info

In [None]:
class BES_env_test(gym.Env):
    def __init__(self, weekly_JA_OUTPUT_arr, weekly_elec_consum_arr, import_price_rate, import_tariff, hour):
        
        #PV
        self.solar_panels_df = weekly_JA_OUTPUT_arr
        
        # ELEC CONSUM
        self.load_df = weekly_elec_consum_arr
        self.rate = import_price_rate
        
        # Import Tariff
        self.import_df = import_tariff
        
        self.episode_len = 168
        self.hour_of_day = hour
        
        self.max_charge = 1500
        self.dis_charge_eff = 0.95
        
        self.max_export = 4000
        
        # action[0] - charge / discharge, # action[1] - charge from grid / charge from PV
        self.action_space = spaces.Box(-1, 1, shape=(1,), dtype=np.float32)
        
        # SOC, SOC - 1, pv - load, pv - load + 1, pv - load + 2, import, week of year, hour_of_day, battery kw, pv size, elec price
        self.observation_space = spaces.Box(0, 1, shape=(11,), dtype=np.float32)
        
    def reset(self, seed=None):
        
        self.current_step = 0
        
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
            
        row_import = np.random.randint(self.import_df.shape[0])
        col_import = np.random.randint(self.import_df.shape[1])
            
        self.ep_import_price = self.import_df[row_import, col_import]
        self.import_price_at_zero = self.ep_import_price * 0.85 

        self.ep_export_price = self.ep_import_price * 0.3
        
        self.week_of_year = np.random.randint(0, 51)
        
        self.solar_panels = self.solar_panels_df[self.week_of_year]
        self.load = self.load_df[self.week_of_year]
        
        self.batery_float = np.random.uniform(0.8, 1.0)
        self.base_batery_cap = 5000  # 5kW
        self.batery_cap = self.base_batery_cap * self.batery_float
        
        self.min_SOC = 0.05 * self.batery_cap
        self.max_SOC = self.batery_cap 
        
        self.leak = self.batery_cap * 0.0001

        self.initial_SOC = np.random.randint(self.min_SOC, self.max_SOC)
        self.current_SOC = self.initial_SOC
        self.previous_SOC = self.initial_SOC
        self.OBS_initial_SOC = self.initial_SOC / self.batery_cap
                        
        self.pv_size = np.random.uniform(6, 12)
        self.pv = self.solar_panels * self.pv_size
        
        self.pv_at_zero = self.pv[self.current_step]
        self.load_at_zero = self.load[self.current_step]
        self.pv_load_at_zero = self.pv_at_zero - self.load_at_zero
        self.OBS_pv_load_at_zero = (self.pv_load_at_zero  - (-1665)) / (3756 - (-1667))
        
        self.next_step_pv = self.pv[self.current_step + 1]
        self.next_step_load = self.load[self.current_step + 1]
        self.next_step_pv_load = self.next_step_pv - self.next_step_load
        self.OBS_next_step_pv_load = (self.next_step_pv_load  - (-1665)) / (3756 - (-1665))
        
        self.two_next_step_pv = self.pv[self.current_step + 2]
        self.two_next_step_load = self.load[self.current_step + 2]
        self.two_next_step_pv_load = self.next_step_pv - self.next_step_load
        self.OBS_two_next_step_pv_load = (self.two_next_step_pv_load  - (-1665)) / (3756 - (-1665))
        
        self.OBS_ep_import_price_at_zero = self.import_price_at_zero / 1.45
                                    
        self.OBS_current_step = self.current_step / self.episode_len
        
        self.OBS_batery_cap = (self.batery_cap - 3500) / (self.base_batery_cap - 3500)
        
        self.OBS_week = self.week_of_year / 51
        
        self.OBS_pv_size = (self.pv_size - 6) / (12 - 6)
        
        self.OBS_import_price = (self.ep_import_price - 0.000162) / (0.0025724 - 0.000162)
                
        self.observation = np.array([self.OBS_initial_SOC, self.OBS_initial_SOC, self.OBS_pv_load_at_zero, 
                                     self.OBS_next_step_pv_load, self.OBS_two_next_step_pv_load, 
                                     self.OBS_ep_import_price_at_zero, self.OBS_week, 0, self.OBS_batery_cap,
                                    self.OBS_pv_size, self.OBS_import_price], dtype=np.float32)
        
        info = {}

        
        return self.observation, info
        
    def step(self, action):
        
        # ASSIGN VARIABLES
        self.step_import_price = self.ep_import_price * self.rate[self.current_step]
        self.step_pv = self.pv[self.current_step]
        self.step_load = self.load[self.current_step]
        to_grid = 0
        from_grid = 0
        too_low = 0
        import_cost = 0
        minimised = 0
        too_high = False
        
        
        # A C T I O N S - Chargins and Discharging
        action = action * self.max_charge * self.dis_charge_eff
        
        if action > 0: 
            discharge_request = 0
            charge_request = action
        else: 
            discharge_request = (- action)
            charge_request = 0
            
        if self.current_SOC >= discharge_request:
            actual_discharge = discharge_request
        else:
            actual_discharge = self.current_SOC  # Battery cannot discharge more than it has
            
        if charge_request + self.current_SOC <= self.max_SOC:
            actual_charge = charge_request
        else:
            actual_charge = self.max_SOC - self.current_SOC  # Battery cannot charge more than it has        
        
        self.step_pv_load = self.step_pv - self.step_load
        # A C T I O N S  L O G I C
        
        # CONDITION A - load is HIGHER than pv production (there is a shortge)
        if self.step_pv_load < 0:
            if action < 0: # CONDITION A1 - the batery Discharges

                self.step_pv_load_plus_batery_discharge = self.step_pv_load + actual_discharge
                self.current_SOC -= actual_discharge 

                if self.step_pv_load_plus_batery_discharge > 0: # CONDITION A1A - the energy shortage is satisfied by batery,
                    to_grid += min(self.step_pv_load_plus_batery_discharge, self.max_export) # then any surplus to grid.

                else: # CONDITION A1B - the energy shortage is NOT satisfied by batery, 
                    from_grid += (- self.step_pv_load_plus_batery_discharge) # then shortage supplemented from grid

            elif action > 0: # CONDITION A2 - despite the shortage the batery charges

                self.current_SOC += actual_charge
                from_grid += (- self.step_pv_load) # then shortage is imported from the grid, 
                from_grid += actual_charge # and energy is imported for charging. 

        else: # CONDITION B the load is LOWER than pv production (there is a surplus)

            if action >= 0: # CONDITION B1 - the batery Charges
                self.step_pv_load_minus_batery_charge = self.step_pv_load - actual_charge
                self.current_SOC += actual_charge # charge the batery

                if self.step_pv_load_minus_batery_charge > 0: # CONDITION B1A - the surplus persists after charging batery,
                    to_grid += min(self.step_pv_load_minus_batery_charge, self.max_export) # then it is sold to the grid

                else:   # CONDITION B1B - the charge is higher than surplus
                    from_grid += ( - self.step_pv_load_minus_batery_charge)

            if action < 0: # CONDITION B2 - Despite there being a surplus the batery discharges
                self.current_SOC -= actual_discharge
                to_grid += self.step_pv_load + actual_discharge        
        

        # R E W A R D
        export_revenue = to_grid * self.ep_export_price
        import_cost = from_grid * self.step_import_price
        
        import_no_battery_cost = 0
        if self.step_pv_load < 0:
            import_no_battery_cost = (- self.step_pv_load) * self.step_import_price
        
        minimised = import_no_battery_cost - import_cost
        
        
        reward = float(- import_cost + export_revenue)
        
        self.previous_SOC = self.observation[0] * self.batery_cap
        
        self.observation[0] = self.current_SOC / self.batery_cap
        self.observation[1] = self.previous_SOC / self.batery_cap
        
        self.current_step += 1
        
        if self.current_step < 166:
            self.step_pv = self.pv[self.current_step]
            self.next_step_pv = self.pv[self.current_step + 1]
            self.two_next_step_pv = self.pv[self.current_step + 2]

            self.step_load = self.load[self.current_step]
            self.next_step_load = self.load[self.current_step + 1]
            self.two_next_step_load = self.load[self.current_step + 2]

            self.step_pv_load = self.step_pv - self.step_load
            self.OBS_step_pv_load = (self.step_pv_load  - (-1665)) / (3756 - (-1665))
            self.observation[2] = self.OBS_step_pv_load
                                    
            self.next_step_pv_load = self.next_step_pv - self.next_step_load
            self.OBS_next_step_pv_load = (self.next_step_pv_load  - (-1665)) / (3756 - (-1665))
            self.observation[3] = self.OBS_next_step_pv_load
                                    
            self.two_next_step_pv_load = self.two_next_step_pv - self.two_next_step_load
            self.OBS_two_next_step_pv_load = (self.two_next_step_pv_load  - (-1665)) / (3756 - (-1665))                
            self.observation[4] = self.OBS_two_next_step_pv_load
            
        elif self.current_step == 166:
            self.step_pv = self.pv[self.current_step]
            self.next_step_pv = self.pv[self.current_step + 1]

            self.step_load = self.load[self.current_step]
            self.next_step_load = self.load[self.current_step + 1]

            self.step_pv_load = self.step_pv - self.step_load
            self.OBS_step_pv_load = (self.step_pv_load  - (-1665)) / (3756 - (-1665))
            self.observation[2] = self.OBS_step_pv_load
                                    
            self.next_step_pv_load = self.next_step_pv - self.next_step_load
            self.OBS_next_step_pv_load = (self.next_step_pv_load  - (-1665)) / (3756 - (-1665))            
            self.observation[3] = self.OBS_next_step_pv_load
            self.observation[4] = 0
            
        else: 
            self.step_pv = self.pv[self.current_step]
            self.step_load = self.load[self.current_step]
            self.step_pv_load = self.step_pv - self.step_load
            self.OBS_step_pv_load = (self.step_pv_load  - (-1665)) / (3756 - (-1665))   
            self.observation[2] = self.OBS_step_pv_load
            self.observation[3] = 0
            self.observation[4] = 0
            
        self.OBS_step_import_price = self.rate[self.current_step] / 1.45
        
        self.observation[5] = self.OBS_step_import_price
                
        self.observation[7] = self.hour_of_day[self.current_step] / 24
        
        self.current_SOC -= self.leak
        
        
        info = {"check_1": self.step_pv_load,
               "check_2": self.current_step}
        
        
        truncated = False
        done = self.current_step == 167
        
        return self.observation, reward, done, False, info

In [8]:
env = BES_env(weekly_JA_OUTPUT_arr, weekly_elec_consum_arr, import_price_rate, import_price_train_arr, hour)
env_test = BES_env_test(weekly_JA_OUTPUT_arr, weekly_elec_consum_arr, import_price_rate, import_price_train_arr, hour)

In [9]:
check_env(env)

  reward = float(- import_cost + export_revenue)
  self.observation[0] = self.current_SOC / self.batery_cap


In [11]:
def test_ep(episodes, environment):    
    for episode in range(episodes):
        done = False
        obs = environment.reset()
        step = 0
        print(obs, "\n")
        while not done:
            step += 1
            random_action = environment.action_space.sample()
            obs, reward, done, trun, info = environment.step(random_action)
            
            
            # Extracting the 2nd and 3rd key-value pairs
            keys = list(info.keys())
            values = list(info.values())

            # Getting the 2nd key-value pair
            zeroth_key = keys[0]
            zeroth_value = values[0]

            # Getting the 3rd key-value pair

            sixth_key = keys[1]
            sixth_value = values[1]
            
            print("STEP:", step)
            print("ACT","\n",  random_action)
            print("REW","\n", reward)
            print("OBS","\n",  obs)
            print(zeroth_key, zeroth_value, sixth_key, sixth_value)
            print("\n")

In [28]:
test_ep(1, env)

(array([2.1386117e-01, 2.1386117e-01, 2.4821480e-01, 2.8020087e-01,
       2.8020087e-01, 3.0470823e-04, 3.9215687e-01, 0.0000000e+00,
       2.0218397e-02, 9.5715517e-01, 1.4843859e-01], dtype=float32), {}) 

STEP: 1
ACT 
 [0.5728728]
REW 
 -0.5205783247947693
OBS 
 [0.45726895 0.21386117 0.28020087 0.28036773 0.28793603 0.5862069
 0.39215687 0.04166667 0.0202184  0.95715517 0.14843859]
check_1 20 check_2 1


STEP: 2
ACT 
 [0.27782226]
REW 
 -0.24864453077316284
OBS 
 [0.5753128  0.45726895 0.28036773 0.28793603 0.260816   0.5862069
 0.39215687 0.08333334 0.0202184  0.95715517 0.14843859]
check_1 20 check_2 2


STEP: 3
ACT 
 [0.9522243]
REW 
 -0.6951982975006104
OBS 
 [0.9799031  0.5753128  0.28793603 0.260816   0.31298015 0.5862069
 0.39215687 0.125      0.0202184  0.95715517 0.14843859]
check_1 20 check_2 3


STEP: 4
ACT 
 [0.26931813]
REW 
 -0.07734070718288422
OBS 
 [1.         0.9799031  0.260816   0.31298015 0.33186737 0.5862069
 0.39215687 0.16666667 0.0202184  0.95715517 0.148

  reward = float(- import_cost + export_revenue)
  self.observation[0] = self.current_SOC / self.batery_cap


In [29]:
def test_multi(episodes, environment):
    
    start_time = time.time()  
    
    for episode in range(1, episodes+1):
        state = environment.reset()
        done = False
        reward = 0
        timer = 0
        tot_reward = 0
        
        while not done:
            action = environment.action_space.sample()
            n_state, reward, done, truncated, info = environment.step(action)
            timer += 1
            tot_reward += reward
        # Extracting the 2nd and 3rd key-value pairs
        keys = list(info.keys())
        values = list(info.values())


        
        print("Episode:{} Reward:{}". format(episode, tot_reward), "\n")

    end_time = time.time()
    print(f"Execution time: {end_time - start_time} seconds")

In [30]:
test_multi(10, env)

Episode:1 Reward:-23.880898295756197 

Episode:2 Reward:-7.606835494749248 

Episode:3 Reward:-8.599292648024857 

Episode:4 Reward:-7.863065324752824 

Episode:5 Reward:-18.120795380556956 

Episode:6 Reward:-70.13382620131597 

Episode:7 Reward:-10.143950617610244 

Episode:8 Reward:-11.940314072619971 

Episode:9 Reward:-29.989696835167706 

Episode:10 Reward:-9.471755528822541 

Execution time: 0.11835527420043945 seconds


  reward = float(- import_cost + export_revenue)
  self.observation[0] = self.current_SOC / self.batery_cap


In [31]:
from typing import Callable
def make_env(rank: int, seed: int = 0) -> Callable:
    def _init() -> gym.Env:
        random.seed(seed + rank)
        np.random.seed(seed + rank) 
        env = BES_env(weekly_JA_OUTPUT_arr, weekly_elec_consum_arr, import_price_rate, import_price_train_arr, hour)
        env.reset(seed=seed + rank)
        return env

    return _init
# Number of environments to run in parallel
num_cpu = 16
env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])   

In [33]:
log_path = "./logs/"
eval_callback = EvalCallback(env_test, eval_freq=10000,  n_eval_episodes=200, best_model_save_path = "C:/Users/kubaw/Desktop/DELFT/THESIS/CODE/TEST_MODELS_BATERY/1/",
                             log_path = log_path, deterministic=True, render=False)


In [8]:
policy_kwargs = dict(net_arch=dict(pi=[2048, 1024, 1024, 512, 256], vf=[2048, 1024, 1024, 512, 256]))

In [34]:
def linear_schedule(initial_value):
    """
    Returns a function that computes a linearly decreasing value from initial_value to 0.0001.
    """
    def func(progress_remaining):
        return 0.00005 + (initial_value - 0.000001) * progress_remaining
    return func

learning_rate = linear_schedule(0.00005)

In [None]:
model38 = PPO("MlpPolicy", env, verbose=1, gae_lambda = 0.95, n_epochs=16, batch_size = 1024, n_steps=2048, 
              policy_kwargs = policy_kwargs, learning_rate = learning_rate, tensorboard_log = "C:/Users/kubaw/Desktop/DELFT/THESIS\CODE/TEST_MODELS/LOGS/logs")
TIMESTEPS = 30000000
model38.learn(total_timesteps = TIMESTEPS, callback=eval_callback)