In [3]:
# imports
import pandas as pd
import numpy as np
import numpy_financial as npf
import random  
import matplotlib.pyplot as plt
import time

from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO, DQN, A2C
from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback, EvalCallback
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from sb3_contrib import RecurrentPPO

from environment_fx_no_env import calculate_import_export, test1, test2, test3, evaluate1, evaluate2, basepolicy

import gymnasium as gym
from gymnasium import spaces

import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
from typing import Callable

import torch
import torch as th
from torch import nn
import torch.nn as nn




In [14]:
# import and modify data

# Assuming the file is a CSV and specifying the correct path and filename
file_path = r"file_path"

# Use pandas to read the CSV file
JA_60 = pd.read_csv(file_path + "/JA_60")
JA_240 = pd.read_csv(file_path + "/JA_240")

elec_df = pd.read_csv(file_path + "/hourly_consumption_gemany2.csv")
import_price = pd.read_csv(file_path + "/electricity_tariff.csv")

#elec_df = elec_df * 1000
elec_df = elec_df.drop('HourOfYear', axis=1)

elec_df['hour_of_day'] = np.arange(8760) % 24
elec_df['day_of_week'] = np.arange(8760) // 24 % 7  # 0 is Monday, 6 is Sunday

# Define rates
peak_rate = 1.45
normal_rate = 1
off_peak_rate = 0.85

# Function to determine rate based on hour and day
def determine_rate(hour, day):
    if day < 5:  # Monday to Friday
        if 16 <= hour < 21:  # 4pm to 9pm
            return peak_rate
        elif 6 <= hour < 10:  # 7am to 9am and 10am to 3pm
            return normal_rate
        else:  # Off-peak times
            return off_peak_rate
    else:  # Weekend
        if 16 <= hour < 21:  # 4pm to 9pm
            return normal_rate
        else:  # Off-peak times
            return off_peak_rate
    
# Apply the function to each row to determine the rate
elec_df['rate'] = elec_df.apply(lambda row: determine_rate(row['hour_of_day'], row['day_of_week']), axis=1)

import_price_df = import_price.drop(columns=['x'])
import_price_df = import_price_df[:-26]

train_cols = random.sample(list(import_price_df.columns), 7000)
import_price_train = import_price_df[train_cols]
test_cols = [col for col in import_price_df.columns if col not in train_cols]
import_price_test = import_price_df[test_cols]

Eff = pd.read_csv(file_path + "/Efficency_impr")
Eff = (Eff)/100 + 1

CAPEX = pd.read_csv(file_path + "/CAPEX_JA.csv")
CAPEX_JA = (CAPEX[:26])

train_cols_CAPEX = random.sample(list(CAPEX_JA.columns), 7000)
test_cols_CAPEX = [col for col in CAPEX_JA.columns if col not in train_cols_CAPEX]

CAPEX_JA_train = CAPEX_JA[train_cols_CAPEX]
CAPEX_JA_test = CAPEX_JA[test_cols_CAPEX]

train_cols_Eff = random.sample(list(Eff.columns), 7000)
test_cols_Eff = [col for col in Eff.columns if col not in train_cols_Eff]

Eff_train = Eff[train_cols_Eff]
Eff_test = Eff[test_cols_Eff]

JA_60_arr = (np.array(JA_60.T)).flatten()
JA_240_arr = (np.array(JA_240.T)).flatten()

Eff_train_arr = np.array(Eff_train.T)
Eff_test_arr = np.array(Eff_test.T)

CAPEX_JA_train_arr = np.array(CAPEX_JA_train.T)
CAPEX_JA_test_arr = np.array(CAPEX_JA_test.T)

elec_consum_arr = np.array(elec_df["Consumption"])
import_price_rate = np.array(elec_df["rate"])

import_price_train_arr = np.array(import_price_train.T)
import_price_test_arr = np.array(import_price_train.T)

grid_factor = pd.read_csv(file_path + "/grid_factor.csv")
grid_factor =  grid_factor.T

train_cols_grid = random.sample(list(grid_factor.columns), 7000)
grid_factor_train = grid_factor[train_cols_grid]
test_cols_grid = [col for col in grid_factor.columns if col not in train_cols]
grid_factor_test = grid_factor[test_cols_grid]

grid_factor_train_arr = np.array(grid_factor_train.T)
grid_factor_test_arr = np.array(grid_factor_test.T)

pv_co2 = pd.read_csv(file_path + "/pv_emissions.csv")
pv_co2_arr = np.array(pv_co2)
pv_co2_arr = np.insert(pv_co2_arr, 0, 1.620)

In [15]:
class TrainEnvironment(gym.Env):
    def __init__(self, PV_90_arr, PV_270_arr, elec_consum_arr, import_price_rate, import_tariff, efficency, CAPEX):
        
        # Price per watthour
        self.import_price_df = import_tariff
        self.import_price_at_zero = np.float32(0.00035)
        self.import_price_rate = import_price_rate
        
        # Energy Balance
        self.PV_90_arr = PV_90_arr
        self.PV_270_arr = PV_270_arr
        self.elec_df = elec_consum_arr
        self.max_export = 4000
        self.number_of_panels = 32
        
        # Degradation
        self.deg_mu = 0.82 # Trina: 1.19, JA: 0.82, Maxeon: 0.67
        self.deg_std = 0.555 
        self.phi = 30 # Trina: 15, JA: 30, Maxeon: 50
        
        # Efficency Development
        self.efficency_develop_df = efficency
        self.efficency_at_zero = 1.0
        
        # Costs
        self.power_at_zero = 415  # Trina: 265, JA: 415, Maxeon: 435
        self.cost_per_Wp_df_at_zero = 0.69 # Trina: 0.36, JA: 0.69, Maxeon: 1.58
        self.cost_per_Wp_df = CAPEX
        self.initial_other_costs = 150
        
        self.operational_cost = 16.8
        
        self.loan_interest_rate = 1.10
        self.normal_interest_rate = 1.02
        
        self.low_budget = 0 # Low budget: 0, High Budget: 750
        self.high_budget = 750 # Low budget: 750, High Budget: 1500
                        
        # Spaces and length
        self.action_space = spaces.MultiDiscrete([self.number_of_panels // 2, self.number_of_panels // 2])
        self.observation_space = spaces.Box(0, 1.25, shape=(self.number_of_panels + 7,))
        self.episode_len = 25
        self.months_per_timestep = 12
        
    def _get_obs(self):
        
        return self.observation
    
    def calculate_import_export(self, elec_df, export_price, import_price):
        
        """
        Calculate the annual Wh of energy exported to the grid (exported) and saved (minimised)
        """
        
        PV_90_tot = self._get_obs()[0:self.number_of_panels // 2].sum() * self.PV_90_arr 
        PV_270_tot = self._get_obs()[(self.number_of_panels // 2) : self.number_of_panels].sum() * self.PV_270_arr 
        
        AC_OUTPUT_tot = PV_90_tot + PV_270_tot
        
        exported = (AC_OUTPUT_tot - self.elec_df).clip(min=0, max = self.max_export)        
        export_revenue = (export_price * exported).sum()

        
        minimised = AC_OUTPUT_tot - exported 
        minimised_revenue = (minimised * (self.import_price_rate * import_price)).sum()
        

        return export_revenue, AC_OUTPUT_tot, minimised_revenue
    
    def reset(self, seed=None):
        
        """
        Reset the environment to the original state at t=1
        """
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        
        # Panels
        self.init_obs = np.random.uniform(0, 1, size=self.number_of_panels).astype(np.float32)
        self.init_obs = np.where(self.init_obs < 0.5, 0.0, np.random.uniform(0.85, 1.0, size=self.number_of_panels))

        # Combine all initialization into a single step for efficiency
        self.import_price_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min()) / (self.import_price_df.max().max() - self.import_price_df.min().min())
        self.FiT_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min() * 0.33) / (self.import_price_df.max().max() - self.import_price_df.min().min() * 0.33)
        self.efficency_at_zero_norm = (self.efficency_at_zero - 0.999) / (1.156 - 0.999)
        self.panel_cost_and_inverter_at_zero_norm = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())
        
        self.current_budget_constraint = np.random.randint(self.low_budget, self.high_budget)
        self.next_step_budget_constraint = 0
        
        
        # Complete observation initialization in one go
        self.observation = np.concatenate([
            self.init_obs,
            [self.import_price_at_zero_norm, self.FiT_at_zero_norm, self.efficency_at_zero_norm, 
             self.panel_cost_and_inverter_at_zero_norm, 0., 0., 0.]
        ]).astype(np.float32)

        self.previous_observation = self.observation.copy()

        # RANDOM IMPORT PRICE
        self.random_import_price = self.import_price_df[np.random.choice(self.import_price_df.shape[0])] 

        # RANDOM EFFICENCY
        self.random_efficency_develop = self.efficency_develop_df[np.random.choice(self.efficency_develop_df.shape[0])]   
        
        # RANDOM COST PER WP
        self.random_cost_per_Wp = self.cost_per_Wp_df[np.random.choice(self.cost_per_Wp_df.shape[0])]   
        
        
        self.episode_len = 25  
    
        info = {}
        
        # RESET BALANCES
        self.fin_balance_tot = 0
        self.reward_tot = 0
        self.env_balance_tot = 0
        self.produced = 0
        self.other_costs = 0
        self.FiT = 0.0004
        self.next_FiT = 0.0004

        self.total_cash_flow = []
        self.annual_cash_flow = 0
                
        self.due_loans = [0, 0, 0, 0] 
        self.current_interest = 0
        self.step_total_interest = 1
        self.survival = np.zeros(self.number_of_panels, dtype=np.float32)
        self.resale_values = array_of_zeros = np.zeros(self.number_of_panels, dtype=np.float32)
        
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)
        
        self.two_year_ago_interest = 0
        self.first_year_interest = []
        self.second_year_interest = [0]
        self.third_year_interest = [0, 0]
        self.fourth_year_interest = [0, 0, 0]
        self.next_year_total = 0
        
        self.survival = np.zeros(self.number_of_panels, dtype=np.float32)
    
        return self.observation, info
    
    def calculate_resale(self, initial_panel_cost, indices):
        
        self.resale_values[indices] = initial_panel_cost
        
        self.resale_values = self.resale_values * 0.85
        
        for count, i in enumerate(self.broke):
            if i == 1:
                self.resale_values[count] = 0
        
        resale_step = self.resale_values[indices].sum()
        
        return resale_step
    
    def calculate_panel_inv_cost(self, cost_per_Wp):
        
        PW_ep = self.efficency_develop * self.power_at_zero
        
        panel_cost_and_inverter = PW_ep * cost_per_Wp
        
        return panel_cost_and_inverter
        
    def calculate_penalty(self, current_step, annual_expense):
              
        year = 25 - current_step
        
        if year > 0:
            self.current_budget_constraint = self.next_step_budget_constraint    
            
        
        self.current_interest = self.next_year_total
        annual_expense = (-annual_expense)
        value = 0 
        loan = 0
        annual_interest = 0

        if annual_expense > self.current_budget_constraint:
            loan = (self.current_budget_constraint - annual_expense)
            value = annual_expense / self.current_budget_constraint
            periods = 2 if value < 2 else 3 if value < 3 else 4

            annual_interest = loan / periods
            interest_multiplier = 1

            for i in range(4):
                if i < periods:
                    self.due_loans[i] = annual_interest * interest_multiplier
                    interest_multiplier *= self.loan_interest_rate
                else:
                    self.due_loans[i] = 0
        else:
             self.due_loans = [0, 0, 0, 0]
    
        self.first_year_interest.append(self.due_loans[0])
        self.second_year_interest.append(self.due_loans[1])
        self.third_year_interest.append(self.due_loans[2])
        self.fourth_year_interest.append(self.due_loans[3])
    
    
        self.next_year_total = self.first_year_interest[year] + self.second_year_interest[year] + self.third_year_interest[year] + self.fourth_year_interest[year]
        
        self.next_step_budget_constraint = np.random.randint(self.low_budget, self.high_budget) * self.step_total_interest
        current_budget_observation = (self.next_step_budget_constraint - self.low_budget * self.step_total_interest) / (self.high_budget * self.step_total_interest - self.low_budget * self.step_total_interest) 
        self.observation[self.number_of_panels + 6] = current_budget_observation
                
        return self.current_interest, self.due_loans, self.next_year_total
        
    def calculate_total_CAPEX(self, action_step, panel_cost_and_inverter):
        """
        Calculate CAPEX each step in a vectorized manner.
        """
        BOS = panel_cost_and_inverter * 0.55
        number_installed = int(np.sum(action_step))

        # Calculate costs from module and inverter
        panel_cost_and_inverter_step = panel_cost_and_inverter * number_installed

        # Calculate other installation costs
        if number_installed == 0:
            other_costs = 0
        elif number_installed == 1:
            other_costs = self.initial_other_costs * self.step_total_interest
        else:
            discounts = 0.9 ** np.arange(number_installed)
            other_costs = (self.initial_other_costs * self.step_total_interest * discounts).sum()

        # Calculate BOS costs using vector operations
        is_new_installation = (self.previous_observation[:number_installed] == 0) & (action_step[:number_installed] == 1)
        is_replacement = (self.previous_observation[:number_installed] > 0) & (action_step[:number_installed] == 1)
        BOS_cost = np.sum(BOS * is_new_installation) + np.sum((BOS / 2) * is_replacement)

        # Sum total CAPEX
        total_CAPEX = panel_cost_and_inverter_step + BOS_cost + other_costs

        return total_CAPEX, panel_cost_and_inverter
        
    def failure(self, actions):
        
        beta = 3  # Shape parameter

        # Determine which panels are active based on the actions and previous observations.
        if self.episode_len == 24:
            active_panels = (self.observation[:self.number_of_panels] > 0.85)
        else:
            active_panels = (self.observation[:self.number_of_panels] == self.efficency_develop)

        # Calculate lifespan for all active panels at once
        lifespans = np.random.weibull(beta, self.number_of_panels) * self.phi
        lifespans = np.where(active_panels, lifespans, 0)  # Apply lifespan only to active panels

        # Adjust survival times based on episode length
        self.survival[:self.number_of_panels] = np.where(
            active_panels,
            np.abs(lifespans.astype(int)) + np.abs(self.episode_len - 25),
            self.survival[:self.number_of_panels]
        )

        return self.survival

    def calculate_FiT(self, episodes, import_price):
            
        self.FiT = import_price
            
        if episodes == 25:
            self.FiT = self.FiT
            
        elif episodes == 24 or episodes == 23:
            self.FiT = self.FiT * 0.64
            
        elif episodes == 22:
            self.FiT = self.FiT * 0.46
            
        elif episodes == 21:
            self.FiT = self.FiT * 0.55
            
        elif episodes < 20:
            self.FiT = self.FiT * 0.33
            
        elif episodes == 20:
            self.FiT = self.FiT * 0.37
            
        return self.FiT
                        
    def step(self, action):
        
        """
        defines actions, reward etc.
        """
        
        # RESET THE ANNUAL BALANCES
        self.total_CAPEX = 0
        self.pv_costs = 0
        self.fin_balance = 0
        self.number_installed = 0
        current_penalty = 0
        self.other_costs = 0
        next_step_penalty = 0
        self.step_total_interest = self.step_total_interest * self.normal_interest_rate
        current_operational_costs = self.operational_cost * self.step_total_interest
        
        
        self.cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
        self.import_price = self.random_import_price[abs(self.episode_len - 25)]
        self.efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
           
        self.panel_cost_and_inverter = self.calculate_panel_inv_cost(self.cost_per_Wp)
        FiT = self.calculate_FiT(self.episode_len, self.import_price)
        
        reward = 0   
        
        # Find indices of the lowest 'action' values in previous_observation
        indices_0 = np.argsort(self.previous_observation[:(self.number_of_panels // 2)])[:action[0]]
        indices_1 = np.argsort(self.previous_observation[(self.number_of_panels // 2):self.number_of_panels])[:action[1]]

        indices = np.concatenate([indices_0, indices_1 + (self.number_of_panels // 2)])
        
        # Replace these indices in the observation with efficiency_develop
        self.observation[:self.number_of_panels][indices] = self.efficency_develop
        
        # Copy over the other values from previous_observation to observation
        mask = np.ones(len(self.previous_observation[:self.number_of_panels]), dtype=bool)
        mask[indices] = False
        self.observation[:self.number_of_panels][mask] = self.previous_observation[:self.number_of_panels][mask]

        replaced_panels = np.zeros(len(self.previous_observation[:self.number_of_panels]), dtype=int)
        replaced_panels[indices] = 1

        instaltion = (self.observation[:self.number_of_panels] > 0).astype(int)
        self.pv_costs -= instaltion.sum() * current_operational_costs

        actions_step = np.array(replaced_panels)
        
        action = action[0] + action[1]

            
        if action > 0:
            step_CAPEX, panel_cost_and_inverter = self.calculate_total_CAPEX(actions_step, self.panel_cost_and_inverter)
            self.pv_costs -= step_CAPEX
            
        else:
            panel_cost_and_inverter = 0
                
        next_observation = self._get_obs()

        
        # Calculate the Reslae value
        resale = self.calculate_resale(panel_cost_and_inverter, indices) #  ***
        
        self.pv_costs += resale
 
        
        # CALCULATE THE BUDGET INTEREST
        current_penalty, due_loans, next_step_penalty = self.calculate_penalty(self.episode_len, self.pv_costs)

        
        # CALCULATE THE ENERGY YIELD
        exported_revenue, AC_OUTPUT_tot, minimised_revenue = self.calculate_import_export(self.elec_df, FiT, self.import_price)        
        
        pv_costs_observation = - self.pv_costs / 10000
        self.observation[self.number_of_panels + 4] = pv_costs_observation
        
        next_step_penalty_observation = - next_step_penalty / 8000
        self.observation[self.number_of_panels + 5] = next_step_penalty_observation
        
        
        # CALCULATE STEP BALANCES
        self.fin_balance += self.pv_costs
        self.fin_balance += current_penalty
        self.fin_balance += float(exported_revenue + minimised_revenue)
        
        # CALCULATE TOTAL BALANCES
        self.fin_balance_tot += self.fin_balance                
        
        # SUBSTRACT 1 FOR TIMESTEP
        self.episode_len -= 1
        done = self.episode_len <= 0
        
        #reward = self.fin_balance_tot / 1000 if done else 0
        reward = self.fin_balance / 1000
        
        # FAILURE
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)
        survival = self.failure(actions_step)
        
        for c, p in enumerate(survival):
            
            if c < self.number_of_panels:

                if p - 1 <= abs(self.episode_len - 24):
                    self.broke[c] = 1
                    self.observation[c] = 0
        
        # DEGRADATION RATE
        # Applying degradation only to panels that are operational (above 0.1 efficiency)
        active_panels = self.observation[:self.number_of_panels] > 0.1
        degradations = np.random.normal(self.deg_mu, self.deg_std, size=self.number_of_panels) / 100
        self.observation[:self.number_of_panels][active_panels] -= degradations[active_panels]
        
        if not done: 
        
            self.next_cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
            self.next_import_price = self.random_import_price[abs(self.episode_len - 25)]
            self.next_efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
            next_FIT = self.calculate_FiT(self.episode_len, self.next_import_price)
        
            price_observation = (self.next_import_price - 0.00022499) / (0.0020798 - 0.00022499)
            self.observation[self.number_of_panels] = price_observation

            FiT_observation = (next_FIT - 0.00022499 * 0.33) / (0.0020798 - 0.00022499 * 0.33)
            self.observation[self.number_of_panels + 1] = FiT_observation

            eff_observation = (self.next_efficency_develop - 0.999) / (1.156 - 0.999)
            self.observation[self.number_of_panels + 2] = eff_observation

            cost_per_Wp_observation = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())
            self.observation[self.number_of_panels + 3] = cost_per_Wp_observation
        
        info = {"step financial balance (eur):": self.fin_balance,
               "total financial balance: (eur)": self.fin_balance_tot,
               "internal rate of return": 0,
               "current_interest": resale,
                "net present value": 0}
         
        
        self.previous_observation = self.observation.copy()
        
        return self.observation, reward, done, False, info

In [16]:
class TestEnvironment(gym.Env):
    def __init__(self, PV_90_arr, PV_270_arr, elec_consum_arr, import_price_rate, import_tariff, efficency, CAPEX, 
                 GRID_FACTOR, pv_co2_arr):
        
        # Price per watthour
        self.import_price_df = import_tariff
        self.import_price_at_zero = np.float32(0.00035)
        self.import_price_rate = import_price_rate
        
        # Energy Balance
        self.PV_90_arr = PV_90_arr
        self.PV_270_arr = PV_270_arr
        self.elec_df = elec_consum_arr
        self.max_export = 4000
        self.number_of_panels = 32
        
        # Degradation
        self.deg_mu = 0.82 # Trina: 1.19, JA: 0.82, Maxeon: 0.67
        self.deg_std = 0.555 
        
        self.phi = 30 # Trina: 15, JA: 30, Maxeon: 50
        
        # Efficency Development
        self.efficency_develop_df = efficency
        self.efficency_at_zero = 1.0
        
        # Costs
        self.power_at_zero = 415  # Trina: 265, JA: 415, Maxeon: 435
        self.cost_per_Wp_df_at_zero = 0.69 # Trina: 0.36, JA: 0.69, Maxeon: 1.58
        self.cost_per_Wp_df = CAPEX
        self.initial_other_costs = 150
        
        self.operational_cost = 16.8
        
        self.loan_interest_rate = 1.10
        self.normal_interest_rate = 1.02
        
        self.low_budget = 0 # Low budget: 0, High Budget: 750
        self.high_budget = 750 # Low budget: 750, High Budget: 1500
        
        # Spaces and length
        self.action_space = spaces.MultiDiscrete([self.number_of_panels // 2, self.number_of_panels // 2])
        self.observation_space = spaces.Box(0, 1.25, shape=(self.number_of_panels + 7,))
        self.episode_len = 25
        self.months_per_timestep = 12
        
        # Emission
        self.grid_factor_df = GRID_FACTOR 
        self.grid_factor_at_zero = 0.553 
        self.pv_emission = pv_co2_arr * self.power_at_zero
        
    def _get_obs(self):
        
        return self.observation
    
    def calculate_import_export(self, elec_df, export_price, import_price):
        
        """
        Calculate the annual Wh of energy exported to the grid (exported) and saved (minimised)
        """
        
        PV_90_tot = self._get_obs()[0:self.number_of_panels // 2].sum() * self.PV_90_arr 
        PV_270_tot = self._get_obs()[(self.number_of_panels // 2) : self.number_of_panels].sum() * self.PV_270_arr 
        
        AC_OUTPUT_tot = PV_90_tot + PV_270_tot

        exported = (AC_OUTPUT_tot - self.elec_df).clip(min=0, max = self.max_export)  
        excess_energy = (AC_OUTPUT_tot - self.elec_df - self.max_export).clip(min=0)
        
        export_revenue = (export_price * exported).sum()

        
        minimised = AC_OUTPUT_tot - exported 
        minimised_revenue = (minimised * (self.import_price_rate * import_price)).sum()
        
        AC_for_env = AC_OUTPUT_tot - excess_energy

        return export_revenue, AC_OUTPUT_tot, AC_for_env, minimised_revenue
    
    def reset(self, seed=None):
        
        """
        Reset the environment to the original state at t=1
        """
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
            
        # Panels
        self.init_obs = np.random.uniform(0, 1, size=self.number_of_panels).astype(np.float32)
        self.init_obs = np.where(self.init_obs < 0.5, 0.0, np.random.uniform(0.85, 1.0, size=self.number_of_panels))

        # Combine all initialization into a single step for efficiency
        self.import_price_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min()) / (self.import_price_df.max().max() - self.import_price_df.min().min())
        self.FiT_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min() * 0.33) / (self.import_price_df.max().max() - self.import_price_df.min().min() * 0.33)
        self.efficency_at_zero_norm = (self.efficency_at_zero - 0.999) / (1.156 - 0.999)
        self.panel_cost_and_inverter_at_zero_norm = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())
                
        self.grid_factor_at_zero_norm = (self.grid_factor_at_zero - 0.553) / (0.553 - 0.00022499) 
        
        self.current_budget_constraint = np.random.randint(self.low_budget, self.high_budget)
        self.next_step_budget_constraint = 0
        
        
        # Complete observation initialization in one go
        self.observation = np.concatenate([
            self.init_obs,
            [self.import_price_at_zero_norm, self.FiT_at_zero_norm, self.efficency_at_zero_norm, 
             self.panel_cost_and_inverter_at_zero_norm, 0., 0., 0.]
        ]).astype(np.float32) #***

        self.previous_observation = self.observation.copy()

        # RANDOM IMPORT PRICE
        self.random_import_price = self.import_price_df[np.random.choice(self.import_price_df.shape[0])] 

        # RANDOM EFFICENCY
        self.random_efficency_develop = self.efficency_develop_df[np.random.choice(self.efficency_develop_df.shape[0])]   
        
        # RANDOM COST PER WP
        self.random_cost_per_Wp = self.cost_per_Wp_df[np.random.choice(self.cost_per_Wp_df.shape[0])]   
        
        # RANDOM Grid Factor
        self.random_grid_factor = self.grid_factor_df[np.random.choice(self.grid_factor_df.shape[0])]   #***
        
        self.episode_len = 25  
    
        info = {}
        
        # RESET BALANCES
        self.fin_balance_tot = 0
        self.reward_tot = 0
        self.env_balance_tot = 0
        self.produced = 0
        self.other_costs = 0
        self.FiT = 0.0004
        self.next_FiT = 0.0004
        self.resale_values = array_of_zeros = np.zeros(self.number_of_panels, dtype=np.float32)
        
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)
        self.total_cash_flow = []
        self.annual_cash_flow = 0
                
        self.due_loans = [0, 0, 0, 0] 
        self.current_interest = 0
        self.step_total_interest = 1
        
        self.two_year_ago_interest = 0
        self.first_year_interest = []
        self.second_year_interest = [0]
        self.third_year_interest = [0, 0]
        self.fourth_year_interest = [0, 0, 0]
        self.next_year_total = 0
        
        self.survival = np.zeros(self.number_of_panels, dtype=np.float32)
    
        return self.observation, info
    
    def emission_balance(self, pv_production, grid_factor, panel_emission, action_step):
        
        curtailed = (pv_production.sum() * grid_factor)/1000
        
        number_installed = int(np.sum(action_step))
        
        panel_emission_tot = number_installed * panel_emission
        
        emission_balance = curtailed - panel_emission_tot
        
        return emission_balance 
    
    def calculate_resale(self, initial_panel_cost, indices):
        
        self.resale_values[indices] = initial_panel_cost
        
        self.resale_values = self.resale_values * 0.85
        
        for count, i in enumerate(self.broke):
            if i == 1:
                self.resale_values[count] = 0
        
        resale_step = self.resale_values[indices].sum()
        
        return resale_step
    
    def calculate_panel_inv_cost(self, cost_per_Wp):
        
        PW_ep = self.efficency_develop * self.power_at_zero
        
        panel_cost_and_inverter = PW_ep * cost_per_Wp
        
        return panel_cost_and_inverter
    
    def calculate_irr_and_npv(self, pv_cost, minimised_revenue, export_revenue, penalty):
                
        """
        Calculates total cash flow of the project needed for the internal rate of return
        """ 
        self.expences = 0
        self.annual_cash_flow = 0
        initial_cost = 0
        
        self.expences = pv_cost
        self.annual_cash_flow = self.expences + export_revenue + minimised_revenue + penalty
        initial_cost_q, x = self.calculate_total_CAPEX(self.init_obs, self.panel_cost_and_inverter)
        #initial_cost = - initial_cost_q
        
        if self.episode_len == 24:
            self.total_cash_flow.append(initial_cost + self.annual_cash_flow) 
        else:
            self.total_cash_flow.append(self.annual_cash_flow) 
        
        return self.total_cash_flow
        
    def calculate_penalty(self, current_step, annual_expense):
              
        year = 25 - current_step
        
        if year > 0:
            self.current_budget_constraint = self.next_step_budget_constraint    
            
        
        self.current_interest = self.next_year_total
        annual_expense = (-annual_expense)
        value = 0 
        loan = 0
        annual_interest = 0

        if annual_expense > self.current_budget_constraint:
            loan = (self.current_budget_constraint - annual_expense)
            value = annual_expense / self.current_budget_constraint
            periods = 2 if value < 2 else 3 if value < 3 else 4

            annual_interest = loan / periods
            interest_multiplier = 1

            for i in range(4):
                if i < periods:
                    self.due_loans[i] = annual_interest * interest_multiplier
                    interest_multiplier *= self.loan_interest_rate
                else:
                    self.due_loans[i] = 0
        else:
             self.due_loans = [0, 0, 0, 0]
    
        self.first_year_interest.append(self.due_loans[0])
        self.second_year_interest.append(self.due_loans[1])
        self.third_year_interest.append(self.due_loans[2])
        self.fourth_year_interest.append(self.due_loans[3])
    
    
        self.next_year_total = self.first_year_interest[year] + self.second_year_interest[year] + self.third_year_interest[year] + self.fourth_year_interest[year]
        
        self.next_step_budget_constraint = np.random.randint(self.low_budget, self.high_budget) * self.step_total_interest
        current_budget_observation = (self.next_step_budget_constraint - self.low_budget * self.step_total_interest) / (self.high_budget * self.step_total_interest - self.low_budget * self.step_total_interest) 
        self.observation[self.number_of_panels + 6] = current_budget_observation
                
        return self.current_interest, self.due_loans, self.next_year_total
        
    def calculate_total_CAPEX(self, action_step, panel_cost_and_inverter):
        """
        Calculate CAPEX each step in a vectorized manner.
        """
        BOS = panel_cost_and_inverter * 0.55
        number_installed = int(np.sum(action_step))

        # Calculate costs from module and inverter
        panel_cost_and_inverter_step = panel_cost_and_inverter * number_installed

        # Calculate other installation costs
        if number_installed == 0:
            other_costs = 0
        elif number_installed == 1:
            other_costs = self.initial_other_costs * self.step_total_interest
        else:
            discounts = 0.9 ** np.arange(number_installed)
            other_costs = (self.initial_other_costs * self.step_total_interest * discounts).sum()

        # Calculate BOS costs using vector operations
        is_new_installation = (self.previous_observation[:number_installed] == 0) & (action_step[:number_installed] == 1)
        is_replacement = (self.previous_observation[:number_installed] > 0) & (action_step[:number_installed] == 1)
        BOS_cost = np.sum(BOS * is_new_installation) + np.sum((BOS / 2) * is_replacement)

        # Sum total CAPEX
        total_CAPEX = panel_cost_and_inverter_step + BOS_cost + other_costs

        return total_CAPEX, panel_cost_and_inverter
        
    def failure(self, actions):
        
        beta = 3  # Shape parameter

        # Determine which panels are active based on the actions and previous observations.
        if self.episode_len == 24:
            active_panels = (self.observation[:self.number_of_panels] > 0.85)
        else:
            active_panels = (self.observation[:self.number_of_panels] == self.efficency_develop)

        # Calculate lifespan for all active panels at once
        lifespans = np.random.weibull(beta, self.number_of_panels) * self.phi
        lifespans = np.where(active_panels, lifespans, 0)  # Apply lifespan only to active panels

        # Adjust survival times based on episode length
        self.survival[:self.number_of_panels] = np.where(
            active_panels,
            np.abs(lifespans.astype(int)) + np.abs(self.episode_len - 25),
            self.survival[:self.number_of_panels]
        )

        return self.survival

    def calculate_FiT(self, episodes, import_price):
            
        self.FiT = import_price
            
        if episodes == 25:
            self.FiT = self.FiT
            
        elif episodes == 24 or episodes == 23:
            self.FiT = self.FiT * 0.64
            
        elif episodes == 22:
            self.FiT = self.FiT * 0.46
            
        elif episodes == 21:
            self.FiT = self.FiT * 0.55
            
        elif episodes < 20:
            self.FiT = self.FiT * 0.33
            
        elif episodes == 20:
            self.FiT = self.FiT * 0.37
            
        return self.FiT
                        
    def step(self, action):
        
        """
        defines actions, reward etc.
        """
        
        # RESET THE ANNUAL BALANCES
        self.total_CAPEX = 0
        self.pv_costs = 0
        self.fin_balance = 0
        self.env_balance = 0
        self.number_installed = 0
        irr_fin = 0
        npv_fin = 0
        current_penalty = 0
        self.other_costs = 0
        next_step_penalty = 0
        self.step_total_interest = self.step_total_interest * self.normal_interest_rate
        current_operational_costs = self.operational_cost * self.step_total_interest
        
        
        self.cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
        self.import_price = self.random_import_price[abs(self.episode_len - 25)]
        self.efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
        self.grid_factor = self.random_grid_factor[abs(self.episode_len - 25)]
        self.step_pv_emission = (self.pv_emission[abs(self.episode_len - 25)] * self.efficency_develop) / 100000
           
        self.panel_cost_and_inverter = self.calculate_panel_inv_cost(self.cost_per_Wp)
        FiT = self.calculate_FiT(self.episode_len, self.import_price)
        
        reward = 0   
        
        # Find indices of the lowest 'action' values in previous_observation
        indices_0 = np.argsort(self.previous_observation[:(self.number_of_panels // 2)])[:action[0]]
        indices_1 = np.argsort(self.previous_observation[(self.number_of_panels // 2):self.number_of_panels])[:action[1]]

        indices = np.concatenate([indices_0, indices_1 + (self.number_of_panels // 2)])
        
        # Replace these indices in the observation with efficiency_develop
        self.observation[:self.number_of_panels][indices] = self.efficency_develop

        # Copy over the other values from previous_observation to observation
        mask = np.ones(len(self.previous_observation[:self.number_of_panels]), dtype=bool)
        mask[indices] = False
        self.observation[:self.number_of_panels][mask] = self.previous_observation[:self.number_of_panels][mask]

        replaced_panels = np.zeros(len(self.previous_observation[:self.number_of_panels]), dtype=int)
        replaced_panels[indices] = 1

        instaltion = (self.observation[:self.number_of_panels] > 0).astype(int)
        self.pv_costs -= instaltion.sum() * current_operational_costs

        actions_step = np.array(replaced_panels)

        action = action[0] + action[1]
        
        if action > 0:
            step_CAPEX, panel_cost_and_inverter = self.calculate_total_CAPEX(actions_step, self.panel_cost_and_inverter)
            self.pv_costs -= step_CAPEX
            
        else:
            panel_cost_and_inverter = 0
                
        next_observation = self._get_obs()

        # Calculate the Reslae value
        resale = self.calculate_resale(panel_cost_and_inverter, indices) #  ***
        
        self.pv_costs += resale

        
        # CALCULATE THE BUDGET INTEREST
        current_penalty, due_loans, next_step_penalty = self.calculate_penalty(self.episode_len, self.pv_costs)
        
        
        # CALCULATE THE ENERGY YIELD
        exported_revenue, AC_OUTPUT_tot, AC_for_env, minimised_revenue = self.calculate_import_export(self.elec_df, FiT, self.import_price)        
        
        
        # CALCULATE STEP EMISSIONS
        self.env_balance = self.emission_balance(AC_for_env, self.grid_factor, self.step_pv_emission, actions_step)
        
        self.env_balance_tot += self.env_balance
        
        pv_costs_observation = - self.pv_costs / 10000
        self.observation[self.number_of_panels + 4] = pv_costs_observation
        
        next_step_penalty_observation = - next_step_penalty / 8000
        self.observation[self.number_of_panels + 5] = next_step_penalty_observation
        
        
        # CALCULATE STEP BALANCES
        self.fin_balance += self.pv_costs
        self.fin_balance += current_penalty
        self.fin_balance += float(exported_revenue + minimised_revenue)
        
        # CALCULATE TOTAL BALANCES
        self.fin_balance_tot += self.fin_balance                
        
        # SUBSTRACT 1 FOR TIMESTEP
        self.episode_len -= 1
        done = self.episode_len <= 0
        
        # CALCULATE IRR, NPV AND CARBON INTENSITY
        total_cash_flow = self.calculate_irr_and_npv(self.pv_costs, exported_revenue, minimised_revenue, current_penalty)
        irr = npf.irr(total_cash_flow) * 100
        npv = npf.npv(0.04 ,total_cash_flow)
            
        # RETURNS AND CALCULATE REWARD
        if self.episode_len == 0:
            irr_fin = irr
            npv_fin = npv
        
        reward = self.fin_balance / 1000
        #reward = self.fin_balance_tot / 1000 if done else 0
        
        # FAILURE
         
        survival = self.failure(actions_step)
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)

        for c, p in enumerate(survival):
            
            if c < self.number_of_panels:

                if p - 1 <= abs(self.episode_len - 24):
                    self.broke[c] = 1

                    self.observation[c] = 0
        
        # DEGRADATION RATE
        # Applying degradation only to panels that are operational (above 0.1 efficiency)
        active_panels = self.observation[:self.number_of_panels] > 0.1
        degradations = np.random.normal(self.deg_mu, self.deg_std, size=self.number_of_panels) / 100
        self.observation[:self.number_of_panels][active_panels] -= degradations[active_panels]
        
        if not done: 
        
            self.next_cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
            self.next_import_price = self.random_import_price[abs(self.episode_len - 25)]
            self.next_efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
            self.next_grid_factor = self.random_grid_factor[abs(self.episode_len - 25)]
            next_FIT = self.calculate_FiT(self.episode_len, self.next_import_price)
        
            price_observation = (self.next_import_price - 0.00022499) / (0.0020798 - 0.00022499)
            self.observation[self.number_of_panels] = price_observation

            FiT_observation = (next_FIT - 0.00022499 * 0.33) / (0.0020798 - 0.00022499 * 0.33)
            self.observation[self.number_of_panels + 1] = FiT_observation

            eff_observation = (self.next_efficency_develop - 0.999) / (1.156 - 0.999)
            self.observation[self.number_of_panels + 2] = eff_observation

            cost_per_Wp_observation = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())
            self.observation[self.number_of_panels + 3] = cost_per_Wp_observation
            
            grid_factor_observation = (self.next_grid_factor - 0.553) / (0.553 - 0.00022499)
            #self.observation[self.number_of_panels + 7] = cost_per_Wp_observation
        
        
        info = {"step financial balance (eur):": self.fin_balance,
               "total financial balance: (eur)": self.fin_balance_tot,
               "internal rate of return": irr_fin,
               "current_interest": current_penalty,
                "net present value": npv_fin,
               "enironmental balance": self.env_balance_tot}
         
        
        self.previous_observation = self.observation.copy()
        
        return self.observation, reward, done, False, info

In [17]:
env = TrainEnvironment(JA_60_arr, JA_240_arr, elec_consum_arr, import_price_rate, import_price_train_arr, Eff_train_arr, CAPEX_JA_train_arr)
env_test = TestEnvironment(JA_60_arr, JA_240_arr, elec_consum_arr, import_price_rate, import_price_test_arr, Eff_test_arr, 
                           CAPEX_JA_test_arr, grid_factor_test_arr, pv_co2_arr)

In [18]:
#check_env(env)
def test4(episodes, environment):    
    for episode in range(episodes):
        done = False
        obs = environment.reset()
        step = 0
        print(obs, "\n")
        while not done:
            step += 1
            random_action = environment.action_space.sample()
            obs, reward, done, trun, info = environment.step(random_action)
            
            
            # Extracting the 2nd and 3rd key-value pairs
            keys = list(info.keys())
            values = list(info.values())

            # Getting the 2nd key-value pair
            zeroth_key = keys[0]
            zeroth_value = values[0]

            # Getting the 3rd key-value pair

            sixth_key = keys[5]
            sixth_value = values[5]
            
            print("STEP:", step)
            print("ACT","\n",  random_action)
            print("OBS","\n",  obs)
            print(zeroth_key, zeroth_value, sixth_key, sixth_value)
            print("\n")

In [20]:
test3(1, env)

(array([0.        , 0.        , 0.99974245, 0.9552167 , 0.9129922 ,
       0.85152084, 0.8910442 , 0.        , 0.8731456 , 0.8863569 ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.9739295 ,
       0.88317055, 0.99905765, 0.89180017, 0.        , 0.9793925 ,
       0.        , 0.        , 0.9632497 , 0.        , 0.        ,
       0.        , 0.        , 0.06935652, 0.11512984, 0.00636943,
       0.7395664 , 0.        , 0.        , 0.        ], dtype=float32), {}) 

STEP: 1
ACT 
 [0 8]
OBS 
 [0.         0.         0.9926012  0.9361801  0.8982248  0.8408973
 0.8817504  0.         0.8557852  0.8766868  0.         0.
 0.         0.         0.         0.         0.997946   0.99029624
 0.983579   0.9625607  0.8741782  0.988195   0.8943921  0.9914194
 0.9760588  1.0018206  0.99068755 0.95381486 0.99135894 0.99299407
 0.         0.         0.0654806  0.07353458 0.01116958 0.7395664
 0.15748611 0.04587066 0.228     ]


In [15]:
test1(1000, env_test)

Episode:1 Reward:-2.5342593958167416 

total financial balance: (eur) -80418.55369434791 

internal rate of return nan 

net present value -51769.51169815731 

Episode:2 Reward:-3.647412439672943 

total financial balance: (eur) -80308.9055916943 

internal rate of return 294.152830666034 

net present value -51994.867591901944 

Episode:3 Reward:-2.719391640985068 

total financial balance: (eur) -93743.6933685955 

internal rate of return nan 

net present value -59182.75871330115 

Episode:4 Reward:-1.2503599397677134 

total financial balance: (eur) -69528.9024093443 

internal rate of return nan 

net present value -45658.886840428575 

Episode:5 Reward:-0.5886773663306339 

total financial balance: (eur) -67903.83693100762 

internal rate of return 753.3031100444558 

net present value -45317.608796142704 

Episode:6 Reward:-5.70775762690567 

total financial balance: (eur) -71179.2048637658 

internal rate of return 217.93306789473883 

net present value -43721.38999970594 

Epi

  value = annual_expense / self.current_budget_constraint


Episode:18 Reward:-2.279843965412834 

total financial balance: (eur) -76323.7962088045 

internal rate of return nan 

net present value -52619.086651227146 

Episode:19 Reward:-2.05616171648775 

total financial balance: (eur) -67584.64893408843 

internal rate of return 162.65554084919637 

net present value -45292.582108908675 

Episode:20 Reward:-0.09532370508377153 

total financial balance: (eur) -43924.884839871236 

internal rate of return 320.61356457596435 

net present value -27713.694842657365 

Episode:21 Reward:-1.8845726056455332 

total financial balance: (eur) -61486.425926682554 

internal rate of return nan 

net present value -41449.324976539596 

Episode:22 Reward:-2.0206869488630748 

total financial balance: (eur) -64099.61674412859 

internal rate of return 252.03758940743774 

net present value -42608.97846101613 

Episode:23 Reward:-4.474851746164064 

total financial balance: (eur) -74223.95826910475 

internal rate of return 187.80601707688422 

net present

Episode:81 Reward:-1.8430610581938753 

total financial balance: (eur) -82813.55299343639 

internal rate of return 159.59815014487225 

net present value -53268.24239528539 

Episode:82 Reward:-3.3399140567775976 

total financial balance: (eur) -68915.0639950152 

internal rate of return nan 

net present value -43888.7928325329 

Episode:83 Reward:-2.126526861797055 

total financial balance: (eur) -60332.01763239199 

internal rate of return 171.86727464598994 

net present value -39028.33518084645 

Episode:84 Reward:-3.7584417186052494 

total financial balance: (eur) -61735.48158193283 

internal rate of return nan 

net present value -38974.66253998677 

Episode:85 Reward:-0.22821262252634916 

total financial balance: (eur) -79975.98533025062 

internal rate of return 205.8541137789576 

net present value -52570.779286469595 

Episode:86 Reward:-4.864013405822677 

total financial balance: (eur) -80106.88273721113 

internal rate of return nan 

net present value -52736.686844

Episode:145 Reward:-2.6320782418596536 

total financial balance: (eur) -65792.32803753206 

internal rate of return nan 

net present value -42835.268588562394 

Episode:146 Reward:-0.9429190966199203 

total financial balance: (eur) -66814.45424110837 

internal rate of return 428.60039887653835 

net present value -48225.41491722569 

Episode:147 Reward:0.11203507267907845 

total financial balance: (eur) -60841.858836452484 

internal rate of return -89.14544125793982 

net present value -41174.8278333413 

Episode:148 Reward:-1.3170879442557357 

total financial balance: (eur) -81736.303555769 

internal rate of return 360.33706317002753 

net present value -51379.31726085735 

Episode:149 Reward:-1.5106091077203436 

total financial balance: (eur) -82500.55507781843 

internal rate of return nan 

net present value -53414.366130399016 

Episode:150 Reward:-2.6817439224040482 

total financial balance: (eur) -69390.73511301448 

internal rate of return 121.65680135162572 

net pre

Episode:195 Reward:0.5543484385673273 

total financial balance: (eur) -54880.649611911824 

internal rate of return -69.12592328427276 

net present value -36717.84134242754 

Episode:196 Reward:0.38839578687604265 

total financial balance: (eur) -42049.96279747793 

internal rate of return -54.31576849401951 

net present value -27428.96211991033 

Episode:197 Reward:6.240758852885374 

total financial balance: (eur) -19391.52977952073 

internal rate of return -5.507147202371854 

net present value -20452.420635063852 

Episode:198 Reward:-2.1968293000132864 

total financial balance: (eur) -82852.10288279038 

internal rate of return nan 

net present value -53670.8633559963 

Episode:199 Reward:-1.7168909753511088 

total financial balance: (eur) -47808.9741892406 

internal rate of return 668.0576105254255 

net present value -30430.17449875647 

Episode:200 Reward:0.5008128041539412 

total financial balance: (eur) -22373.30151619155 

internal rate of return -9.27961069279226 

Episode:248 Reward:-3.2360106235549733 

total financial balance: (eur) -57310.531462951614 

internal rate of return nan 

net present value -35270.0116304472 

Episode:249 Reward:0.055868208243675326 

total financial balance: (eur) -60614.03757553508 

internal rate of return -97.0550445437715 

net present value -40810.491653806115 

Episode:250 Reward:-3.9722116729926364 

total financial balance: (eur) -82056.93619180347 

internal rate of return 262.97498416713074 

net present value -51751.85465211629 

Episode:251 Reward:-2.302370914275406 

total financial balance: (eur) -47056.88133757626 

internal rate of return nan 

net present value -34497.857239207384 

Episode:252 Reward:-1.6201143724262366 

total financial balance: (eur) -38471.957592530925 

internal rate of return 81.86539063841127 

net present value -22612.129181448145 

Episode:253 Reward:-3.1745486592829457 

total financial balance: (eur) -76765.09481939062 

internal rate of return 484.844582442914 

net pre

Episode:300 Reward:-0.27033213895475 

total financial balance: (eur) -61848.45617536259 

internal rate of return 203.12840221177808 

net present value -41552.650780105905 

Episode:301 Reward:-2.858798436533387 

total financial balance: (eur) -67153.3285940539 

internal rate of return 562.8294371347531 

net present value -45191.86409057737 

Episode:302 Reward:-1.9977653494036605 

total financial balance: (eur) -39701.10445700903 

internal rate of return nan 

net present value -28394.16982132201 

Episode:303 Reward:-1.9825608995880675 

total financial balance: (eur) -58522.015116293995 

internal rate of return nan 

net present value -39102.104171889594 

Episode:304 Reward:-1.8511502003616278 

total financial balance: (eur) -69173.79717210116 

internal rate of return nan 

net present value -43651.56580022588 

Episode:305 Reward:-0.9780888985127394 

total financial balance: (eur) -44552.7810092801 

internal rate of return nan 

net present value -30698.451466988878 



Episode:350 Reward:-2.398143947874342 

total financial balance: (eur) -67475.88362527509 

internal rate of return nan 

net present value -41805.92748206222 

Episode:351 Reward:-1.0456488723681523 

total financial balance: (eur) -54090.302911962324 

internal rate of return nan 

net present value -36109.27764085526 

Episode:352 Reward:-2.144940570574253 

total financial balance: (eur) -71565.32264022293 

internal rate of return nan 

net present value -48214.427663625895 

Episode:353 Reward:-0.9338066281559686 

total financial balance: (eur) -57758.74544273044 

internal rate of return nan 

net present value -40978.245330140184 

Episode:354 Reward:-0.639851404359093 

total financial balance: (eur) -77838.08192441892 

internal rate of return nan 

net present value -53936.28397424627 

Episode:355 Reward:-5.709956212100675 

total financial balance: (eur) -104310.63436656402 

internal rate of return 385.51103376373845 

net present value -65680.37197524375 

Episode:356 R

Episode:402 Reward:-2.2880502975656256 

total financial balance: (eur) -86275.47165955091 

internal rate of return 612.6783718611879 

net present value -54688.827258924524 

Episode:403 Reward:-1.3791734227795938 

total financial balance: (eur) -87545.29685224945 

internal rate of return 148.50366362488367 

net present value -54354.747421970715 

Episode:404 Reward:1.6001788609759005 

total financial balance: (eur) -32238.09055142155 

internal rate of return -13.378589662822172 

net present value -25073.190615262363 

Episode:405 Reward:-2.1354112811799904 

total financial balance: (eur) -65840.72796279228 

internal rate of return 191.14581441718522 

net present value -43093.48596827876 

Episode:406 Reward:-1.4901265108825892 

total financial balance: (eur) -61631.33352441902 

internal rate of return 115.98589270742185 

net present value -37909.15032454669 

Episode:407 Reward:-0.9667679074168327 

total financial balance: (eur) -37257.77592168808 

internal rate of ret

Episode:454 Reward:-6.345924573475837 

total financial balance: (eur) -95271.28117317104 

internal rate of return nan 

net present value -58774.64876616784 

Episode:455 Reward:1.6157012535257909 

total financial balance: (eur) -68618.99193672725 

internal rate of return -56.305579420280026 

net present value -45851.78927847188 

Episode:456 Reward:-0.5266431547335642 

total financial balance: (eur) -85159.9013242488 

internal rate of return 163.60969227466123 

net present value -52046.81177742957 

Episode:457 Reward:1.9818726415652508 

total financial balance: (eur) -30704.931852988353 

internal rate of return -11.135450275270852 

net present value -26305.516043198288 

Episode:458 Reward:-0.9824241978708651 

total financial balance: (eur) -34761.0294216372 

internal rate of return 198.0385849946909 

net present value -25388.026354378126 

Episode:459 Reward:-0.24449929610342314 

total financial balance: (eur) -16196.533245120187 

internal rate of return -6.076398073

Episode:507 Reward:1.6579594845200336 

total financial balance: (eur) -30286.29543530722 

internal rate of return -12.49210830876204 

net present value -24456.20272015893 

Episode:508 Reward:-1.1181274303437385 

total financial balance: (eur) -63913.51514560192 

internal rate of return 522.1691630137784 

net present value -42646.70234601208 

Episode:509 Reward:-3.8682897587518728 

total financial balance: (eur) -52312.7915011039 

internal rate of return nan 

net present value -32133.583593527936 

Episode:510 Reward:-1.3719926668914886 

total financial balance: (eur) -30128.838700578537 

internal rate of return 4141.909975016781 

net present value -23809.345530797716 

Episode:511 Reward:-1.9174279410680837 

total financial balance: (eur) -51215.91353739592 

internal rate of return 940.9799496973372 

net present value -32899.48388557967 

Episode:512 Reward:-2.85573530548467 

total financial balance: (eur) -75947.08929450215 

internal rate of return nan 

net present

Episode:557 Reward:0.8905047115741354 

total financial balance: (eur) -20906.371763738443 

internal rate of return -11.751978643958939 

net present value -17849.805684576633 

Episode:558 Reward:-2.6244506808452317 

total financial balance: (eur) -63484.57685011995 

internal rate of return nan 

net present value -40658.12266398597 

Episode:559 Reward:-4.646223383625751 

total financial balance: (eur) -88850.23104461077 

internal rate of return 185.6438663068416 

net present value -54618.4157747501 

Episode:560 Reward:-3.9206850947667418 

total financial balance: (eur) -62506.982268956344 

internal rate of return 379.9478905751989 

net present value -38892.034416982424 

Episode:561 Reward:-1.2500493880116974 

total financial balance: (eur) -65772.38559924038 

internal rate of return nan 

net present value -42872.261548747476 

Episode:562 Reward:-2.644130367116756 

total financial balance: (eur) -104941.9989341893 

internal rate of return 1148.2260906293845 

net pre

Episode:618 Reward:-2.3039372643243943 

total financial balance: (eur) -48920.80888759994 

internal rate of return nan 

net present value -33700.341079123944 

Episode:619 Reward:-2.551104627333383 

total financial balance: (eur) -63876.6565471564 

internal rate of return 368.2302397174008 

net present value -42431.90424641134 

Episode:620 Reward:-3.6180141387026015 

total financial balance: (eur) -84318.56967394231 

internal rate of return 1090.1935003986591 

net present value -55366.040248475954 

Episode:621 Reward:-0.3879278673697572 

total financial balance: (eur) -33094.16412186809 

internal rate of return -28.486471689100146 

net present value -26024.024051368357 

Episode:622 Reward:0.783553384818536 

total financial balance: (eur) -44576.990328371365 

internal rate of return -34.997950772712784 

net present value -31464.775065826685 

Episode:623 Reward:0.5140967328121405 

total financial balance: (eur) -14919.825401565777 

internal rate of return -9.12520256

Episode:681 Reward:-2.823242668212647 

total financial balance: (eur) -68883.07574173775 

internal rate of return 410.79404321639777 

net present value -44005.08802919526 

Episode:682 Reward:-3.0936849236483086 

total financial balance: (eur) -63432.52146988335 

internal rate of return nan 

net present value -40511.38379510402 

Episode:683 Reward:-3.235665418854551 

total financial balance: (eur) -83466.78262179917 

internal rate of return nan 

net present value -52931.90007027454 

Episode:684 Reward:-2.7758542782503475 

total financial balance: (eur) -64419.9732504692 

internal rate of return nan 

net present value -42152.66294893468 

Episode:685 Reward:-1.7908426296537372 

total financial balance: (eur) -70008.95814650707 

internal rate of return 486.6640071247666 

net present value -46428.20844799044 

Episode:686 Reward:-1.3684758566191995 

total financial balance: (eur) -51986.23064313058 

internal rate of return 1735.993139432695 

net present value -34392.57

Episode:729 Reward:-1.3536237983113961 

total financial balance: (eur) -75020.5439487486 

internal rate of return 1409.433905189709 

net present value -48423.78606385318 

Episode:730 Reward:-0.7027672013114743 

total financial balance: (eur) -66767.92340678237 

internal rate of return nan 

net present value -44406.47737018821 

Episode:731 Reward:-0.42186891328656384 

total financial balance: (eur) -78062.9033947984 

internal rate of return nan 

net present value -53492.2373859505 

Episode:732 Reward:0.7878879809921328 

total financial balance: (eur) -36450.24728877865 

internal rate of return -15.93251676723474 

net present value -27022.20694530585 

Episode:733 Reward:-1.8142180066029223 

total financial balance: (eur) -54769.239480070064 

internal rate of return nan 

net present value -36615.8889155285 

Episode:734 Reward:1.2685771098410696 

total financial balance: (eur) -34733.694625004864 

internal rate of return -38.99419717315293 

net present value -25345.0

Episode:792 Reward:-4.154257095272989 

total financial balance: (eur) -61199.3048824905 

internal rate of return nan 

net present value -35742.04483351731 

Episode:793 Reward:-2.4519036068096582 

total financial balance: (eur) -62472.45683836631 

internal rate of return nan 

net present value -39548.56178125037 

Episode:794 Reward:-2.0334004601847426 

total financial balance: (eur) -57247.46562210876 

internal rate of return nan 

net present value -36847.60280538247 

Episode:795 Reward:-0.8984470062947348 

total financial balance: (eur) -44330.24487713377 

internal rate of return 356.27516435264255 

net present value -34362.073594987836 

Episode:796 Reward:-2.4864149707213272 

total financial balance: (eur) -83715.05352952692 

internal rate of return 119.55702804748012 

net present value -53278.073632429216 

Episode:797 Reward:-2.0989112667634107 

total financial balance: (eur) -73590.42249867614 

internal rate of return 114.2451774062387 

net present value -4617

Episode:845 Reward:-2.140392354033483 

total financial balance: (eur) -59519.25490136764 

internal rate of return 32824.91096369484 

net present value -37865.36586312628 

Episode:846 Reward:-0.6842233059808296 

total financial balance: (eur) -29957.240557275945 

internal rate of return 146.15283309751655 

net present value -20549.720599204255 

Episode:847 Reward:-1.4966438629110816 

total financial balance: (eur) -43391.93557520717 

internal rate of return 202.9718505895508 

net present value -28094.16117360619 

Episode:848 Reward:-2.3158198052778225 

total financial balance: (eur) -70786.47654127983 

internal rate of return 2324.0177206095454 

net present value -44559.01382429191 

Episode:849 Reward:-2.184666851242859 

total financial balance: (eur) -80327.90664644762 

internal rate of return 260.60133816744 

net present value -50648.59923872256 

Episode:850 Reward:-2.759081569142785 

total financial balance: (eur) -78143.89901197034 

internal rate of return nan 

Episode:897 Reward:1.6095881245056316 

total financial balance: (eur) -920.451104659548 

internal rate of return -0.31780782673335173 

net present value -7475.784471464971 

Episode:898 Reward:-1.9534323328469028 

total financial balance: (eur) -63405.030327173125 

internal rate of return 518.4603437467065 

net present value -39923.27490322901 

Episode:899 Reward:1.9938271771510399 

total financial balance: (eur) -24170.654213657996 

internal rate of return -10.717135114341147 

net present value -19706.65963665913 

Episode:900 Reward:-0.12838631048476146 

total financial balance: (eur) -59965.872170566225 

internal rate of return nan 

net present value -37746.15523803428 

Episode:901 Reward:-5.0658897072098625 

total financial balance: (eur) -94484.8179057492 

internal rate of return 348.6209007583028 

net present value -59071.27891619065 

Episode:902 Reward:-1.2497698746857477 

total financial balance: (eur) -46881.78381033691 

internal rate of return nan 

net pr

Episode:957 Reward:0.846833240781385 

total financial balance: (eur) -48055.3935491933 

internal rate of return -48.119057087354264 

net present value -32765.67260678938 

Episode:958 Reward:0.06067759982820644 

total financial balance: (eur) -39861.289644076736 

internal rate of return -32.18855506091326 

net present value -29723.31061241027 

Episode:959 Reward:-4.0161628008725225 

total financial balance: (eur) -90186.45026934672 

internal rate of return 265.46407774110105 

net present value -56958.559439324614 

Episode:960 Reward:-0.8867928124926411 

total financial balance: (eur) -47670.814484703886 

internal rate of return 123.84005240176363 

net present value -32505.220832454947 

Episode:961 Reward:-0.4439818024012593 

total financial balance: (eur) -58166.87829903938 

internal rate of return nan 

net present value -39546.39159331078 

Episode:962 Reward:-2.87434084650354 

total financial balance: (eur) -82746.03230893017 

internal rate of return 132.849750022

In [138]:
def make_env(rank: int, seed: int = 0) -> Callable:
    def _init() -> gym.Env:
        random.seed(seed + rank)
        np.random.seed(seed + rank) 
        env = TrainEnvironment(JA_60_arr, JA_240_arr, elec_consum_arr, import_price_rate, import_price_train_arr, Eff_train_arr, CAPEX_JA_train_arr)
        env.reset(seed=seed + rank)
        return env

    return _init
# Number of environments to run in parallel
num_cpu = 16
env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])   

In [139]:
log_path = "./logs/"
eval_callback = EvalCallback(env_test, best_model_save_path = "C:/Users/kubaw/Desktop/DELFT/THESIS/CODE/TEST_MODELS/32_ja_low_4/",
                             log_path = log_path, n_eval_episodes = 750, eval_freq=10000,
                             deterministic=True, render=False)


In [143]:
policy_kwargs = dict(net_arch=dict(pi=[1536, 1536], vf=[1536, 1536]))

In [144]:
def linear_schedule(initial_value, final_value=0.00001):
    """
    Returns a function that computes a linearly decreasing value from initial_value to final_value.
    """
    def func(progress_remaining):
        # Calculate the decrease based on the remaining progress
        return final_value + (initial_value - final_value) * progress_remaining
    return func

# Define the learning rate using the linear schedule
learning_rate = linear_schedule(0.0002)

In [145]:
model = PPO("MlpPolicy", env, learning_rate = learning_rate, batch_size = 2048, n_epochs = 24, policy_kwargs = policy_kwargs, gamma = 0.99,  verbose=1, tensorboard_log = "C:/Users/kubaw/Desktop/DELFT/THESIS\CODE/TEST_MODELS/LOGS/logs")
TIMESTEPS = 10000000
model.learn(total_timesteps = TIMESTEPS, callback=eval_callback)

Using cpu device
Logging to C:/Users/kubaw/Desktop/DELFT/THESIS\CODE/TEST_MODELS/LOGS/logs\PPO_464


  value = annual_expense / self.current_budget_constraint


Eval num_timesteps=28928, episode_reward=26.80 +/- 14.67
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | 26.8     |
| time/              |          |
|    total_timesteps | 28928    |
---------------------------------
New best mean reward!
------------------------------
| time/              |       |
|    fps             | 873   |
|    iterations      | 1     |
|    time_elapsed    | 37    |
|    total_timesteps | 32768 |
------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 510         |
|    iterations           | 2           |
|    time_elapsed         | 128         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.011024598 |
|    clip_fraction        | 0.168       |
|    clip_range           | 0.2         |
|    entropy_lo

New best mean reward!
-------------------------------
| time/              |        |
|    fps             | 333    |
|    iterations      | 11     |
|    time_elapsed    | 1081   |
|    total_timesteps | 360448 |
-------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 333         |
|    iterations           | 12          |
|    time_elapsed         | 1179        |
|    total_timesteps      | 393216      |
| train/                  |             |
|    approx_kl            | 0.013723619 |
|    clip_fraction        | 0.188       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.5        |
|    explained_variance   | 0.495       |
|    learning_rate        | 0.000193    |
|    loss                 | 36.3        |
|    n_updates            | 264         |
|    policy_gradient_loss | -0.0189     |
|    value_loss           | 67.2        |
----------------------------------------

New best mean reward!
-------------------------------
| time/              |        |
|    fps             | 322    |
|    iterations      | 21     |
|    time_elapsed    | 2131   |
|    total_timesteps | 688128 |
-------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 323         |
|    iterations           | 22          |
|    time_elapsed         | 2230        |
|    total_timesteps      | 720896      |
| train/                  |             |
|    approx_kl            | 0.009245668 |
|    clip_fraction        | 0.114       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.18       |
|    explained_variance   | 0.494       |
|    learning_rate        | 0.000187    |
|    loss                 | 33          |
|    n_updates            | 504         |
|    policy_gradient_loss | -0.01       |
|    value_loss           | 64.9        |
----------------------------------------

New best mean reward!
--------------------------------
| time/              |         |
|    fps             | 305     |
|    iterations      | 31      |
|    time_elapsed    | 3319    |
|    total_timesteps | 1015808 |
--------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 303          |
|    iterations           | 32           |
|    time_elapsed         | 3453         |
|    total_timesteps      | 1048576      |
| train/                  |              |
|    approx_kl            | 0.0055917623 |
|    clip_fraction        | 0.0444       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.18        |
|    explained_variance   | 0.633        |
|    learning_rate        | 0.000181     |
|    loss                 | 26.9         |
|    n_updates            | 744          |
|    policy_gradient_loss | -0.00487     |
|    value_loss           | 56.4         |
----------------

------------------------------------------
| time/                   |              |
|    fps                  | 284          |
|    iterations           | 42           |
|    time_elapsed         | 4833         |
|    total_timesteps      | 1376256      |
| train/                  |              |
|    approx_kl            | 0.0044818865 |
|    clip_fraction        | 0.0434       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.77        |
|    explained_variance   | 0.67         |
|    learning_rate        | 0.000174     |
|    loss                 | 26.7         |
|    n_updates            | 984          |
|    policy_gradient_loss | -0.00495     |
|    value_loss           | 53.2         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 283          |
|    iterations           | 43           |
|    time_elapsed         | 4973         |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 268         |
|    iterations           | 52          |
|    time_elapsed         | 6352        |
|    total_timesteps      | 1703936     |
| train/                  |             |
|    approx_kl            | 0.006083116 |
|    clip_fraction        | 0.0477      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.72       |
|    explained_variance   | 0.671       |
|    learning_rate        | 0.000168    |
|    loss                 | 23.9        |
|    n_updates            | 1224        |
|    policy_gradient_loss | -0.00406    |
|    value_loss           | 48.9        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 268          |
|    iterations           | 53           |
|    time_elapsed         | 6476         |
|    total_timesteps      | 1

-----------------------------------------
| time/                   |             |
|    fps                  | 263         |
|    iterations           | 62          |
|    time_elapsed         | 7717        |
|    total_timesteps      | 2031616     |
| train/                  |             |
|    approx_kl            | 0.004345891 |
|    clip_fraction        | 0.0652      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.51       |
|    explained_variance   | 0.674       |
|    learning_rate        | 0.000162    |
|    loss                 | 26.4        |
|    n_updates            | 1464        |
|    policy_gradient_loss | -0.00303    |
|    value_loss           | 53.1        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 263         |
|    iterations           | 63          |
|    time_elapsed         | 7825        |
|    total_timesteps      | 206438

------------------------------------------
| time/                   |              |
|    fps                  | 265          |
|    iterations           | 72           |
|    time_elapsed         | 8875         |
|    total_timesteps      | 2359296      |
| train/                  |              |
|    approx_kl            | 0.0054598404 |
|    clip_fraction        | 0.0464       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 0.692        |
|    learning_rate        | 0.000156     |
|    loss                 | 24.2         |
|    n_updates            | 1704         |
|    policy_gradient_loss | -0.00371     |
|    value_loss           | 49.5         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 73          |
|    time_elapsed         | 8981        |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 265          |
|    iterations           | 82           |
|    time_elapsed         | 10105        |
|    total_timesteps      | 2686976      |
| train/                  |              |
|    approx_kl            | 0.0039524613 |
|    clip_fraction        | 0.0549       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.48        |
|    explained_variance   | 0.688        |
|    learning_rate        | 0.00015      |
|    loss                 | 25.8         |
|    n_updates            | 1944         |
|    policy_gradient_loss | -0.00308     |
|    value_loss           | 50.1         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 266          |
|    iterations           | 83           |
|    time_elapsed         | 10196        |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 275         |
|    iterations           | 92          |
|    time_elapsed         | 10952       |
|    total_timesteps      | 3014656     |
| train/                  |             |
|    approx_kl            | 0.022930501 |
|    clip_fraction        | 0.085       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.46       |
|    explained_variance   | 0.687       |
|    learning_rate        | 0.000143    |
|    loss                 | 25.9        |
|    n_updates            | 2184        |
|    policy_gradient_loss | -0.000643   |
|    value_loss           | 52.4        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 276          |
|    iterations           | 93           |
|    time_elapsed         | 11028        |
|    total_timesteps      | 3

------------------------------------------
| time/                   |              |
|    fps                  | 283          |
|    iterations           | 102          |
|    time_elapsed         | 11773        |
|    total_timesteps      | 3342336      |
| train/                  |              |
|    approx_kl            | 0.0068847323 |
|    clip_fraction        | 0.0615       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.17        |
|    explained_variance   | 0.702        |
|    learning_rate        | 0.000137     |
|    loss                 | 25.1         |
|    n_updates            | 2424         |
|    policy_gradient_loss | -0.0022      |
|    value_loss           | 49.7         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 284          |
|    iterations           | 103          |
|    time_elapsed         | 11851        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 291          |
|    iterations           | 112          |
|    time_elapsed         | 12608        |
|    total_timesteps      | 3670016      |
| train/                  |              |
|    approx_kl            | 0.0034949621 |
|    clip_fraction        | 0.0431       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.06        |
|    explained_variance   | 0.724        |
|    learning_rate        | 0.000131     |
|    loss                 | 22.4         |
|    n_updates            | 2664         |
|    policy_gradient_loss | -0.00356     |
|    value_loss           | 46.8         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 291          |
|    iterations           | 113          |
|    time_elapsed         | 12684        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 297          |
|    iterations           | 122          |
|    time_elapsed         | 13425        |
|    total_timesteps      | 3997696      |
| train/                  |              |
|    approx_kl            | 0.0065157674 |
|    clip_fraction        | 0.0382       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.867       |
|    explained_variance   | 0.722        |
|    learning_rate        | 0.000125     |
|    loss                 | 24.5         |
|    n_updates            | 2904         |
|    policy_gradient_loss | -0.00306     |
|    value_loss           | 48.5         |
------------------------------------------
Eval num_timesteps=4028928, episode_reward=36.91 +/- 14.57
Episode length: 25.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 25           |
|    me

------------------------------------------
| time/                   |              |
|    fps                  | 302          |
|    iterations           | 132          |
|    time_elapsed         | 14308        |
|    total_timesteps      | 4325376      |
| train/                  |              |
|    approx_kl            | 0.0038638022 |
|    clip_fraction        | 0.0581       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.76        |
|    explained_variance   | 0.732        |
|    learning_rate        | 0.000118     |
|    loss                 | 20.9         |
|    n_updates            | 3144         |
|    policy_gradient_loss | -0.00255     |
|    value_loss           | 43.9         |
------------------------------------------
Eval num_timesteps=4348928, episode_reward=35.65 +/- 14.12
Episode length: 25.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 25          |
|    mean_

-----------------------------------------
| time/                   |             |
|    fps                  | 306         |
|    iterations           | 142         |
|    time_elapsed         | 15183       |
|    total_timesteps      | 4653056     |
| train/                  |             |
|    approx_kl            | 0.006606532 |
|    clip_fraction        | 0.0445      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.675      |
|    explained_variance   | 0.735       |
|    learning_rate        | 0.000112    |
|    loss                 | 22.5        |
|    n_updates            | 3384        |
|    policy_gradient_loss | -0.00233    |
|    value_loss           | 44.8        |
-----------------------------------------
Eval num_timesteps=4668928, episode_reward=37.73 +/- 15.43
Episode length: 25.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 25          |
|    mean_reward          | 

------------------------------------------
| time/                   |              |
|    fps                  | 310          |
|    iterations           | 152          |
|    time_elapsed         | 16060        |
|    total_timesteps      | 4980736      |
| train/                  |              |
|    approx_kl            | 0.0022776364 |
|    clip_fraction        | 0.0369       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.714       |
|    explained_variance   | 0.73         |
|    learning_rate        | 0.000106     |
|    loss                 | 22.7         |
|    n_updates            | 3624         |
|    policy_gradient_loss | -0.00174     |
|    value_loss           | 46.7         |
------------------------------------------
Eval num_timesteps=4988928, episode_reward=36.75 +/- 15.11
Episode length: 25.00 +/- 0.00
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 25         |
|    mean_rew

-----------------------------------------
| time/                   |             |
|    fps                  | 313         |
|    iterations           | 162         |
|    time_elapsed         | 16938       |
|    total_timesteps      | 5308416     |
| train/                  |             |
|    approx_kl            | 0.004118887 |
|    clip_fraction        | 0.0401      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.731      |
|    explained_variance   | 0.725       |
|    learning_rate        | 9.98e-05    |
|    loss                 | 22.5        |
|    n_updates            | 3864        |
|    policy_gradient_loss | -0.00258    |
|    value_loss           | 44.9        |
-----------------------------------------
Eval num_timesteps=5308928, episode_reward=36.27 +/- 14.89
Episode length: 25.00 +/- 0.00
---------------------------------------
| eval/                   |           |
|    mean_ep_length       | 25        |
|    mean_reward          | 36.3  

Eval num_timesteps=5628928, episode_reward=36.48 +/- 15.16
Episode length: 25.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 25           |
|    mean_reward          | 36.5         |
| time/                   |              |
|    total_timesteps      | 5628928      |
| train/                  |              |
|    approx_kl            | 0.0016872529 |
|    clip_fraction        | 0.0442       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.74        |
|    explained_variance   | 0.715        |
|    learning_rate        | 9.35e-05     |
|    loss                 | 22.9         |
|    n_updates            | 4104         |
|    policy_gradient_loss | -0.000614    |
|    value_loss           | 48.8         |
------------------------------------------
--------------------------------
| time/              |         |
|    fps             | 316     |
|    iterations      | 172     |
|   

Eval num_timesteps=5948928, episode_reward=37.38 +/- 14.64
Episode length: 25.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 25           |
|    mean_reward          | 37.4         |
| time/                   |              |
|    total_timesteps      | 5948928      |
| train/                  |              |
|    approx_kl            | 0.0023828838 |
|    clip_fraction        | 0.0309       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.565       |
|    explained_variance   | 0.731        |
|    learning_rate        | 8.73e-05     |
|    loss                 | 23.1         |
|    n_updates            | 4344         |
|    policy_gradient_loss | -0.00308     |
|    value_loss           | 45.6         |
------------------------------------------
--------------------------------
| time/              |         |
|    fps             | 320     |
|    iterations      | 182     |
|   

Eval num_timesteps=6268928, episode_reward=36.27 +/- 14.22
Episode length: 25.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 25          |
|    mean_reward          | 36.3        |
| time/                   |             |
|    total_timesteps      | 6268928     |
| train/                  |             |
|    approx_kl            | 0.002230943 |
|    clip_fraction        | 0.0292      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.506      |
|    explained_variance   | 0.746       |
|    learning_rate        | 8.11e-05    |
|    loss                 | 19.6        |
|    n_updates            | 4584        |
|    policy_gradient_loss | -0.00264    |
|    value_loss           | 41.7        |
-----------------------------------------
--------------------------------
| time/              |         |
|    fps             | 323     |
|    iterations      | 192     |
|    time_elapsed    |

Eval num_timesteps=6588928, episode_reward=38.19 +/- 14.92
Episode length: 25.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 25          |
|    mean_reward          | 38.2        |
| time/                   |             |
|    total_timesteps      | 6588928     |
| train/                  |             |
|    approx_kl            | 0.002294829 |
|    clip_fraction        | 0.0238      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.485      |
|    explained_variance   | 0.727       |
|    learning_rate        | 7.49e-05    |
|    loss                 | 23.1        |
|    n_updates            | 4824        |
|    policy_gradient_loss | -0.00288    |
|    value_loss           | 47          |
-----------------------------------------
New best mean reward!
--------------------------------
| time/              |         |
|    fps             | 326     |
|    iterations      | 202     |


--------------------------------
| time/              |         |
|    fps             | 329     |
|    iterations      | 211     |
|    time_elapsed    | 20992   |
|    total_timesteps | 6914048 |
--------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 329          |
|    iterations           | 212          |
|    time_elapsed         | 21069        |
|    total_timesteps      | 6946816      |
| train/                  |              |
|    approx_kl            | 0.0032746783 |
|    clip_fraction        | 0.0344       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.484       |
|    explained_variance   | 0.74         |
|    learning_rate        | 6.86e-05     |
|    loss                 | 20.1         |
|    n_updates            | 5064         |
|    policy_gradient_loss | -0.00256     |
|    value_loss           | 42.3         |
--------------------------------------

--------------------------------
| time/              |         |
|    fps             | 332     |
|    iterations      | 221     |
|    time_elapsed    | 21806   |
|    total_timesteps | 7241728 |
--------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 332         |
|    iterations           | 222         |
|    time_elapsed         | 21881       |
|    total_timesteps      | 7274496     |
| train/                  |             |
|    approx_kl            | 0.002737285 |
|    clip_fraction        | 0.0226      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.461      |
|    explained_variance   | 0.742       |
|    learning_rate        | 6.24e-05    |
|    loss                 | 21.2        |
|    n_updates            | 5304        |
|    policy_gradient_loss | -0.00251    |
|    value_loss           | 42.7        |
-----------------------------------------
-------------

--------------------------------
| time/              |         |
|    fps             | 334     |
|    iterations      | 231     |
|    time_elapsed    | 22623   |
|    total_timesteps | 7569408 |
--------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 334         |
|    iterations           | 232         |
|    time_elapsed         | 22698       |
|    total_timesteps      | 7602176     |
| train/                  |             |
|    approx_kl            | 0.003873638 |
|    clip_fraction        | 0.0256      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.475      |
|    explained_variance   | 0.734       |
|    learning_rate        | 5.62e-05    |
|    loss                 | 20.9        |
|    n_updates            | 5544        |
|    policy_gradient_loss | -0.00211    |
|    value_loss           | 41.1        |
-----------------------------------------
-------------

--------------------------------
| time/              |         |
|    fps             | 336     |
|    iterations      | 241     |
|    time_elapsed    | 23436   |
|    total_timesteps | 7897088 |
--------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 337         |
|    iterations           | 242         |
|    time_elapsed         | 23509       |
|    total_timesteps      | 7929856     |
| train/                  |             |
|    approx_kl            | 0.001962436 |
|    clip_fraction        | 0.035       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.45       |
|    explained_variance   | 0.729       |
|    learning_rate        | 5e-05       |
|    loss                 | 19.9        |
|    n_updates            | 5784        |
|    policy_gradient_loss | -0.00181    |
|    value_loss           | 41.1        |
-----------------------------------------
-------------

------------------------------------------
| time/                   |              |
|    fps                  | 339          |
|    iterations           | 252          |
|    time_elapsed         | 24323        |
|    total_timesteps      | 8257536      |
| train/                  |              |
|    approx_kl            | 0.0012968521 |
|    clip_fraction        | 0.0179       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.459       |
|    explained_variance   | 0.732        |
|    learning_rate        | 4.37e-05     |
|    loss                 | 23.5         |
|    n_updates            | 6024         |
|    policy_gradient_loss | -0.00197     |
|    value_loss           | 43.5         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 339          |
|    iterations           | 253          |
|    time_elapsed         | 24399        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 341          |
|    iterations           | 262          |
|    time_elapsed         | 25136        |
|    total_timesteps      | 8585216      |
| train/                  |              |
|    approx_kl            | 0.0021241414 |
|    clip_fraction        | 0.0229       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.444       |
|    explained_variance   | 0.716        |
|    learning_rate        | 3.75e-05     |
|    loss                 | 25.3         |
|    n_updates            | 6264         |
|    policy_gradient_loss | -0.00242     |
|    value_loss           | 46           |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 341          |
|    iterations           | 263          |
|    time_elapsed         | 25211        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 343          |
|    iterations           | 272          |
|    time_elapsed         | 25942        |
|    total_timesteps      | 8912896      |
| train/                  |              |
|    approx_kl            | 0.0012166662 |
|    clip_fraction        | 0.0171       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.405       |
|    explained_variance   | 0.743        |
|    learning_rate        | 3.13e-05     |
|    loss                 | 20.2         |
|    n_updates            | 6504         |
|    policy_gradient_loss | -0.00213     |
|    value_loss           | 40.8         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 343          |
|    iterations           | 273          |
|    time_elapsed         | 26016        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 345          |
|    iterations           | 282          |
|    time_elapsed         | 26757        |
|    total_timesteps      | 9240576      |
| train/                  |              |
|    approx_kl            | 0.0016528915 |
|    clip_fraction        | 0.0131       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.405       |
|    explained_variance   | 0.736        |
|    learning_rate        | 2.51e-05     |
|    loss                 | 22.5         |
|    n_updates            | 6744         |
|    policy_gradient_loss | -0.00206     |
|    value_loss           | 43.6         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 345          |
|    iterations           | 283          |
|    time_elapsed         | 26835        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 347          |
|    iterations           | 292          |
|    time_elapsed         | 27573        |
|    total_timesteps      | 9568256      |
| train/                  |              |
|    approx_kl            | 0.0008505134 |
|    clip_fraction        | 0.00942      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.4         |
|    explained_variance   | 0.736        |
|    learning_rate        | 1.88e-05     |
|    loss                 | 21.5         |
|    n_updates            | 6984         |
|    policy_gradient_loss | -0.00149     |
|    value_loss           | 44.3         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 347          |
|    iterations           | 293          |
|    time_elapsed         | 27648        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 348          |
|    iterations           | 302          |
|    time_elapsed         | 28382        |
|    total_timesteps      | 9895936      |
| train/                  |              |
|    approx_kl            | 0.0007362795 |
|    clip_fraction        | 0.00522      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.41        |
|    explained_variance   | 0.741        |
|    learning_rate        | 1.26e-05     |
|    loss                 | 21.4         |
|    n_updates            | 7224         |
|    policy_gradient_loss | -0.0013      |
|    value_loss           | 42.4         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 348          |
|    iterations           | 303          |
|    time_elapsed         | 28458        |
|    total_

<stable_baselines3.ppo.ppo.PPO at 0x14f732b8610>

In [146]:
model.save(r"C:\Users\kubaw\Desktop\DELFT\THESIS\CODE\TEST_MODELS\32_ja_low_4")

In [172]:
evaluate1(1, env_test, model)

# model 3 - max rew = 37.30
# model 4 - 38.21
# model 5 - 38.39

Act: [ 7 10] 
 Obs: [0.92634445 0.8925168  0.91141653 0.89069617 0.9930584  0.9240684
 0.9932654  0.98363453 0.9877893  0.9850194  0.9924606  0.99464905
 0.9421162  0.9174931  0.9921635  0.84989536 0.8701049  0.99775994
 0.995036   0.99662364 0.93052363 0.9832196  0.9883196  0.9898605
 0.9920565  0.9934534  0.9125185  0.9900119  0.90531087 0.85756266
 0.9914007  0.9929415  0.07363535 0.07836134 0.00636957 0.5167822
 0.35770845 0.09400264 0.18666667] 
 Balance 402.36250792691953
Act: [0 0] 
 Obs: [0.9216845  0.8840601  0.8926022  0.8757936  0.98503816 0.91861403
 0.9892795  0.9699225  0.96993166 0.9811535  0.98836875 0.9876858
 0.9301391  0.8950002  1.0026964  0.83625853 0.8600339  0.999407
 0.9887747  0.9849863  0.91840464 0.9732661  0.9706089  0.98661536
 0.9881776  0.9817699  0.9103362  0.9932513  0.885845   0.8431694
 0.9860694  0.9883433  0.0620769  0.07151994 0.00636957 0.4730827
 0.0559319  0.11265902 0.41066667] 
 Balance 1751.8465235468889
Act: [2 0] 
 Obs: [0.91732854 0.871059

In [173]:
def evaluate2(episodes, environment, model):
    
    mean_irr = 0
    mean_fin_balance = 0
    irr = 0
    fin_balance = 0
    count = 0
    npv = 0
    list_npv = []
    env_balance = 0
    mean_env_balance = 0

    for ep in range(episodes):

        obs, _ = environment.reset()  # Unpack the tuple and ignore the info part
        done = False

        while not done:
            action, _ = model.predict(obs)  # Now obs is just the observation array
            obs, reward, done, truncated, info = environment.step(action)
            # Extracting the 2nd and 3rd key-value pairs
            keys = list(info.keys())
            values = list(info.values())

            # Getting the 2nd key-value pair
            second_value = values[1]

            # Getting the 3rd key-value pair
            fourth_value = values[4]
            
            fith_value = values[5]
        
        fin_balance += second_value
        npv += fourth_value
        count += 1
        
        env_balance += fith_value
        
        list_npv.append(fourth_value)
            
    mean_fin_balance = fin_balance/count
    mean_npv = npv/count
    mean_env_balance = env_balance / count

    #print(mean_npv)

    environment.close()
    
    return(mean_npv, mean_env_balance)

In [174]:
evaluate2(3000, env_test, model)

# 29126.347277021632
# 29241.136351006262
# 36152

  value = annual_expense / self.current_budget_constraint


(22241.29117767018, 61643.237394789)

In [159]:
def basepolicy1(episodes, environment):
    
    mean_irr = 0
    mean_fin_balance = 0
    irr = 0
    fin_balance = 0
    count = 0
    irr_count = 0
    npv = 0
    list_npv = []
    env_balance = 0
    mean_env_balance = 0

    for ep in range(episodes):

        obs, _ = environment.reset()  # Unpack the tuple and ignore the info part
        done = False

        while not done:
            
            action = np.array([0, 0])
            for i, n in enumerate(obs):
                if i < 32:
                    if i < 16:
                        if n < 0.80:
                            action[0] += 1
                    if i >= 16:
                        if n < 0.80:
                            action[1] += 1
                    

            obs, reward, done, truncated, info = environment.step(action)

            # Extracting the 2nd and 3rd key-value pairs
            keys = list(info.keys())
            values = list(info.values())

            # Getting the 2nd key-value pair
            second_value = values[1]

            # Getting the 3rd key-value pair
    
            third_value = values[2]
            fourth_value = values[4]
            fith_value = values[5]
            
        
        fin_balance += second_value
        npv += fourth_value
        count += 1
        
        env_balance += fith_value
        
        list_npv.append(fourth_value)
            
    mean_fin_balance = fin_balance/count
    mean_env_balance = env_balance/count
    mean_npv = npv/count

    #print(mean_npv, "\n", mean_irr, "\n" )

    environment.close()
    
    return(mean_npv, mean_env_balance)

In [160]:
basepolicy1(5000, env_test)

#34811

  value = annual_expense / self.current_budget_constraint


(22103.15529939608, 62351.283144360495)