In [3]:
# imports
import pandas as pd
import numpy as np
import numpy_financial as npf
import random  
import time

from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO, DQN, A2C
from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback, EvalCallback
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from sb3_contrib import RecurrentPPO

from environment_fx_no_env import calculate_import_export, test1, test2, test3, evaluate1, evaluate2, basepolicy

import gymnasium as gym
from gymnasium import spaces

import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
from typing import Callable

import torch
import torch as th
from torch import nn
import torch.nn as nn

ModuleNotFoundError: No module named 'numpy_financial'

In [None]:
# import and modify data

# Assuming the file is a CSV and specifying the correct path and filename
file_path = r"file_path"

# Use pandas to read the CSV file
JA_60 = pd.read_csv(file_path + "/JA_60")
JA_240 = pd.read_csv(file_path + "/JA_240")

elec_df = pd.read_csv(file_path + "/hourly_consumption_gemany2.csv")
import_price = pd.read_csv(file_path + "/electricity_tariff.csv")

#elec_df = elec_df * 1000
elec_df = elec_df.drop('HourOfYear', axis=1)

elec_df['hour_of_day'] = np.arange(8760) % 24
elec_df['day_of_week'] = np.arange(8760) // 24 % 7  # 0 is Monday, 6 is Sunday

# Define rates
peak_rate = 1.45
normal_rate = 1
off_peak_rate = 0.85

# Function to determine rate based on hour and day
def determine_rate(hour, day):
    if day < 5:  # Monday to Friday
        if 16 <= hour < 21:  # 4pm to 9pm
            return peak_rate
        elif 6 <= hour < 10:  # 7am to 9am and 10am to 3pm
            return normal_rate
        else:  # Off-peak times
            return off_peak_rate
    else:  # Weekend
        if 16 <= hour < 21:  # 4pm to 9pm
            return normal_rate
        else:  # Off-peak times
            return off_peak_rate
    
# Apply the function to each row to determine the rate
elec_df['rate'] = elec_df.apply(lambda row: determine_rate(row['hour_of_day'], row['day_of_week']), axis=1)

import_price_df = import_price.drop(columns=['x'])
import_price_df = import_price_df[:-26]

train_cols = random.sample(list(import_price_df.columns), 7000)
import_price_train = import_price_df[train_cols]
test_cols = [col for col in import_price_df.columns if col not in train_cols]
import_price_test = import_price_df[test_cols]

Eff = pd.read_csv(file_path + "/Efficency_impr")
Eff = (Eff)/100 + 1

CAPEX = pd.read_csv(file_path + "/CAPEX_JA.csv")
CAPEX_JA = (CAPEX[:26])

train_cols_CAPEX = random.sample(list(CAPEX_JA.columns), 7000)
test_cols_CAPEX = [col for col in CAPEX_JA.columns if col not in train_cols_CAPEX]

CAPEX_JA_train = CAPEX_JA[train_cols_CAPEX]
CAPEX_JA_test = CAPEX_JA[test_cols_CAPEX]

train_cols_Eff = random.sample(list(Eff.columns), 7000)
test_cols_Eff = [col for col in Eff.columns if col not in train_cols_Eff]

Eff_train = Eff[train_cols_Eff]
Eff_test = Eff[test_cols_Eff]

JA_60_arr = (np.array(JA_60.T)).flatten()
JA_240_arr = (np.array(JA_240.T)).flatten()

Eff_train_arr = np.array(Eff_train.T)
Eff_test_arr = np.array(Eff_test.T)

CAPEX_JA_train_arr = np.array(CAPEX_JA_train.T)
CAPEX_JA_test_arr = np.array(CAPEX_JA_test.T)

elec_consum_arr = np.array(elec_df["Consumption"])
import_price_rate = np.array(elec_df["rate"])

import_price_train_arr = np.array(import_price_train.T)
import_price_test_arr = np.array(import_price_test.T)

grid_factor = pd.read_csv(file_path + "/grid_factor.csv")
grid_factor =  grid_factor.T

train_cols_grid = random.sample(list(grid_factor.columns), 7000)
grid_factor_train = grid_factor[train_cols_grid]
test_cols_grid = [col for col in grid_factor.columns if col not in train_cols]
grid_factor_test = grid_factor[test_cols_grid]

grid_factor_train_arr = np.array(grid_factor_train.T)
grid_factor_test_arr = np.array(grid_factor_test.T)

pv_co2 = pd.read_csv(file_path + "/pv_emissions.csv")
pv_co2_arr = np.array(pv_co2)
pv_co2_arr = np.insert(pv_co2_arr, 0, 1.620)

In [None]:
class TrainEnvironment(gym.Env):
    def __init__(self, PV_90_arr, PV_270_arr, elec_consum_arr, import_price_rate, import_tariff, efficency, CAPEX, 
                 GRID_FACTOR, pv_co2_arr):
        
        # Price per watthour
        self.import_price_df = import_tariff
        self.import_price_at_zero = np.float32(0.00035)
        self.import_price_rate = import_price_rate
        
        # Energy Balance
        self.PV_90_arr = PV_90_arr
        self.PV_270_arr = PV_270_arr
        self.elec_df = elec_consum_arr
        self.max_export = 4000
        self.number_of_panels = 32
        
        # Degradation
        self.deg_mu = 0.82 # Trina: 1.19, JA: 0.82, Maxeon: 0.67
        self.deg_std = 0.555 
        self.phi = 30 # Trina: 15, JA: 30, Maxeon: 50
        
        # Efficency Development
        self.efficency_develop_df = efficency
        self.efficency_at_zero = 1.0
        
        # Costs
        self.power_at_zero = 415  # Trina: 265, JA: 415, Maxeon: 435
        self.cost_per_Wp_df_at_zero = 0.69 # Trina: 0.36, JA: 0.69, Maxeon: 1.58
        self.cost_per_Wp_df = CAPEX
        self.initial_other_costs = 150
        
        self.operational_cost = 16.8
        
        self.loan_interest_rate = 1.10
        self.normal_interest_rate = 1.02
        
        self.low_budget = 0 # Low budget: 0, High Budget: 750
        self.high_budget = 750 # Low budget: 750, High Budget: 2000
                        
        # Spaces and length
        self.action_space = spaces.MultiDiscrete([self.number_of_panels // 2, self.number_of_panels // 2])
        self.observation_space = spaces.Box(0, 1.25, shape=(self.number_of_panels + 8,))
        self.episode_len = 25
        self.months_per_timestep = 12
        
        # Emission
        self.grid_factor_df = GRID_FACTOR
        self.grid_factor_at_zero = 0.553 
        self.pv_emission = pv_co2_arr * self.power_at_zero
        
    def _get_obs(self):
        
        return self.observation
    
    def calculate_import_export(self, elec_df, export_price, import_price):
        
        """
        Calculate the annual Wh of energy exported to the grid (exported) and saved (minimised)
        """
        
        PV_90_tot = self._get_obs()[0:self.number_of_panels // 2].sum() * self.PV_90_arr 
        PV_270_tot = self._get_obs()[(self.number_of_panels // 2) : self.number_of_panels].sum() * self.PV_270_arr 
        
        AC_OUTPUT_tot = PV_90_tot + PV_270_tot
        
        exported = (AC_OUTPUT_tot - self.elec_df).clip(min=0, max = self.max_export)        
        export_revenue = (export_price * exported).sum()
        excess_energy = (AC_OUTPUT_tot - self.elec_df - self.max_export).clip(min=0)

        
        minimised = AC_OUTPUT_tot - exported 
        minimised_revenue = (minimised * (self.import_price_rate * import_price)).sum()
        
        AC_for_env = AC_OUTPUT_tot - excess_energy


        return export_revenue, AC_OUTPUT_tot, AC_for_env, minimised_revenue
    
    def reset(self, seed=None):
        
        """
        Reset the environment to the original state at t=1
        """
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        
        # Panels
        self.init_obs = np.random.uniform(0, 1, size=self.number_of_panels).astype(np.float32)
        self.init_obs = np.where(self.init_obs < 0.5, 0.0, np.random.uniform(0.85, 1.0, size=self.number_of_panels))

        # Combine all initialization into a single step for efficiency
        self.import_price_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min()) / (self.import_price_df.max().max() - self.import_price_df.min().min())
        self.FiT_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min() * 0.33) / (self.import_price_df.max().max() - self.import_price_df.min().min() * 0.33)
        self.efficency_at_zero_norm = (self.efficency_at_zero - 0.999) / (1.156 - 0.999)
        self.panel_cost_and_inverter_at_zero_norm = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())
        
        self.grid_factor_at_zero_norm = (self.grid_factor_at_zero + 0.05319002) / (0.55762151 + 0.05319002)
        
        self.current_budget_constraint = np.random.randint(self.low_budget, self.high_budget)
        self.next_step_budget_constraint = 0
        
        
        # Complete observation initialization in one go
        self.observation = np.concatenate([
            self.init_obs,
            [self.import_price_at_zero_norm, self.FiT_at_zero_norm, self.efficency_at_zero_norm, 
             self.panel_cost_and_inverter_at_zero_norm, 0., 0., 0., self.grid_factor_at_zero_norm]
        ]).astype(np.float32)

        self.previous_observation = self.observation.copy()

        # RANDOM IMPORT PRICE
        self.random_import_price = self.import_price_df[np.random.choice(self.import_price_df.shape[0])] 

        # RANDOM EFFICENCY
        self.random_efficency_develop = self.efficency_develop_df[np.random.choice(self.efficency_develop_df.shape[0])]   
        
        # RANDOM COST PER WP
        self.random_cost_per_Wp = self.cost_per_Wp_df[np.random.choice(self.cost_per_Wp_df.shape[0])]   

        # RANDOM Grid Factor
        self.random_grid_factor = self.grid_factor_df[np.random.choice(self.grid_factor_df.shape[0])]   

        
        self.episode_len = 25  
    
        info = {}
        
        # RESET BALANCES
        self.fin_balance_tot = 0
        self.reward_tot = 0
        self.env_balance_tot = 0
        self.produced = 0
        self.other_costs = 0
        self.FiT = 0.0004
        self.next_FiT = 0.0004

        self.total_cash_flow = []
        self.annual_cash_flow = 0
                
        self.due_loans = [0, 0, 0, 0] 
        self.current_interest = 0
        self.step_total_interest = 1
        self.survival = np.zeros(self.number_of_panels, dtype=np.float32)
        self.resale_values = array_of_zeros = np.zeros(self.number_of_panels, dtype=np.float32)
        
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)
        
        self.two_year_ago_interest = 0
        self.first_year_interest = []
        self.second_year_interest = [0]
        self.third_year_interest = [0, 0]
        self.fourth_year_interest = [0, 0, 0]
        self.next_year_total = 0
        
        self.survival = np.zeros(self.number_of_panels, dtype=np.float32)
    
        return self.observation, info
    
    def emission_balance(self, pv_production, grid_factor, panel_emission, action_step):
        
        curtailed = (pv_production.sum() * grid_factor)/1000
        
        number_installed = int(np.sum(action_step))
        
        panel_emission_tot = number_installed * panel_emission
        
        emission_balance = curtailed - panel_emission_tot
        
        return emission_balance 
    
    def calculate_resale(self, initial_panel_cost, indices):
        
        self.resale_values[indices] = initial_panel_cost
        
        self.resale_values = self.resale_values * 0.85
        
        for count, i in enumerate(self.broke):
            if i == 1:
                self.resale_values[count] = 0
        
        resale_step = self.resale_values[indices].sum()
        
        return resale_step
    
    def calculate_panel_inv_cost(self, cost_per_Wp):
        
        PW_ep = self.efficency_develop * self.power_at_zero
        
        panel_cost_and_inverter = PW_ep * cost_per_Wp
        
        return panel_cost_and_inverter
        
    def calculate_penalty(self, current_step, annual_expense):
              
        year = 25 - current_step
        
        if year > 0:
            self.current_budget_constraint = self.next_step_budget_constraint    
            
        
        self.current_interest = self.next_year_total
        annual_expense = (-annual_expense)
        value = 0 
        loan = 0
        annual_interest = 0

        if annual_expense > self.current_budget_constraint:
            loan = (self.current_budget_constraint - annual_expense)
            value = annual_expense / self.current_budget_constraint
            periods = 2 if value < 2 else 3 if value < 3 else 4

            annual_interest = loan / periods
            interest_multiplier = 1

            for i in range(4):
                if i < periods:
                    self.due_loans[i] = annual_interest * interest_multiplier
                    interest_multiplier *= self.loan_interest_rate
                else:
                    self.due_loans[i] = 0
        else:
             self.due_loans = [0, 0, 0, 0]
    
        self.first_year_interest.append(self.due_loans[0])
        self.second_year_interest.append(self.due_loans[1])
        self.third_year_interest.append(self.due_loans[2])
        self.fourth_year_interest.append(self.due_loans[3])
    
    
        self.next_year_total = self.first_year_interest[year] + self.second_year_interest[year] + self.third_year_interest[year] + self.fourth_year_interest[year]
        
        self.next_step_budget_constraint = np.random.randint(self.low_budget, self.high_budget) * self.step_total_interest
        current_budget_observation = (self.next_step_budget_constraint - self.low_budget * self.step_total_interest) / (self.high_budget * self.step_total_interest - self.low_budget * self.step_total_interest) 
        self.observation[self.number_of_panels + 6] = current_budget_observation
                
        return self.current_interest, self.due_loans, self.next_year_total
        
    def calculate_total_CAPEX(self, action_step, panel_cost_and_inverter):
        """
        Calculate CAPEX each step in a vectorized manner.
        """
        BOS = panel_cost_and_inverter * 0.55
        number_installed = int(np.sum(action_step))

        # Calculate costs from module and inverter
        panel_cost_and_inverter_step = panel_cost_and_inverter * number_installed

        # Calculate other installation costs
        if number_installed == 0:
            other_costs = 0
        elif number_installed == 1:
            other_costs = self.initial_other_costs * self.step_total_interest
        else:
            discounts = 0.9 ** np.arange(number_installed)
            other_costs = (self.initial_other_costs * self.step_total_interest * discounts).sum()

        # Calculate BOS costs using vector operations
        is_new_installation = (self.previous_observation[:number_installed] == 0) & (action_step[:number_installed] == 1)
        is_replacement = (self.previous_observation[:number_installed] > 0) & (action_step[:number_installed] == 1)
        BOS_cost = np.sum(BOS * is_new_installation) + np.sum((BOS / 2) * is_replacement)

        # Sum total CAPEX
        total_CAPEX = panel_cost_and_inverter_step + BOS_cost + other_costs

        return total_CAPEX, panel_cost_and_inverter
        
    def failure(self, actions):
        
        beta = 3  # Shape parameter

        # Determine which panels are active based on the actions and previous observations.
        if self.episode_len == 24:
            active_panels = (self.observation[:self.number_of_panels] > 0.85)
        else:
            active_panels = (self.observation[:self.number_of_panels] == self.efficency_develop)

        # Calculate lifespan for all active panels at once
        lifespans = np.random.weibull(beta, self.number_of_panels) * self.phi
        lifespans = np.where(active_panels, lifespans, 0)  # Apply lifespan only to active panels

        # Adjust survival times based on episode length
        self.survival[:self.number_of_panels] = np.where(
            active_panels,
            np.abs(lifespans.astype(int)) + np.abs(self.episode_len - 25),
            self.survival[:self.number_of_panels]
        )

        return self.survival

    def calculate_FiT(self, episodes, import_price):
            
        self.FiT = import_price
            
        if episodes == 25:
            self.FiT = self.FiT
            
        elif episodes == 24 or episodes == 23:
            self.FiT = self.FiT * 0.64
            
        elif episodes == 22:
            self.FiT = self.FiT * 0.46
            
        elif episodes == 21:
            self.FiT = self.FiT * 0.55
            
        elif episodes < 20:
            self.FiT = self.FiT * 0.33
            
        elif episodes == 20:
            self.FiT = self.FiT * 0.37
            
        return self.FiT
                        
    def step(self, action):
        
        """
        defines actions, reward etc.
        """
        
        # RESET THE ANNUAL BALANCES
        self.total_CAPEX = 0
        self.pv_costs = 0
        self.fin_balance = 0
        self.env_balance = 0
        self.number_installed = 0
        current_penalty = 0
        self.other_costs = 0
        next_step_penalty = 0
        self.step_total_interest = self.step_total_interest * self.normal_interest_rate
        current_operational_costs = self.operational_cost * self.step_total_interest
        
        
        self.cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
        self.import_price = self.random_import_price[abs(self.episode_len - 25)]
        self.efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
        self.grid_factor = self.random_grid_factor[abs(self.episode_len - 25)]
        self.step_pv_emission = (self.pv_emission[abs(self.episode_len - 25)] * self.efficency_develop)
        self.panel_cost_and_inverter = self.calculate_panel_inv_cost(self.cost_per_Wp)
        FiT = self.calculate_FiT(self.episode_len, self.import_price)
        
        reward = 0   
        
        # Find indices of the lowest 'action' values in previous_observation
        indices_0 = np.argsort(self.previous_observation[:(self.number_of_panels // 2)])[:action[0]]
        indices_1 = np.argsort(self.previous_observation[(self.number_of_panels // 2):self.number_of_panels])[:action[1]]

        indices = np.concatenate([indices_0, indices_1 + (self.number_of_panels // 2)])
        
        # Replace these indices in the observation with efficiency_develop
        self.observation[:self.number_of_panels][indices] = self.efficency_develop
        
        # Copy over the other values from previous_observation to observation
        mask = np.ones(len(self.previous_observation[:self.number_of_panels]), dtype=bool)
        mask[indices] = False
        self.observation[:self.number_of_panels][mask] = self.previous_observation[:self.number_of_panels][mask]

        replaced_panels = np.zeros(len(self.previous_observation[:self.number_of_panels]), dtype=int)
        replaced_panels[indices] = 1

        instaltion = (self.observation[:self.number_of_panels] > 0).astype(int)
        self.pv_costs -= instaltion.sum() * current_operational_costs

        actions_step = np.array(replaced_panels)
        
        action = action[0] + action[1]

            
        if action > 0:
            step_CAPEX, panel_cost_and_inverter = self.calculate_total_CAPEX(actions_step, self.panel_cost_and_inverter)
            self.pv_costs -= step_CAPEX
            
        else:
            panel_cost_and_inverter = 0
                
        next_observation = self._get_obs()

        # Calculate the Reslae value
        resale = self.calculate_resale(panel_cost_and_inverter, indices) #  ***
        
        self.pv_costs += resale
        
        # CALCULATE THE BUDGET INTEREST
        current_penalty, due_loans, next_step_penalty = self.calculate_penalty(self.episode_len, self.pv_costs)

        # CALCULATE THE ENERGY YIELD
        exported_revenue, AC_OUTPUT_tot, AC_for_env, minimised_revenue = self.calculate_import_export(self.elec_df, FiT, self.import_price)        
        
        # CALCULATE STEP EMISSIONS
        self.env_balance = self.emission_balance(AC_for_env, self.grid_factor, self.step_pv_emission, actions_step)

        self.env_balance_tot += self.env_balance 
        
        pv_costs_observation = - self.pv_costs / 10000
        self.observation[self.number_of_panels + 4] = pv_costs_observation
        
        next_step_penalty_observation = - next_step_penalty / 8000
        self.observation[self.number_of_panels + 5] = next_step_penalty_observation
        
        
        # CALCULATE STEP BALANCES
        self.fin_balance += self.pv_costs
        self.fin_balance += current_penalty
        self.fin_balance += float(exported_revenue + minimised_revenue)
        
        # CALCULATE TOTAL BALANCES
        self.fin_balance_tot += self.fin_balance                
        
        # SUBSTRACT 1 FOR TIMESTEP
        self.episode_len -= 1
        done = self.episode_len <= 0
        
        fin_mean = -2331
        fin_stdev = 1634
        
        env_mean = -5169
        st_dev = 3429
        
        reward = ((self.fin_balance - fin_mean) / fin_stdev) + ((self.env_balance - env_mean) / st_dev)
        
        # FAILURE
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)
        survival = self.failure(actions_step)
        
        for c, p in enumerate(survival):
            
            if c < self.number_of_panels:

                if p - 1 <= abs(self.episode_len - 24):
                    self.broke[c] = 1
                    self.observation[c] = 0
        
        # DEGRADATION RATE
        # Applying degradation only to panels that are operational (above 0.1 efficiency)
        active_panels = self.observation[:self.number_of_panels] > 0.1
        degradations = np.random.normal(self.deg_mu, self.deg_std, size=self.number_of_panels) / 100
        self.observation[:self.number_of_panels][active_panels] -= degradations[active_panels]
        
        if not done: 
        
            self.next_cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
            self.next_import_price = self.random_import_price[abs(self.episode_len - 25)]
            self.next_efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
            self.next_grid_factor = self.random_grid_factor[abs(self.episode_len - 25)]
            next_FIT = self.calculate_FiT(self.episode_len, self.next_import_price)
        
            price_observation = (self.next_import_price - 0.00022499) / (0.0020798 - 0.00022499)
            self.observation[self.number_of_panels] = price_observation

            FiT_observation = (next_FIT - 0.00022499 * 0.33) / (0.0020798 - 0.00022499 * 0.33)
            self.observation[self.number_of_panels + 1] = FiT_observation

            eff_observation = (self.next_efficency_develop - 0.999) / (1.156 - 0.999)
            self.observation[self.number_of_panels + 2] = eff_observation
            
            cost_per_Wp_observation = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())
            self.observation[self.number_of_panels + 3] = cost_per_Wp_observation
            
            grid_factor_observation = (self.next_grid_factor - 0.553) / (0.553 - 0.00022499)

            self.observation[self.number_of_panels + 7] = grid_factor_observation
        
        info = {"step financial balance (eur):": self.fin_balance,
               "total financial balance: (eur)": self.fin_balance_tot,
               "internal rate of return": 0,
               "current_interest": reward,
                "net present value": (self.env_balance - env_mean) / st_dev}
         
        
        self.previous_observation = self.observation.copy()
        
        return self.observation, reward, done, False, info

In [None]:
class TestEnvironment(gym.Env):
    def __init__(self, PV_90_arr, PV_270_arr, elec_consum_arr, import_price_rate, import_tariff, efficency, CAPEX, 
                 GRID_FACTOR, pv_co2_arr):
        
        # Price per watthour
        self.import_price_df = import_tariff
        self.import_price_at_zero = np.float32(0.00035)
        self.import_price_rate = import_price_rate
        
        # Energy Balance
        self.PV_90_arr = PV_90_arr
        self.PV_270_arr = PV_270_arr
        self.elec_df = elec_consum_arr
        self.max_export = 4000
        self.number_of_panels = 32
        
        # Degradation
        self.deg_mu = 0.82 # Trina: 1.19, JA: 0.82, Maxeon: 0.67
        self.deg_std = 0.555 
        self.phi = 30 # Trina: 15, JA: 30, Maxeon: 50
        
        # Efficency Development
        self.efficency_develop_df = efficency
        self.efficency_at_zero = 1.0
        
        # Costs
        self.power_at_zero = 415  # Trina: 265, JA: 415, Maxeon: 435
        self.cost_per_Wp_df_at_zero = 0.69 # Trina: 0.36, JA: 0.69, Maxeon: 1.58
        self.cost_per_Wp_df = CAPEX
        self.initial_other_costs = 150
        
        self.operational_cost = 16.8
        
        self.loan_interest_rate = 1.10
        self.normal_interest_rate = 1.02
        
        self.low_budget = 0 # Low budget: 0, High Budget: 750
        self.high_budget = 750 # Low budget: 750, High Budget: 2000
                        
        # Spaces and length
        self.action_space = spaces.MultiDiscrete([self.number_of_panels // 2, self.number_of_panels // 2])
        self.observation_space = spaces.Box(0, 1.25, shape=(self.number_of_panels + 8,))
        self.episode_len = 25
        self.months_per_timestep = 12
        
        # Emission
        self.grid_factor_df = GRID_FACTOR #****
        self.grid_factor_at_zero = 0.553 
        self.pv_emission = pv_co2_arr * self.power_at_zero
        
    def _get_obs(self):
        
        return self.observation
    
    def calculate_import_export(self, elec_df, export_price, import_price):
        
        """
        Calculate the annual Wh of energy exported to the grid (exported) and saved (minimised)
        """
        
        PV_90_tot = self._get_obs()[0:self.number_of_panels // 2].sum() * self.PV_90_arr 
        PV_270_tot = self._get_obs()[(self.number_of_panels // 2) : self.number_of_panels].sum() * self.PV_270_arr 
        
        AC_OUTPUT_tot = PV_90_tot + PV_270_tot

        exported = (AC_OUTPUT_tot - self.elec_df).clip(min=0, max = self.max_export)  
        excess_energy = (AC_OUTPUT_tot - self.elec_df - self.max_export).clip(min=0)
        
        export_revenue = (export_price * exported).sum()

        
        minimised = AC_OUTPUT_tot - exported 
        minimised_revenue = (minimised * (self.import_price_rate * import_price)).sum()
        
        AC_for_env = AC_OUTPUT_tot - excess_energy

        return export_revenue, AC_OUTPUT_tot, AC_for_env, minimised_revenue
    
    def reset(self, seed=None):
        
        """
        Reset the environment to the original state at t=1
        """
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
            
        # Panels
        self.init_obs = np.random.uniform(0, 1, size=self.number_of_panels).astype(np.float32)
        self.init_obs = np.where(self.init_obs < 0.5, 0.0, np.random.uniform(0.85, 1.0, size=self.number_of_panels))

        # Combine all initialization into a single step for efficiency
        self.import_price_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min()) / (self.import_price_df.max().max() - self.import_price_df.min().min())
        self.FiT_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min() * 0.33) / (self.import_price_df.max().max() - self.import_price_df.min().min() * 0.33)
        self.efficency_at_zero_norm = (self.efficency_at_zero - 0.999) / (1.156 - 0.999)
        self.panel_cost_and_inverter_at_zero_norm = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())
        
        self.grid_factor_at_zero_norm = (self.grid_factor_at_zero + 0.05319002) / (0.55762151 + 0.05319002)
        
        self.current_budget_constraint = np.random.randint(self.low_budget, self.high_budget)
        self.next_step_budget_constraint = 0
        
        
        # Complete observation initialization in one go
        self.observation = np.concatenate([
            self.init_obs,
            [self.import_price_at_zero_norm, self.FiT_at_zero_norm, self.efficency_at_zero_norm, 
             self.panel_cost_and_inverter_at_zero_norm, 0., 0., 0., self.grid_factor_at_zero_norm]
        ]).astype(np.float32) #***

        self.previous_observation = self.observation.copy()

        # RANDOM IMPORT PRICE
        self.random_import_price = self.import_price_df[np.random.choice(self.import_price_df.shape[0])] 

        # RANDOM EFFICENCY
        self.random_efficency_develop = self.efficency_develop_df[np.random.choice(self.efficency_develop_df.shape[0])]   
        
        # RANDOM COST PER WP
        self.random_cost_per_Wp = self.cost_per_Wp_df[np.random.choice(self.cost_per_Wp_df.shape[0])]   
        
        # RANDOM Grid Factor
        self.random_grid_factor = self.grid_factor_df[np.random.choice(self.grid_factor_df.shape[0])]   #***
        
        self.episode_len = 25  
    
        info = {}
        
        # RESET BALANCES
        self.fin_balance_tot = 0
        self.reward_tot = 0
        self.env_balance_tot = 0
        self.produced = 0
        self.other_costs = 0
        self.FiT = 0.0004
        self.next_FiT = 0.0004
        self.resale_values = array_of_zeros = np.zeros(self.number_of_panels, dtype=np.float32)
        
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)
        self.total_cash_flow = []
        self.annual_cash_flow = 0
                
        self.due_loans = [0, 0, 0, 0] 
        self.current_interest = 0
        self.step_total_interest = 1
        
        self.two_year_ago_interest = 0
        self.first_year_interest = []
        self.second_year_interest = [0]
        self.third_year_interest = [0, 0]
        self.fourth_year_interest = [0, 0, 0]
        self.next_year_total = 0
        
        self.survival = np.zeros(self.number_of_panels, dtype=np.float32)
    
        return self.observation, info
    
    def emission_balance(self, pv_production, grid_factor, panel_emission, action_step):
        
        curtailed = (pv_production.sum() * grid_factor)/1000
        
        number_installed = int(np.sum(action_step))
        
        panel_emission_tot = number_installed * panel_emission
        
        emission_balance = curtailed - panel_emission_tot
        
        return emission_balance 
    
    def calculate_resale(self, initial_panel_cost, indices):
        
        self.resale_values[indices] = initial_panel_cost
        
        self.resale_values = self.resale_values * 0.85
        
        for count, i in enumerate(self.broke):
            if i == 1:
                self.resale_values[count] = 0
        
        resale_step = self.resale_values[indices].sum()
        
        return resale_step
    
    def calculate_panel_inv_cost(self, cost_per_Wp):
        
        PW_ep = self.efficency_develop * self.power_at_zero
        
        panel_cost_and_inverter = PW_ep * cost_per_Wp
        
        return panel_cost_and_inverter
    
    def calculate_irr_and_npv(self, pv_cost, minimised_revenue, export_revenue, penalty):
                
        """
        Calculates total cash flow of the project needed for the internal rate of return
        """ 
        self.expences = 0
        self.annual_cash_flow = 0
        initial_cost = 0
        
        self.expences = pv_cost
        self.annual_cash_flow = self.expences + export_revenue + minimised_revenue + penalty
        initial_cost_q, x = self.calculate_total_CAPEX(self.init_obs, self.panel_cost_and_inverter)
        #initial_cost = - initial_cost_q
        
        if self.episode_len == 24:
            self.total_cash_flow.append(initial_cost + self.annual_cash_flow) 
        else:
            self.total_cash_flow.append(self.annual_cash_flow) 
        
        return self.total_cash_flow
        
    def calculate_penalty(self, current_step, annual_expense):
              
        year = 25 - current_step
        
        if year > 0:
            self.current_budget_constraint = self.next_step_budget_constraint    
            
        
        self.current_interest = self.next_year_total
        annual_expense = (-annual_expense)
        value = 0 
        loan = 0
        annual_interest = 0

        if annual_expense > self.current_budget_constraint:
            loan = (self.current_budget_constraint - annual_expense)
            value = annual_expense / self.current_budget_constraint
            periods = 2 if value < 2 else 3 if value < 3 else 4

            annual_interest = loan / periods
            interest_multiplier = 1

            for i in range(4):
                if i < periods:
                    self.due_loans[i] = annual_interest * interest_multiplier
                    interest_multiplier *= self.loan_interest_rate
                else:
                    self.due_loans[i] = 0
        else:
             self.due_loans = [0, 0, 0, 0]
    
        self.first_year_interest.append(self.due_loans[0])
        self.second_year_interest.append(self.due_loans[1])
        self.third_year_interest.append(self.due_loans[2])
        self.fourth_year_interest.append(self.due_loans[3])
    
    
        self.next_year_total = self.first_year_interest[year] + self.second_year_interest[year] + self.third_year_interest[year] + self.fourth_year_interest[year]
        
        self.next_step_budget_constraint = np.random.randint(750, 2000) * self.step_total_interest
        current_budget_observation = (self.next_step_budget_constraint - 750 * self.step_total_interest) / (2000 * self.step_total_interest - 750 * self.step_total_interest) 
        self.observation[self.number_of_panels + 6] = current_budget_observation
                
        return self.current_interest, self.due_loans, self.next_year_total
        
    def calculate_total_CAPEX(self, action_step, panel_cost_and_inverter):
        """
        Calculate CAPEX each step in a vectorized manner.
        """
        BOS = panel_cost_and_inverter * 0.55
        number_installed = int(np.sum(action_step))

        # Calculate costs from module and inverter
        panel_cost_and_inverter_step = panel_cost_and_inverter * number_installed

        # Calculate other installation costs
        if number_installed == 0:
            other_costs = 0
        elif number_installed == 1:
            other_costs = self.initial_other_costs * self.step_total_interest
        else:
            discounts = 0.9 ** np.arange(number_installed)
            other_costs = (self.initial_other_costs * self.step_total_interest * discounts).sum()

        # Calculate BOS costs using vector operations
        is_new_installation = (self.previous_observation[:number_installed] == 0) & (action_step[:number_installed] == 1)
        is_replacement = (self.previous_observation[:number_installed] > 0) & (action_step[:number_installed] == 1)
        BOS_cost = np.sum(BOS * is_new_installation) + np.sum((BOS / 2) * is_replacement)

        # Sum total CAPEX
        total_CAPEX = panel_cost_and_inverter_step + BOS_cost + other_costs

        return total_CAPEX, panel_cost_and_inverter
        
    def failure(self, actions):
        
        beta = 3  # Shape parameter

        # Determine which panels are active based on the actions and previous observations.
        if self.episode_len == 24:
            active_panels = (self.observation[:self.number_of_panels] > 0.85)
        else:
            active_panels = (self.observation[:self.number_of_panels] == self.efficency_develop)

        # Calculate lifespan for all active panels at once
        lifespans = np.random.weibull(beta, self.number_of_panels) * self.phi
        lifespans = np.where(active_panels, lifespans, 0)  # Apply lifespan only to active panels

        # Adjust survival times based on episode length
        self.survival[:self.number_of_panels] = np.where(
            active_panels,
            np.abs(lifespans.astype(int)) + np.abs(self.episode_len - 25),
            self.survival[:self.number_of_panels]
        )

        return self.survival

    def calculate_FiT(self, episodes, import_price):
            
        self.FiT = import_price
            
        if episodes == 25:
            self.FiT = self.FiT
            
        elif episodes == 24 or episodes == 23:
            self.FiT = self.FiT * 0.64
            
        elif episodes == 22:
            self.FiT = self.FiT * 0.46
            
        elif episodes == 21:
            self.FiT = self.FiT * 0.55
            
        elif episodes < 20:
            self.FiT = self.FiT * 0.33
            
        elif episodes == 20:
            self.FiT = self.FiT * 0.37
            
        return self.FiT
                        
    def step(self, action):
        
        """
        defines actions, reward etc.
        """
        
        # RESET THE ANNUAL BALANCES
        self.total_CAPEX = 0
        self.pv_costs = 0
        self.fin_balance = 0
        self.env_balance = 0
        self.number_installed = 0
        irr_fin = 0
        npv_fin = 0
        current_penalty = 0
        self.other_costs = 0
        next_step_penalty = 0
        self.step_total_interest = self.step_total_interest * self.normal_interest_rate
        current_operational_costs = self.operational_cost * self.step_total_interest
        
        
        self.cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
        self.import_price = self.random_import_price[abs(self.episode_len - 25)]
        self.efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
        self.grid_factor = self.random_grid_factor[abs(self.episode_len - 25)]
        self.step_pv_emission = (self.pv_emission[abs(self.episode_len - 25)] * self.efficency_develop)
           
        self.panel_cost_and_inverter = self.calculate_panel_inv_cost(self.cost_per_Wp)
        FiT = self.calculate_FiT(self.episode_len, self.import_price)
        
        reward = 0   
        
        # Find indices of the lowest 'action' values in previous_observation
        indices_0 = np.argsort(self.previous_observation[:(self.number_of_panels // 2)])[:action[0]]
        indices_1 = np.argsort(self.previous_observation[(self.number_of_panels // 2):self.number_of_panels])[:action[1]]

        indices = np.concatenate([indices_0, indices_1 + (self.number_of_panels // 2)])
        
        # Replace these indices in the observation with efficiency_develop
        self.observation[:self.number_of_panels][indices] = self.efficency_develop

        # Copy over the other values from previous_observation to observation
        mask = np.ones(len(self.previous_observation[:self.number_of_panels]), dtype=bool)
        mask[indices] = False
        self.observation[:self.number_of_panels][mask] = self.previous_observation[:self.number_of_panels][mask]

        replaced_panels = np.zeros(len(self.previous_observation[:self.number_of_panels]), dtype=int)
        replaced_panels[indices] = 1

        instaltion = (self.observation[:self.number_of_panels] > 0).astype(int)
        self.pv_costs -= instaltion.sum() * current_operational_costs

        actions_step = np.array(replaced_panels)

        action = action[0] + action[1]
        
        if action > 0:
            step_CAPEX, panel_cost_and_inverter = self.calculate_total_CAPEX(actions_step, self.panel_cost_and_inverter)
            self.pv_costs -= step_CAPEX
            
        else:
            panel_cost_and_inverter = 0
                
        next_observation = self._get_obs()

        # Calculate the Reslae value
        resale = self.calculate_resale(panel_cost_and_inverter, indices) #  ***
        
        self.pv_costs += resale

        
        # CALCULATE THE BUDGET INTEREST
        current_penalty, due_loans, next_step_penalty = self.calculate_penalty(self.episode_len, self.pv_costs)
        
        
        # CALCULATE THE ENERGY YIELD
        exported_revenue, AC_OUTPUT_tot, AC_for_env, minimised_revenue = self.calculate_import_export(self.elec_df, FiT, self.import_price)        
        
        
        # CALCULATE STEP EMISSIONS
        self.env_balance = self.emission_balance(AC_for_env, self.grid_factor, self.step_pv_emission, actions_step)
        
        self.env_balance_tot += self.env_balance
        
        pv_costs_observation = - self.pv_costs / 10000
        self.observation[self.number_of_panels + 4] = pv_costs_observation
        
        next_step_penalty_observation = - next_step_penalty / 8000
        self.observation[self.number_of_panels + 5] = next_step_penalty_observation
        
        # CALCULATE STEP BALANCES
        self.fin_balance += self.pv_costs
        self.fin_balance += current_penalty
        self.fin_balance += float(exported_revenue + minimised_revenue)
        
        # CALCULATE TOTAL BALANCES
        self.fin_balance_tot += self.fin_balance                
        
        # SUBSTRACT 1 FOR TIMESTEP
        self.episode_len -= 1
        done = self.episode_len <= 0
        
        # CALCULATE IRR, NPV AND CARBON INTENSITY
        total_cash_flow = self.calculate_irr_and_npv(self.pv_costs, exported_revenue, minimised_revenue, current_penalty)
        irr = npf.irr(total_cash_flow) * 100
        npv = npf.npv(0.04 ,total_cash_flow)
            
        # RETURNS AND CALCULATE REWARD
        if self.episode_len == 0:
            irr_fin = irr
            npv_fin = npv
        
        fin_mean = -2331
        fin_stdev = 1634
        
        env_mean = -5169
        st_dev = 3429
        
        reward = ((self.fin_balance - fin_mean) / fin_stdev) + ((self.env_balance - env_mean) / st_dev)
        #reward = self.fin_balance_tot / 1000 if done else 0
        
        # FAILURE
         
        survival = self.failure(actions_step)
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)

        for c, p in enumerate(survival):
            
            if c < self.number_of_panels:

                if p - 1 <= abs(self.episode_len - 24):
                    self.broke[c] = 1

                    self.observation[c] = 0
        
        # DEGRADATION RATE
        # Applying degradation only to panels that are operational (above 0.1 efficiency)
        active_panels = self.observation[:self.number_of_panels] > 0.1
        degradations = np.random.normal(self.deg_mu, self.deg_std, size=self.number_of_panels) / 100
        self.observation[:self.number_of_panels][active_panels] -= degradations[active_panels]
        
        if not done: 
        
            self.next_cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
            self.next_import_price = self.random_import_price[abs(self.episode_len - 25)]
            self.next_efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
            self.next_grid_factor = self.random_grid_factor[abs(self.episode_len - 25)]
            next_FIT = self.calculate_FiT(self.episode_len, self.next_import_price)
        
            price_observation = (self.next_import_price - 0.00022499) / (0.0020798 - 0.00022499)
            self.observation[self.number_of_panels] = price_observation

            FiT_observation = (next_FIT - 0.00022499 * 0.33) / (0.0020798 - 0.00022499 * 0.33)
            self.observation[self.number_of_panels + 1] = FiT_observation

            eff_observation = (self.next_efficency_develop - 0.999) / (1.156 - 0.999)
            self.observation[self.number_of_panels + 2] = eff_observation

            cost_per_Wp_observation = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())
            self.observation[self.number_of_panels + 3] = cost_per_Wp_observation
            
            grid_factor_observation = (self.next_grid_factor - 0.553) / (0.553 - 0.00022499)

            self.observation[self.number_of_panels + 7] = grid_factor_observation

        
        
        info = {"step financial balance (eur):": self.env_balance,
               "total financial balance: (eur)": self.fin_balance_tot,
               "total environmental balance: (kgco2)": self.env_balance_tot,
               "current_interest": current_penalty,
                "finbalance": self.env_balance,
               "reward": npv_fin}
         
        
        self.previous_observation = self.observation.copy()
        
        return self.observation, reward, done, False, info

In [5]:
env = TrainEnvironment(JA_60_arr, JA_240_arr, elec_consum_arr, import_price_rate, import_price_train_arr, Eff_train_arr, 
                       CAPEX_JA_train_arr, grid_factor_test_arr, pv_co2_arr)
env_test = TestEnvironment(JA_60_arr, JA_240_arr, elec_consum_arr, import_price_rate, import_price_test_arr, Eff_test_arr, 
                           CAPEX_JA_test_arr, grid_factor_train_arr, pv_co2_arr)

In [23]:
#check_env(env)
def test4(episodes, environment):    
    for episode in range(episodes):
        done = False
        obs = environment.reset()
        step = 0
        print(obs, "\n")
        while not done:
            step += 1
            random_action = environment.action_space.sample()
            obs, reward, done, trun, info = environment.step(random_action)
            
            
            # Extracting the 2nd and 3rd key-value pairs
            keys = list(info.keys())
            values = list(info.values())

            # Getting the 2nd key-value pair
            zeroth_key = keys[4]
            zeroth_value = values[4]

            # Getting the 3rd key-value pair

            sixth_key = keys[3]
            sixth_value = values[3]
            
            print("STEP:", step)
            print("ACT","\n",  random_action)
            print("OBS","\n",  obs)
            print(zeroth_key, zeroth_value, sixth_key, sixth_value)
            print("\n")

In [24]:
test4(1, env)

(array([0.        , 0.9643404 , 0.9753191 , 0.8551375 , 0.        ,
       0.9549957 , 0.        , 0.        , 0.9190497 , 0.9455148 ,
       0.920098  , 0.91300225, 0.        , 0.        , 0.94720644,
       0.        , 0.9189833 , 0.        , 0.9150068 , 0.8622742 ,
       0.94984674, 0.8981805 , 0.93276334, 0.        , 0.9551914 ,
       0.9876699 , 0.        , 0.9773744 , 0.        , 0.        ,
       0.9175955 , 0.99942166, 0.06935652, 0.11512984, 0.00636943,
       0.7395664 , 0.        , 0.        , 0.        , 0.99243385],
      dtype=float32), {}) 

STEP: 1
ACT 
 [13 14]
OBS 
 [ 0.9976064   0.96572536  0.963991    0.9934892   0.9910786   0.94208837
  0.9899317   1.0096053   0.99065423  0.9949906   0.99297875  0.99364895
  0.9907915   0.9889373   1.0031904   0.99982375  0.98327965  0.99042135
  1.0026557   0.98551285  0.9971334   0.99377966  0.98982555  1.0008972
  0.9969536   0.99066037  0.9859411   0.99634206  0.98629063  0.9921009
  0.99130607  0.98348767  0.07068363  0.076

In [None]:
test1(1000, env_test)

In [None]:
def make_env(rank: int, seed: int = 0) -> Callable:
    def _init() -> gym.Env:
        random.seed(seed + rank)
        np.random.seed(seed + rank) 
        env = TrainEnvironment(JA_60_arr, JA_240_arr, elec_consum_arr, import_price_rate, import_price_train_arr, Eff_train_arr, 
                       CAPEX_JA_train_arr, grid_factor_test_arr, pv_co2_arr)
        env.reset(seed=seed + rank)
        return env

    return _init
# Number of environments to run in parallel
num_cpu = 16
env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])   

In [16]:
log_path = "./logs/"
eval_callback = EvalCallback(env_test, best_model_save_path = "C:/Users/kubaw/Desktop/DELFT/THESIS/CODE/TEST_MODELS/32_ENV_JA_LOW_2/",
                             log_path = log_path, n_eval_episodes = 750, eval_freq=15000,
                             deterministic=True, render=False)


In [17]:
policy_kwargs = dict(net_arch=dict(pi=[2048, 2048], vf=[2048, 2048]))

In [18]:
def linear_schedule(initial_value, final_value=0.00001):
    """
    Returns a function that computes a linearly decreasing value from initial_value to final_value.
    """
    def func(progress_remaining):
        # Calculate the decrease based on the remaining progress
        return final_value + (initial_value - final_value) * progress_remaining
    return func

# Define the learning rate using the linear schedule
learning_rate = linear_schedule(0.0002)

In [19]:
model = PPO("MlpPolicy", env, learning_rate = learning_rate, batch_size = 2048, n_epochs = 24, policy_kwargs = policy_kwargs, gamma = 0.99,  verbose=1, tensorboard_log = "C:/Users/kubaw/Desktop/DELFT/THESIS\CODE/TEST_MODELS/LOGS/logs")
TIMESTEPS = 10000000
model.learn(total_timesteps = TIMESTEPS, callback=eval_callback)

Using cpu device
Logging to C:/Users/kubaw/Desktop/DELFT/THESIS\CODE/TEST_MODELS/LOGS/logs\PPO_467




------------------------------
| time/              |       |
|    fps             | 1344  |
|    iterations      | 1     |
|    time_elapsed    | 24    |
|    total_timesteps | 32768 |
------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 416         |
|    iterations           | 2           |
|    time_elapsed         | 157         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.016636478 |
|    clip_fraction        | 0.31        |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.53       |
|    explained_variance   | -0.00174    |
|    learning_rate        | 0.000199    |
|    loss                 | 20          |
|    n_updates            | 24          |
|    policy_gradient_loss | -0.0354     |
|    value_loss           | 42.8        |
-----------------------------------------
---------------------------

  value = annual_expense / self.current_budget_constraint


Eval num_timesteps=240000, episode_reward=103.76 +/- 9.59
Episode length: 25.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 25          |
|    mean_reward          | 104         |
| time/                   |             |
|    total_timesteps      | 240000      |
| train/                  |             |
|    approx_kl            | 0.014866164 |
|    clip_fraction        | 0.261       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.92       |
|    explained_variance   | 0.66        |
|    learning_rate        | 0.000196    |
|    loss                 | 22.8        |
|    n_updates            | 168         |
|    policy_gradient_loss | -0.0265     |
|    value_loss           | 49.1        |
-----------------------------------------
New best mean reward!
-------------------------------
| time/              |        |
|    fps             | 266    |
|    iterations      | 8      |
|    

----------------------------------------
| time/                   |            |
|    fps                  | 251        |
|    iterations           | 18         |
|    time_elapsed         | 2347       |
|    total_timesteps      | 589824     |
| train/                  |            |
|    approx_kl            | 0.01109109 |
|    clip_fraction        | 0.129      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.42      |
|    explained_variance   | 0.918      |
|    learning_rate        | 0.000189   |
|    loss                 | 22.3       |
|    n_updates            | 408        |
|    policy_gradient_loss | -0.013     |
|    value_loss           | 44.7       |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 250          |
|    iterations           | 19           |
|    time_elapsed         | 2482         |
|    total_timesteps      | 622592       |
| tr

------------------------------------------
| time/                   |              |
|    fps                  | 248          |
|    iterations           | 29           |
|    time_elapsed         | 3827         |
|    total_timesteps      | 950272       |
| train/                  |              |
|    approx_kl            | 0.0069922125 |
|    clip_fraction        | 0.0511       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.25        |
|    explained_variance   | 0.941        |
|    learning_rate        | 0.000183     |
|    loss                 | 20.9         |
|    n_updates            | 672          |
|    policy_gradient_loss | -0.00557     |
|    value_loss           | 42.2         |
------------------------------------------
Eval num_timesteps=960000, episode_reward=104.92 +/- 7.13
Episode length: 25.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 25           |
|    mea

------------------------------------------
| time/                   |              |
|    fps                  | 245          |
|    iterations           | 39           |
|    time_elapsed         | 5214         |
|    total_timesteps      | 1277952      |
| train/                  |              |
|    approx_kl            | 0.0044237915 |
|    clip_fraction        | 0.0408       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.64        |
|    explained_variance   | 0.952        |
|    learning_rate        | 0.000176     |
|    loss                 | 17           |
|    n_updates            | 912          |
|    policy_gradient_loss | -0.00416     |
|    value_loss           | 36.7         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 245         |
|    iterations           | 40          |
|    time_elapsed         | 5346        |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 244          |
|    iterations           | 50           |
|    time_elapsed         | 6698         |
|    total_timesteps      | 1638400      |
| train/                  |              |
|    approx_kl            | 0.0053923726 |
|    clip_fraction        | 0.0695       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.46        |
|    explained_variance   | 0.957        |
|    learning_rate        | 0.000169     |
|    loss                 | 16.4         |
|    n_updates            | 1176         |
|    policy_gradient_loss | -0.00236     |
|    value_loss           | 34.3         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 244         |
|    iterations           | 51          |
|    time_elapsed         | 6830        |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 242         |
|    iterations           | 60          |
|    time_elapsed         | 8094        |
|    total_timesteps      | 1966080     |
| train/                  |             |
|    approx_kl            | 0.004828313 |
|    clip_fraction        | 0.0522      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.48       |
|    explained_variance   | 0.956       |
|    learning_rate        | 0.000163    |
|    loss                 | 17          |
|    n_updates            | 1416        |
|    policy_gradient_loss | -0.00294    |
|    value_loss           | 35.2        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 243          |
|    iterations           | 61           |
|    time_elapsed         | 8224         |
|    total_timesteps      | 1

-----------------------------------------
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 71          |
|    time_elapsed         | 9571        |
|    total_timesteps      | 2326528     |
| train/                  |             |
|    approx_kl            | 0.006227662 |
|    clip_fraction        | 0.0464      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.26       |
|    explained_variance   | 0.958       |
|    learning_rate        | 0.000156    |
|    loss                 | 17.6        |
|    n_updates            | 1680        |
|    policy_gradient_loss | -0.00233    |
|    value_loss           | 33.9        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 243          |
|    iterations           | 72           |
|    time_elapsed         | 9705         |
|    total_timesteps      | 2

New best mean reward!
--------------------------------
| time/              |         |
|    fps             | 242     |
|    iterations      | 81      |
|    time_elapsed    | 10959   |
|    total_timesteps | 2654208 |
--------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 242         |
|    iterations           | 82          |
|    time_elapsed         | 11093       |
|    total_timesteps      | 2686976     |
| train/                  |             |
|    approx_kl            | 0.004669955 |
|    clip_fraction        | 0.0433      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.17       |
|    explained_variance   | 0.96        |
|    learning_rate        | 0.00015     |
|    loss                 | 15.8        |
|    n_updates            | 1944        |
|    policy_gradient_loss | -0.00329    |
|    value_loss           | 32.7        |
---------------------------------

------------------------------------------
| time/                   |              |
|    fps                  | 242          |
|    iterations           | 92           |
|    time_elapsed         | 12446        |
|    total_timesteps      | 3014656      |
| train/                  |              |
|    approx_kl            | 0.0028432817 |
|    clip_fraction        | 0.0664       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.05        |
|    explained_variance   | 0.961        |
|    learning_rate        | 0.000143     |
|    loss                 | 15.3         |
|    n_updates            | 2184         |
|    policy_gradient_loss | -0.00213     |
|    value_loss           | 31.4         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 242         |
|    iterations           | 93          |
|    time_elapsed         | 12576       |
|    total_times

Eval num_timesteps=3360000, episode_reward=109.11 +/- 8.54
Episode length: 25.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 25           |
|    mean_reward          | 109          |
| time/                   |              |
|    total_timesteps      | 3360000      |
| train/                  |              |
|    approx_kl            | 0.0065817125 |
|    clip_fraction        | 0.0572       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.02        |
|    explained_variance   | 0.96         |
|    learning_rate        | 0.000136     |
|    loss                 | 15.5         |
|    n_updates            | 2448         |
|    policy_gradient_loss | -0.00147     |
|    value_loss           | 32.4         |
------------------------------------------
--------------------------------
| time/              |         |
|    fps             | 241     |
|    iterations      | 103     |
|   

------------------------------------------
| time/                   |              |
|    fps                  | 241          |
|    iterations           | 113          |
|    time_elapsed         | 15317        |
|    total_timesteps      | 3702784      |
| train/                  |              |
|    approx_kl            | 0.0053595393 |
|    clip_fraction        | 0.0578       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.976       |
|    explained_variance   | 0.96         |
|    learning_rate        | 0.00013      |
|    loss                 | 15.5         |
|    n_updates            | 2688         |
|    policy_gradient_loss | -0.00217     |
|    value_loss           | 32.6         |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 241         |
|    iterations           | 114         |
|    time_elapsed         | 15450       |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 241          |
|    iterations           | 124          |
|    time_elapsed         | 16802        |
|    total_timesteps      | 4063232      |
| train/                  |              |
|    approx_kl            | 0.0031929878 |
|    clip_fraction        | 0.0422       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.917       |
|    explained_variance   | 0.96         |
|    learning_rate        | 0.000123     |
|    loss                 | 15.8         |
|    n_updates            | 2952         |
|    policy_gradient_loss | -0.00205     |
|    value_loss           | 32.1         |
------------------------------------------
Eval num_timesteps=4080000, episode_reward=108.92 +/- 9.17
Episode length: 25.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 25          |
|    mean_

-----------------------------------------
| time/                   |             |
|    fps                  | 241         |
|    iterations           | 134         |
|    time_elapsed         | 18193       |
|    total_timesteps      | 4390912     |
| train/                  |             |
|    approx_kl            | 0.051140424 |
|    clip_fraction        | 0.0693      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.874      |
|    explained_variance   | 0.959       |
|    learning_rate        | 0.000117    |
|    loss                 | 16          |
|    n_updates            | 3192        |
|    policy_gradient_loss | -0.000528   |
|    value_loss           | 33.4        |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 241          |
|    iterations           | 135          |
|    time_elapsed         | 18326        |
|    total_timesteps      | 4

-----------------------------------------
| time/                   |             |
|    fps                  | 241         |
|    iterations           | 145         |
|    time_elapsed         | 19678       |
|    total_timesteps      | 4751360     |
| train/                  |             |
|    approx_kl            | 0.002478474 |
|    clip_fraction        | 0.0312      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.826      |
|    explained_variance   | 0.959       |
|    learning_rate        | 0.00011     |
|    loss                 | 16.1        |
|    n_updates            | 3456        |
|    policy_gradient_loss | -0.00135    |
|    value_loss           | 33.4        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 241        |
|    iterations           | 146        |
|    time_elapsed         | 19811      |
|    total_timesteps      | 4784128    

------------------------------------------
| time/                   |              |
|    fps                  | 241          |
|    iterations           | 155          |
|    time_elapsed         | 21070        |
|    total_timesteps      | 5079040      |
| train/                  |              |
|    approx_kl            | 0.0033580284 |
|    clip_fraction        | 0.0482       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.77        |
|    explained_variance   | 0.958        |
|    learning_rate        | 0.000104     |
|    loss                 | 16           |
|    n_updates            | 3696         |
|    policy_gradient_loss | -0.000601    |
|    value_loss           | 32.8         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 241          |
|    iterations           | 156          |
|    time_elapsed         | 21200        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 241          |
|    iterations           | 166          |
|    time_elapsed         | 22552        |
|    total_timesteps      | 5439488      |
| train/                  |              |
|    approx_kl            | 0.0030980902 |
|    clip_fraction        | 0.0279       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.744       |
|    explained_variance   | 0.956        |
|    learning_rate        | 9.73e-05     |
|    loss                 | 16.2         |
|    n_updates            | 3960         |
|    policy_gradient_loss | -0.00151     |
|    value_loss           | 34.5         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 241          |
|    iterations           | 167          |
|    time_elapsed         | 22684        |
|    total_

--------------------------------
| time/              |         |
|    fps             | 240     |
|    iterations      | 176     |
|    time_elapsed    | 23942   |
|    total_timesteps | 5767168 |
--------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 240          |
|    iterations           | 177          |
|    time_elapsed         | 24073        |
|    total_timesteps      | 5799936      |
| train/                  |              |
|    approx_kl            | 0.0073357737 |
|    clip_fraction        | 0.0428       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.678       |
|    explained_variance   | 0.956        |
|    learning_rate        | 9.04e-05     |
|    loss                 | 17.6         |
|    n_updates            | 4224         |
|    policy_gradient_loss | -0.000375    |
|    value_loss           | 33.9         |
--------------------------------------

------------------------------------------
| time/                   |              |
|    fps                  | 241          |
|    iterations           | 187          |
|    time_elapsed         | 25422        |
|    total_timesteps      | 6127616      |
| train/                  |              |
|    approx_kl            | 0.0032493654 |
|    clip_fraction        | 0.0315       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.626       |
|    explained_variance   | 0.955        |
|    learning_rate        | 8.42e-05     |
|    loss                 | 16.6         |
|    n_updates            | 4464         |
|    policy_gradient_loss | -0.00103     |
|    value_loss           | 34.5         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 241          |
|    iterations           | 188          |
|    time_elapsed         | 25554        |
|    total_

Eval num_timesteps=6480000, episode_reward=106.36 +/- 9.92
Episode length: 25.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 25           |
|    mean_reward          | 106          |
| time/                   |              |
|    total_timesteps      | 6480000      |
| train/                  |              |
|    approx_kl            | 0.0018966193 |
|    clip_fraction        | 0.0274       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.593       |
|    explained_variance   | 0.958        |
|    learning_rate        | 7.73e-05     |
|    loss                 | 15.2         |
|    n_updates            | 4728         |
|    policy_gradient_loss | -0.00153     |
|    value_loss           | 32.3         |
------------------------------------------
--------------------------------
| time/              |         |
|    fps             | 240     |
|    iterations      | 198     |
|   

------------------------------------------
| time/                   |              |
|    fps                  | 240          |
|    iterations           | 208          |
|    time_elapsed         | 28296        |
|    total_timesteps      | 6815744      |
| train/                  |              |
|    approx_kl            | 0.0016474476 |
|    clip_fraction        | 0.0289       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.51        |
|    explained_variance   | 0.957        |
|    learning_rate        | 7.11e-05     |
|    loss                 | 16.3         |
|    n_updates            | 4968         |
|    policy_gradient_loss | -0.000827    |
|    value_loss           | 33.6         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 240          |
|    iterations           | 209          |
|    time_elapsed         | 28429        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 240          |
|    iterations           | 219          |
|    time_elapsed         | 29783        |
|    total_timesteps      | 7176192      |
| train/                  |              |
|    approx_kl            | 0.0017311156 |
|    clip_fraction        | 0.0318       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.446       |
|    explained_variance   | 0.958        |
|    learning_rate        | 6.43e-05     |
|    loss                 | 15.9         |
|    n_updates            | 5232         |
|    policy_gradient_loss | -0.000648    |
|    value_loss           | 32.5         |
------------------------------------------
Eval num_timesteps=7200000, episode_reward=106.59 +/- 9.97
Episode length: 25.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 25           |
|    me

------------------------------------------
| time/                   |              |
|    fps                  | 240          |
|    iterations           | 229          |
|    time_elapsed         | 31174        |
|    total_timesteps      | 7503872      |
| train/                  |              |
|    approx_kl            | 0.0017289201 |
|    clip_fraction        | 0.0277       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.412       |
|    explained_variance   | 0.957        |
|    learning_rate        | 5.8e-05      |
|    loss                 | 16.6         |
|    n_updates            | 5472         |
|    policy_gradient_loss | -0.000707    |
|    value_loss           | 34           |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 240          |
|    iterations           | 230          |
|    time_elapsed         | 31308        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 240          |
|    iterations           | 240          |
|    time_elapsed         | 32664        |
|    total_timesteps      | 7864320      |
| train/                  |              |
|    approx_kl            | 0.0017810351 |
|    clip_fraction        | 0.0326       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.402       |
|    explained_variance   | 0.956        |
|    learning_rate        | 5.12e-05     |
|    loss                 | 17.5         |
|    n_updates            | 5736         |
|    policy_gradient_loss | -0.000647    |
|    value_loss           | 34           |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 240          |
|    iterations           | 241          |
|    time_elapsed         | 32798        |
|    total_


KeyboardInterrupt



In [20]:
model.save(r"C:\Users\kubaw\Desktop\DELFT\THESIS\CODE\TEST_MODELS\32_ENV_JA_LOW_2")

In [18]:
model = PPO.load(r"C:\Users\kubaw\Desktop\DELFT\THESIS\CODE\TEST_MODELS\32_ENV_JA_HIGH_2\best_model.zip")

In [19]:
evaluate1(1, env_test, model)

Act: [6 6] 
 Obs: [0.9885073  0.9697379  0.8736459  0.9415653  0.91794175 0.84082365
 1.0022224  0.98446006 1.0024734  0.9947046  0.87113607 0.98956496
 0.         0.91676897 0.8619849  0.910265   0.9815112  0.8916455
 0.99362576 0.87393075 0.99298745 0.98399216 0.93280506 0.8674039
 0.94918245 0.98852605 0.8918726  0.8924189  0.98943555 0.93925124
 0.86511976 0.         0.05560338 0.06768829 0.00636943 0.68792313
 0.30723476 0.06991712 0.7552     0.9163851 ] 
 Balance -2700.876967822199
Act: [1 0] 
 Obs: [0.97678894 0.96784407 0.85785264 0.92827374 0.89972305 0.8319673
 0.9901795  0.9718317  0.9979446  0.9917035  0.8615182  0.9896618
 0.9833173  0.91052663 0.8578547  0.90303415 0.96781325 0.88546246
 0.97719145 0.87464756 0.9822516  0.97714096 0.923392   0.857642
 0.93968487 0.98446095 0.8821544  0.8816431  0.9726597  0.93358606
 0.86204153 0.         0.08181785 0.08320452 0.00636943 0.6386011
 0.09698168 0.07411215 0.176      0.8858736 ] 
 Balance 4345.74493741363
Act: [0 0] 
 Obs: [

In [20]:
def evaluate2(episodes, environment, model):
    
    mean_irr = 0
    mean_fin_balance = 0
    irr = 0
    fin_balance = 0
    count = 0
    npv = 0
    list_npv = []
    env_balance = 0
    mean_env_balance = 0

    for ep in range(episodes):

        obs, _ = environment.reset()  # Unpack the tuple and ignore the info part
        done = False

        while not done:
            action, _ = model.predict(obs)  # Now obs is just the observation array
            obs, reward, done, truncated, info = environment.step(action)
            # Extracting the 2nd and 3rd key-value pairs
            keys = list(info.keys())
            values = list(info.values())

            # Getting the 2nd key-value pair
            second_value = values[2]

            # Getting the 3rd key-value pair
            fourth_value = values[4]
            
            fith_value = values[5]
        
        fin_balance += second_value
        npv += fith_value
        count += 1
        
        env_balance += second_value
        
        list_npv.append(fourth_value)
            
    mean_fin_balance = fin_balance/count
    mean_npv = npv/count
    mean_env_balance = env_balance / count

    #print(mean_npv)

    environment.close()
    
    return(mean_npv, mean_env_balance)

In [21]:
evaluate2(1000, env_test, model)

(28832.331212101613, 35878.91282684316)

In [253]:
def basepolicy1(episodes, environment):
    
    mean_irr = 0
    mean_fin_balance = 0
    irr = 0
    fin_balance = 0
    count = 0
    irr_count = 0
    npv = 0
    list_npv = []
    env_balance = 0
    mean_env_balance = 0

    for ep in range(episodes):

        obs, _ = environment.reset()  # Unpack the tuple and ignore the info part
        done = False

        while not done:
            
            action = np.array([0, 0])
            for i, n in enumerate(obs):
                if i < 32:
                    if i < 16:
                        if n < 0.80:
                            action[0] += 1
                    if i >= 16:
                        if n < 0.80:
                            action[1] += 1
                    

            obs, reward, done, truncated, info = environment.step(action)

            # Extracting the 2nd and 3rd key-value pairs
            keys = list(info.keys())
            values = list(info.values())

            # Getting the 2nd key-value pair
            second_value = values[1]

            # Getting the 3rd key-value pair
    
            third_value = values[2]
            fourth_value = values[4]
            fith_value = values[5]
            
        
        fin_balance += second_value
        npv += fourth_value
        count += 1
        
        env_balance += fith_value
        
        list_npv.append(fourth_value)
            
    mean_fin_balance = fin_balance/count
    mean_env_balance = env_balance/count
    mean_npv = npv/count

    #print(mean_npv, "\n", mean_irr, "\n" )

    environment.close()
    
    return(mean_npv, mean_env_balance)

In [254]:
basepolicy1(5000, env_test)

(9306.736562061036, 44404.16625475614)