In [9]:
# imports
import pandas as pd
import numpy as np
import numpy_financial as npf
import random  
import matplotlib.pyplot as plt
import time

from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO, DQN, A2C
from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback, EvalCallback
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from sb3_contrib import RecurrentPPO

from environment_fx_no_env import calculate_import_export, test1, test2, test3, evaluate1, evaluate2, basepolicy

import gymnasium as gym
from gymnasium import spaces

import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
from typing import Callable

import torch
import torch as th
from torch import nn
import torch.nn as nn

In [10]:
# import and modify data

# Assuming the file is a CSV and specifying the correct path and filename
file_path = r"file_path"

# Use pandas to read the CSV file
AC_OUTPUT = pd.read_csv(file_path + "/AC_OUTPUT_JA")
elec_df = pd.read_csv(file_path + "/hourly_consumption2.csv")
import_price = pd.read_csv(file_path + "/electricity_tariff.csv")

#elec_df = elec_df * 1000
elec_df = elec_df.drop('HourOfYear', axis=1)

elec_df['hour_of_day'] = np.arange(8760) % 24
elec_df['day_of_week'] = np.arange(8760) // 24 % 7  # 0 is Monday, 6 is Sunday

# Define rates
peak_rate = 1.45
normal_rate = 1
off_peak_rate = 0.85

# Function to determine rate based on hour and day
def determine_rate(hour, day):
    if day < 5:  # Monday to Friday
        if 16 <= hour < 21:  # 4pm to 9pm
            return peak_rate
        elif 6 <= hour < 10:  # 7am to 9am and 10am to 3pm
            return normal_rate
        else:  # Off-peak times
            return off_peak_rate
    else:  # Weekend
        if 16 <= hour < 21:  # 4pm to 9pm
            return normal_rate
        else:  # Off-peak times
            return off_peak_rate
    
# Apply the function to each row to determine the rate
elec_df['rate'] = elec_df.apply(lambda row: determine_rate(row['hour_of_day'], row['day_of_week']), axis=1)

import_price_df = import_price.drop(columns=['x'])
import_price_df = import_price_df[:-26]

train_cols = random.sample(list(import_price_df.columns), 7000)
import_price_train = import_price_df[train_cols]
test_cols = [col for col in import_price_df.columns if col not in train_cols]
import_price_test = import_price_df[test_cols]

Eff = pd.read_csv(file_path + "/Efficency_impr")
Eff = (Eff)/100 + 1

CAPEX = pd.read_csv(file_path + "/CAPEX_JA.csv")
CAPEX_JA = (CAPEX[:26]) * 1.3


train_cols_CAPEX = random.sample(list(CAPEX_JA.columns), 7000)
test_cols_CAPEX = [col for col in CAPEX_JA.columns if col not in train_cols_CAPEX]

CAPEX_JA_train = CAPEX_JA[train_cols_CAPEX]
CAPEX_JA_test = CAPEX_JA[test_cols_CAPEX]

train_cols_Eff = random.sample(list(Eff.columns), 7000)
test_cols_Eff = [col for col in Eff.columns if col not in train_cols_Eff]

Eff_train = Eff[train_cols_Eff]
Eff_test = Eff[test_cols_Eff]

AC_OUTPUT_arr = (np.array(AC_OUTPUT.T)).flatten()

Eff_train_arr = np.array(Eff_train.T)
Eff_test_arr = np.array(Eff_test.T)

CAPEX_JA_train_arr = np.array(CAPEX_JA_train.T)
CAPEX_JA_test_arr = np.array(CAPEX_JA_test.T)

elec_consum_arr = np.array(elec_df["Consumption"])
import_price_rate = np.array(elec_df["rate"])

import_price_train_arr = np.array(import_price_train.T)
import_price_test_arr = np.array(import_price_train.T)

In [11]:
class TrainEnvironment(gym.Env):
    def __init__(self, AC_OUTPUT_arr, elec_consum_arr, import_price_rate, import_tariff, efficency, CAPEX):
        
        # Price per watthour
        self.import_price_df = import_tariff
        self.import_price_at_zero = np.float32(0.00035)
        self.import_price_rate = import_price_rate
        
        # Energy Balance
        self.AC_OUTPUT = AC_OUTPUT_arr
        self.elec_df = elec_consum_arr
        self.max_export = 4000
        self.number_of_panels = 12
        
        # Degradation
        self.deg_mu = 0.82 # Trina: 1.19, JA: 0.82, Maxeon: 0.67
        self.deg_std = 0.555 
        
        self.phi = 30 # Trina: 15, JA: 30, Maxeon: 50

        
        # Efficency Development
        self.efficency_develop_df = efficency
        self.efficency_at_zero = 1.0
        
        # Costs
        self.power_at_zero = 415  # Trina: 265, JA: 415, Maxeon: 435
        self.cost_per_Wp_df_at_zero = 0.69 # Trina: 0.36, JA: 0.69, Maxeon: 1.58
        self.cost_per_Wp_df = CAPEX
        self.initial_other_costs = 150
        
        self.operational_cost = 16.8
        
        self.loan_interest_rate = 1.10
        self.normal_interest_rate = 1.02
        
        self.low_budget = 0 # Low budget: 0, High Budget: 750
        self.high_budget = 750 # Low budget: 750, High Budget: 1500
                        
        # Spaces and length
        self.action_space = spaces.Discrete(self.number_of_panels + 1)
        self.observation_space = spaces.Box(0, 1.25, shape=(self.number_of_panels + 7,))
        self.episode_len = 25
        self.months_per_timestep = 12
        
    def _get_obs(self):
        
        return self.observation
    
    def calculate_import_export(self, AC_OUTPUT, elec_df, export_price, import_price):
        
        """
        Calculate the annual Wh of energy exported to the grid (exported) and saved (minimised)
        """
        
        AC_OUTPUT_tot = self._get_obs()[0:self.number_of_panels].sum() * self.AC_OUTPUT 

        exported = (AC_OUTPUT_tot - self.elec_df).clip(min=0, max = self.max_export)        
        export_revenue = (export_price * exported).sum()

        
        minimised = AC_OUTPUT_tot - exported 
        minimised_revenue = (minimised * (self.import_price_rate * import_price)).sum()
        

        return export_revenue, AC_OUTPUT_tot, minimised_revenue
    
    def reset(self, seed=None):
        
        """
        Reset the environment to the original state at t=1
        """
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        
        # Panels
        self.init_obs = np.random.uniform(0, 1, size=self.number_of_panels).astype(np.float32)
        self.init_obs = np.where(self.init_obs < 0.5, 0.0, np.random.uniform(0.85, 1.0, size=self.number_of_panels))

        # Combine all initialization into a single step for efficiency
        self.import_price_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min()) / (self.import_price_df.max().max() - self.import_price_df.min().min())
        self.FiT_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min() * 0.33) / (self.import_price_df.max().max() - self.import_price_df.min().min() * 0.33)
        self.efficency_at_zero_norm = (self.efficency_at_zero - 0.999) / (1.156 - 0.999)
        self.panel_cost_and_inverter_at_zero_norm = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())
        
        self.current_budget_constraint = np.random.randint(self.low_budget, self.high_budget)
        self.next_step_budget_constraint = 0
        
        
        # Complete observation initialization in one go
        self.observation = np.concatenate([
            self.init_obs,
            [self.import_price_at_zero_norm, self.FiT_at_zero_norm, self.efficency_at_zero_norm, 
             self.panel_cost_and_inverter_at_zero_norm, 0., 0., 0.]
        ]).astype(np.float32)

        self.previous_observation = self.observation.copy()

        # RANDOM IMPORT PRICE
        self.random_import_price = self.import_price_df[np.random.choice(self.import_price_df.shape[0])] 

        # RANDOM EFFICENCY
        self.random_efficency_develop = self.efficency_develop_df[np.random.choice(self.efficency_develop_df.shape[0])]   
        
        # RANDOM COST PER WP
        self.random_cost_per_Wp = self.cost_per_Wp_df[np.random.choice(self.cost_per_Wp_df.shape[0])]   
        
        
        self.episode_len = 25  
    
        info = {}
        
        # RESET BALANCES
        self.fin_balance_tot = 0
        self.reward_tot = 0
        self.env_balance_tot = 0
        self.produced = 0
        self.other_costs = 0
        self.FiT = 0.0004
        self.next_FiT = 0.0004

        self.total_cash_flow = []
        self.annual_cash_flow = 0
                
        self.due_loans = [0, 0, 0, 0] 
        self.current_interest = 0
        self.step_total_interest = 1
        self.survival = np.zeros(self.number_of_panels, dtype=np.float32)
        self.resale_values = array_of_zeros = np.zeros(self.number_of_panels, dtype=np.float32)
        
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)
        
        self.two_year_ago_interest = 0
        self.first_year_interest = []
        self.second_year_interest = [0]
        self.third_year_interest = [0, 0]
        self.fourth_year_interest = [0, 0, 0]
        self.next_year_total = 0
        
        self.survival = np.zeros(self.number_of_panels, dtype=np.float32)
    
        return self.observation, info
    
    def calculate_resale(self, initial_panel_cost, indices):
        
        self.resale_values[indices] = initial_panel_cost
        
        self.resale_values = self.resale_values * 0.85
        
        for count, i in enumerate(self.broke):
            if i == 1:
                self.resale_values[count] = 0
        
        resale_step = self.resale_values[indices].sum()
        
        return resale_step
    
    def calculate_panel_inv_cost(self, cost_per_Wp):
        
        PW_ep = self.efficency_develop * self.power_at_zero
        
        panel_cost_and_inverter = PW_ep * cost_per_Wp
        
        return panel_cost_and_inverter
        
    def calculate_penalty(self, current_step, annual_expense):
              
        year = 25 - current_step
        
        if year > 0:
            self.current_budget_constraint = self.next_step_budget_constraint    
            
        
        self.current_interest = self.next_year_total
        annual_expense = (-annual_expense)
        value = 0 
        loan = 0
        annual_interest = 0

        if annual_expense > self.current_budget_constraint:
            loan = (self.current_budget_constraint - annual_expense)
            value = annual_expense / self.current_budget_constraint
            periods = 2 if value < 2 else 3 if value < 3 else 4

            annual_interest = loan / periods
            interest_multiplier = 1

            for i in range(4):
                if i < periods:
                    self.due_loans[i] = annual_interest * interest_multiplier
                    interest_multiplier *= self.loan_interest_rate
                else:
                    self.due_loans[i] = 0
        else:
             self.due_loans = [0, 0, 0, 0]
    
        self.first_year_interest.append(self.due_loans[0])
        self.second_year_interest.append(self.due_loans[1])
        self.third_year_interest.append(self.due_loans[2])
        self.fourth_year_interest.append(self.due_loans[3])
    
    
        self.next_year_total = self.first_year_interest[year] + self.second_year_interest[year] + self.third_year_interest[year] + self.fourth_year_interest[year]
        
        self.next_step_budget_constraint = np.random.randint(self.low_budget, self.high_budget) * self.step_total_interest
        current_budget_observation = (self.next_step_budget_constraint - self.low_budget * self.step_total_interest) / (self.high_budget * self.step_total_interest - self.low_budget * self.step_total_interest) 
        self.observation[self.number_of_panels + 6] = current_budget_observation
                
        return self.current_interest, self.due_loans, self.next_year_total
        
    def calculate_total_CAPEX(self, action_step, panel_cost_and_inverter):
        """
        Calculate CAPEX each step in a vectorized manner.
        """
        BOS = panel_cost_and_inverter * 0.55
        number_installed = int(np.sum(action_step))

        # Calculate costs from module and inverter
        panel_cost_and_inverter_step = panel_cost_and_inverter * number_installed

        # Calculate other installation costs
        if number_installed == 0:
            other_costs = 0
        elif number_installed == 1:
            other_costs = self.initial_other_costs * self.step_total_interest
        else:
            discounts = 0.9 ** np.arange(number_installed)
            other_costs = (self.initial_other_costs * self.step_total_interest * discounts).sum()

        # Calculate BOS costs using vector operations
        is_new_installation = (self.previous_observation[:number_installed] == 0) & (action_step[:number_installed] == 1)
        is_replacement = (self.previous_observation[:number_installed] > 0) & (action_step[:number_installed] == 1)
        BOS_cost = np.sum(BOS * is_new_installation) + np.sum((BOS / 2) * is_replacement)

        # Sum total CAPEX
        total_CAPEX = panel_cost_and_inverter_step + BOS_cost + other_costs

        return total_CAPEX, panel_cost_and_inverter
        
    def failure(self, actions):
        
        beta = 3  # Shape parameter

        # Determine which panels are active based on the actions and previous observations.
        if self.episode_len == 24:
            active_panels = (self.observation[:self.number_of_panels] > 0.85)
        else:
            active_panels = (self.observation[:self.number_of_panels] == self.efficency_develop)

        # Calculate lifespan for all active panels at once
        lifespans = np.random.weibull(beta, self.number_of_panels) * self.phi
        lifespans = np.where(active_panels, lifespans, 0)  # Apply lifespan only to active panels

        # Adjust survival times based on episode length
        self.survival[:self.number_of_panels] = np.where(
            active_panels,
            np.abs(lifespans.astype(int)) + np.abs(self.episode_len - 25),
            self.survival[:self.number_of_panels]
        )

        return self.survival

    def calculate_FiT(self, episodes, import_price):
            
        self.FiT = import_price
            
        if episodes == 25:
            self.FiT = self.FiT
            
        elif episodes == 24 or episodes == 23:
            self.FiT = self.FiT * 0.64
            
        elif episodes == 22:
            self.FiT = self.FiT * 0.46
            
        elif episodes == 21:
            self.FiT = self.FiT * 0.55
            
        elif episodes < 20:
            self.FiT = self.FiT * 0.33
            
        elif episodes == 20:
            self.FiT = self.FiT * 0.37
            
        return self.FiT
                        
    def step(self, action):
        
        """
        defines actions, reward etc.
        """
        
        # RESET THE ANNUAL BALANCES
        self.total_CAPEX = 0
        self.pv_costs = 0
        self.fin_balance = 0
        self.number_installed = 0
        current_penalty = 0
        self.other_costs = 0
        next_step_penalty = 0
        self.step_total_interest = self.step_total_interest * self.normal_interest_rate
        current_operational_costs = self.operational_cost * self.step_total_interest
        
        
        self.cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
        self.import_price = self.random_import_price[abs(self.episode_len - 25)]
        self.efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
           
        self.panel_cost_and_inverter = self.calculate_panel_inv_cost(self.cost_per_Wp)
        FiT = self.calculate_FiT(self.episode_len, self.import_price)
        
        reward = 0   
        actions_step = np.random.rand(8)
        
        # Find indices of the lowest 'action' values in previous_observation
        indices = np.argsort(self.previous_observation[:self.number_of_panels])[:action]

        # Replace these indices in the observation with efficiency_develop
        self.observation[:self.number_of_panels][indices] = self.efficency_develop

        # Copy over the other values from previous_observation to observation
        mask = np.ones(len(self.previous_observation[:self.number_of_panels]), dtype=bool)
        mask[indices] = False
        self.observation[:self.number_of_panels][mask] = self.previous_observation[:self.number_of_panels][mask]

        replaced_panels = np.zeros(len(self.previous_observation[:self.number_of_panels]), dtype=int)
        replaced_panels[indices] = 1

        instaltion = (self.observation[:self.number_of_panels] > 0).astype(int)
        self.pv_costs -= instaltion.sum() * current_operational_costs

        actions_step = np.array(replaced_panels)

            
        if action > 0:
            step_CAPEX, panel_cost_and_inverter = self.calculate_total_CAPEX(actions_step, self.panel_cost_and_inverter)
            self.pv_costs -= step_CAPEX
            
        else:
            panel_cost_and_inverter = 0
                
        next_observation = self._get_obs()

        
        # Calculate the Reslae value
        resale = self.calculate_resale(panel_cost_and_inverter, indices) #  ***
        
        self.pv_costs += resale
 
        
        # CALCULATE THE BUDGET INTEREST
        current_penalty, due_loans, next_step_penalty = self.calculate_penalty(self.episode_len, self.pv_costs)

        
        # CALCULATE THE ENERGY YIELD
        exported_revenue, AC_OUTPUT_tot, minimised_revenue = self.calculate_import_export(self.AC_OUTPUT, 
                                                                          self.elec_df, FiT, self.import_price)        
        
        pv_costs_observation = - self.pv_costs / 10000
        self.observation[self.number_of_panels + 4] = pv_costs_observation
        
        next_step_penalty_observation = - next_step_penalty / 8000
        self.observation[self.number_of_panels + 5] = next_step_penalty_observation
        
        
        # CALCULATE STEP BALANCES
        self.fin_balance += self.pv_costs
        self.fin_balance += current_penalty
        self.fin_balance += float(exported_revenue + minimised_revenue)
        
        # CALCULATE TOTAL BALANCES
        self.fin_balance_tot += self.fin_balance                
        
        # SUBSTRACT 1 FOR TIMESTEP
        self.episode_len -= 1
        done = self.episode_len <= 0
        
        #reward = self.fin_balance_tot / 1000 if done else 0
        reward = self.fin_balance / 1000
        
        # FAILURE
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)
        survival = self.failure(actions_step)
        
        for c, p in enumerate(survival):
            
            if c < self.number_of_panels:

                if p - 1 <= abs(self.episode_len - 24):
                    self.broke[c] = 1
                    self.observation[c] = 0
        
        # DEGRADATION RATE
        # Applying degradation only to panels that are operational (above 0.1 efficiency)
        active_panels = self.observation[:self.number_of_panels] > 0.1
        degradations = np.random.normal(self.deg_mu, self.deg_std, size=self.number_of_panels) / 100
        self.observation[:self.number_of_panels][active_panels] -= degradations[active_panels]
        
        if not done: 
        
            self.next_cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
            self.next_import_price = self.random_import_price[abs(self.episode_len - 25)]
            self.next_efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
            next_FIT = self.calculate_FiT(self.episode_len, self.next_import_price)
        
            price_observation = (self.next_import_price - 0.00022499) / (0.0020798 - 0.00022499)
            self.observation[self.number_of_panels] = price_observation

            FiT_observation = (next_FIT - 0.00022499 * 0.33) / (0.0020798 - 0.00022499 * 0.33)
            self.observation[self.number_of_panels + 1] = FiT_observation

            eff_observation = (self.next_efficency_develop - 0.999) / (1.156 - 0.999)
            self.observation[self.number_of_panels + 2] = eff_observation

            cost_per_Wp_observation = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())
            self.observation[self.number_of_panels + 3] = cost_per_Wp_observation
        
        info = {"step financial balance (eur):": self.fin_balance,
               "total financial balance: (eur)": self.fin_balance_tot,
               "internal rate of return": 0,
               "current_interest": resale,
                "net present value": 0}
         
        
        self.previous_observation = self.observation.copy()
        
        return self.observation, reward, done, False, info

In [12]:
class TestEnvironment(gym.Env):
    def __init__(self, AC_OUTPUT_arr, elec_consum_arr, import_price_rate, import_tariff, efficency, CAPEX):
        
        # Price per watthour
        self.import_price_df = import_tariff
        self.import_price_at_zero = np.float32(0.00035)
        self.import_price_rate = import_price_rate
        
        # Energy Balance
        self.AC_OUTPUT = AC_OUTPUT_arr
        self.elec_df = elec_consum_arr
        self.max_export = 4000
        self.number_of_panels = 12
        
        # Degradation
        self.deg_mu = 0.82 # Trina: 1.19, JA: 0.82, Maxeon: 0.67
        self.deg_std = 0.555 
        
        self.phi = 30 # Trina: 15, JA: 30, Maxeon: 50

        
        # Efficency Development
        self.efficency_develop_df = efficency
        self.efficency_at_zero = 1.0
        
        # Costs
        self.power_at_zero = 415  # Trina: 265, JA: 415, Maxeon: 435
        self.cost_per_Wp_df_at_zero = 0.69 # Trina: 0.36, JA: 0.69, Maxeon: 1.58
        self.cost_per_Wp_df = CAPEX
        self.initial_other_costs = 150
        
        self.operational_cost = 16.8
        
        self.loan_interest_rate = 1.10
        self.normal_interest_rate = 1.02
        
        self.low_budget = 0 # Low budget: 0, High Budget: 750
        self.high_budget = 750 # Low budget: 750, High Budget: 1500
                        
        # Spaces and length
        self.action_space = spaces.Discrete(self.number_of_panels + 1)
        self.observation_space = spaces.Box(0, 1.25, shape=(self.number_of_panels + 7,))
        self.episode_len = 25
        self.months_per_timestep = 12
        
    def _get_obs(self):
        
        return self.observation
    
    def calculate_import_export(self, AC_OUTPUT, elec_df, export_price, import_price):
        
        """
        Calculate the annual Wh of energy exported to the grid (exported) and saved (minimised)
        """
        
        AC_OUTPUT_tot = self._get_obs()[0:self.number_of_panels].sum() * self.AC_OUTPUT 

        exported = (AC_OUTPUT_tot - self.elec_df).clip(min=0, max = self.max_export)        
        export_revenue = (export_price * exported).sum()

        
        minimised = AC_OUTPUT_tot - exported 
        minimised_revenue = (minimised * (self.import_price_rate * import_price)).sum()
        

        return export_revenue, AC_OUTPUT_tot, minimised_revenue
    
    def reset(self, seed=None):
        
        """
        Reset the environment to the original state at t=1
        """
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
            
        # Panels
        self.init_obs = np.random.uniform(0, 1, size=self.number_of_panels).astype(np.float32)
        self.init_obs = np.where(self.init_obs < 0.5, 0.0, np.random.uniform(0.85, 1.0, size=self.number_of_panels))

        # Combine all initialization into a single step for efficiency
        self.import_price_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min()) / (self.import_price_df.max().max() - self.import_price_df.min().min())
        self.FiT_at_zero_norm = (self.import_price_at_zero - self.import_price_df.min().min() * 0.33) / (self.import_price_df.max().max() - self.import_price_df.min().min() * 0.33)
        self.efficency_at_zero_norm = (self.efficency_at_zero - 0.999) / (1.156 - 0.999)
        self.panel_cost_and_inverter_at_zero_norm = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())

        
        self.current_budget_constraint = np.random.randint(self.low_budget, self.high_budget)
        self.next_step_budget_constraint = 0
        
        
        # Complete observation initialization in one go
        self.observation = np.concatenate([
            self.init_obs,
            [self.import_price_at_zero_norm, self.FiT_at_zero_norm, self.efficency_at_zero_norm, 
             self.panel_cost_and_inverter_at_zero_norm, 0., 0., 0.]
        ]).astype(np.float32)

        self.previous_observation = self.observation.copy()

        # RANDOM IMPORT PRICE
        self.random_import_price = self.import_price_df[np.random.choice(self.import_price_df.shape[0])] 

        # RANDOM EFFICENCY
        self.random_efficency_develop = self.efficency_develop_df[np.random.choice(self.efficency_develop_df.shape[0])]   
        
        # RANDOM COST PER WP
        self.random_cost_per_Wp = self.cost_per_Wp_df[np.random.choice(self.cost_per_Wp_df.shape[0])]   
        
        
        self.episode_len = 25  
    
        info = {}
        
        # RESET BALANCES
        self.fin_balance_tot = 0
        self.reward_tot = 0
        self.env_balance_tot = 0
        self.produced = 0
        self.other_costs = 0
        self.FiT = 0.0004
        self.next_FiT = 0.0004
        self.resale_values = array_of_zeros = np.zeros(self.number_of_panels, dtype=np.float32)
        
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)
        self.total_cash_flow = []
        self.annual_cash_flow = 0
                
        self.due_loans = [0, 0, 0, 0] 
        self.current_interest = 0
        self.step_total_interest = 1
        
        self.two_year_ago_interest = 0
        self.first_year_interest = []
        self.second_year_interest = [0]
        self.third_year_interest = [0, 0]
        self.fourth_year_interest = [0, 0, 0]
        self.next_year_total = 0
        
        self.survival = np.zeros(self.number_of_panels, dtype=np.float32)
    
        return self.observation, info
    
    def calculate_resale(self, initial_panel_cost, indices):
        
        self.resale_values[indices] = initial_panel_cost
        
        self.resale_values = self.resale_values * 0.85
        
        for count, i in enumerate(self.broke):
            if i == 1:
                self.resale_values[count] = 0
        
        resale_step = self.resale_values[indices].sum()
        
        return resale_step
    
    def calculate_panel_inv_cost(self, cost_per_Wp):
        
        PW_ep = self.efficency_develop * self.power_at_zero
        
        panel_cost_and_inverter = PW_ep * cost_per_Wp
        
        return panel_cost_and_inverter
    
    def calculate_irr_and_npv(self, pv_cost, minimised_revenue, export_revenue, penalty):
                
        """
        Calculates total cash flow of the project needed for the internal rate of return
        """ 
        self.expences = 0
        self.annual_cash_flow = 0
        initial_cost = 0
        
        self.expences = pv_cost
        self.annual_cash_flow = self.expences + export_revenue + minimised_revenue + penalty
        initial_cost_q, x = self.calculate_total_CAPEX(self.init_obs, self.panel_cost_and_inverter)
        initial_cost = - initial_cost_q
        
        if self.episode_len == 24:
            self.total_cash_flow.append(initial_cost + self.annual_cash_flow) 
        else:
            self.total_cash_flow.append(self.annual_cash_flow) 
        
        return self.total_cash_flow
        
    def calculate_penalty(self, current_step, annual_expense):
              
        year = 25 - current_step
        
        if year > 0:
            self.current_budget_constraint = self.next_step_budget_constraint    
            
        
        self.current_interest = self.next_year_total
        annual_expense = (-annual_expense)
        value = 0 
        loan = 0
        annual_interest = 0

        if annual_expense > self.current_budget_constraint:
            loan = (self.current_budget_constraint - annual_expense)
            value = annual_expense / self.current_budget_constraint
            periods = 2 if value < 2 else 3 if value < 3 else 4

            annual_interest = loan / periods
            interest_multiplier = 1

            for i in range(4):
                if i < periods:
                    self.due_loans[i] = annual_interest * interest_multiplier
                    interest_multiplier *= self.loan_interest_rate
                else:
                    self.due_loans[i] = 0
        else:
             self.due_loans = [0, 0, 0, 0]
    
        self.first_year_interest.append(self.due_loans[0])
        self.second_year_interest.append(self.due_loans[1])
        self.third_year_interest.append(self.due_loans[2])
        self.fourth_year_interest.append(self.due_loans[3])
    
    
        self.next_year_total = self.first_year_interest[year] + self.second_year_interest[year] + self.third_year_interest[year] + self.fourth_year_interest[year]
        
        self.next_step_budget_constraint = np.random.randint(self.low_budget, self.high_budget) * self.step_total_interest
        current_budget_observation = (self.next_step_budget_constraint - self.low_budget * self.step_total_interest) / (self.high_budget * self.step_total_interest - self.low_budget * self.step_total_interest) 
        self.observation[self.number_of_panels + 6] = current_budget_observation
                
        return self.current_interest, self.due_loans, self.next_year_total
        
    def calculate_total_CAPEX(self, action_step, panel_cost_and_inverter):
        """
        Calculate CAPEX each step in a vectorized manner.
        """
        BOS = panel_cost_and_inverter * 0.55
        number_installed = int(np.sum(action_step))

        # Calculate costs from module and inverter
        panel_cost_and_inverter_step = panel_cost_and_inverter * number_installed

        # Calculate other installation costs
        if number_installed == 0:
            other_costs = 0
        elif number_installed == 1:
            other_costs = self.initial_other_costs * self.step_total_interest
        else:
            discounts = 0.9 ** np.arange(number_installed)
            other_costs = (self.initial_other_costs * self.step_total_interest * discounts).sum()

        # Calculate BOS costs using vector operations
        is_new_installation = (self.previous_observation[:number_installed] == 0) & (action_step[:number_installed] == 1)
        is_replacement = (self.previous_observation[:number_installed] > 0) & (action_step[:number_installed] == 1)
        BOS_cost = np.sum(BOS * is_new_installation) + np.sum((BOS / 2) * is_replacement)

        # Sum total CAPEX
        total_CAPEX = panel_cost_and_inverter_step + BOS_cost + other_costs

        return total_CAPEX, panel_cost_and_inverter
        
    def failure(self, actions):
        
        beta = 3  # Shape parameter

        # Determine which panels are active based on the actions and previous observations.
        if self.episode_len == 24:
            active_panels = (self.observation[:self.number_of_panels] > 0.85)
        else:
            active_panels = (self.observation[:self.number_of_panels] == self.efficency_develop)

        # Calculate lifespan for all active panels at once
        lifespans = np.random.weibull(beta, self.number_of_panels) * self.phi
        lifespans = np.where(active_panels, lifespans, 0)  # Apply lifespan only to active panels

        # Adjust survival times based on episode length
        self.survival[:self.number_of_panels] = np.where(
            active_panels,
            np.abs(lifespans.astype(int)) + np.abs(self.episode_len - 25),
            self.survival[:self.number_of_panels]
        )

        return self.survival

    def calculate_FiT(self, episodes, import_price):
            
        self.FiT = import_price
            
        if episodes == 25:
            self.FiT = self.FiT
            
        elif episodes == 24 or episodes == 23:
            self.FiT = self.FiT * 0.64
            
        elif episodes == 22:
            self.FiT = self.FiT * 0.46
            
        elif episodes == 21:
            self.FiT = self.FiT * 0.55
            
        elif episodes < 20:
            self.FiT = self.FiT * 0.33
            
        elif episodes == 20:
            self.FiT = self.FiT * 0.37
            
        return self.FiT
                        
    def step(self, action):
        
        """
        defines actions, reward etc.
        """
        
        # RESET THE ANNUAL BALANCES
        self.total_CAPEX = 0
        self.pv_costs = 0
        self.fin_balance = 0
        self.number_installed = 0
        irr_fin = 0
        npv_fin = 0
        current_penalty = 0
        self.other_costs = 0
        next_step_penalty = 0
        self.step_total_interest = self.step_total_interest * self.normal_interest_rate
        current_operational_costs = self.operational_cost * self.step_total_interest
        
        
        self.cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
        self.import_price = self.random_import_price[abs(self.episode_len - 25)]
        self.efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
           
        self.panel_cost_and_inverter = self.calculate_panel_inv_cost(self.cost_per_Wp)
        FiT = self.calculate_FiT(self.episode_len, self.import_price)
        
        reward = 0   
        actions_step = np.random.rand(self.number_of_panels + 1)
        
        
        # Find indices of the lowest 'action' values in previous_observation
        indices = np.argsort(self.previous_observation[:self.number_of_panels])[:action]

        # Replace these indices in the observation with efficiency_develop
        self.observation[:self.number_of_panels][indices] = self.efficency_develop

        # Copy over the other values from previous_observation to observation
        mask = np.ones(len(self.previous_observation[:self.number_of_panels]), dtype=bool)
        mask[indices] = False
        self.observation[:self.number_of_panels][mask] = self.previous_observation[:self.number_of_panels][mask]

        replaced_panels = np.zeros(len(self.previous_observation[:self.number_of_panels]), dtype=int)
        replaced_panels[indices] = 1

        instaltion = (self.observation[:self.number_of_panels] > 0).astype(int)
        self.pv_costs -= instaltion.sum() * current_operational_costs

        actions_step = np.array(replaced_panels)

            
        if action > 0:
            step_CAPEX, panel_cost_and_inverter = self.calculate_total_CAPEX(actions_step, self.panel_cost_and_inverter)
            self.pv_costs -= step_CAPEX
            
        else:
            panel_cost_and_inverter = 0
                
        next_observation = self._get_obs()

        # Calculate the Reslae value
        resale = self.calculate_resale(panel_cost_and_inverter, indices) #  ***
        
        self.pv_costs += resale

        
        # CALCULATE THE BUDGET INTEREST
        current_penalty, due_loans, next_step_penalty = self.calculate_penalty(self.episode_len, self.pv_costs)
        
        
        # CALCULATE THE ENERGY YIELD
        exported_revenue, AC_OUTPUT_tot, minimised_revenue = self.calculate_import_export(self.AC_OUTPUT, 
                                                                          self.elec_df, FiT, self.import_price)        
        
        pv_costs_observation = - self.pv_costs / 10000
        self.observation[self.number_of_panels + 4] = pv_costs_observation
        
        next_step_penalty_observation = - next_step_penalty / 8000
        self.observation[self.number_of_panels + 5] = next_step_penalty_observation
        
        
        # CALCULATE STEP BALANCES
        self.fin_balance += self.pv_costs
        self.fin_balance += current_penalty
        self.fin_balance += float(exported_revenue + minimised_revenue)
        
        # CALCULATE TOTAL BALANCES
        self.fin_balance_tot += self.fin_balance                
        
        # SUBSTRACT 1 FOR TIMESTEP
        self.episode_len -= 1
        done = self.episode_len <= 0
        
        # CALCULATE IRR, NPV AND CARBON INTENSITY
        total_cash_flow = self.calculate_irr_and_npv(self.pv_costs, exported_revenue, minimised_revenue, current_penalty)
        irr = npf.irr(total_cash_flow) * 100
        npv = npf.npv(0.04 ,total_cash_flow)
            
        # RETURNS AND CALCULATE REWARD
        if self.episode_len == 0:
            irr_fin = irr
            npv_fin = npv
        
        reward = self.fin_balance / 1000
        
        # FAILURE
         
        survival = self.failure(actions_step)
        self.broke = np.zeros(self.number_of_panels, dtype=np.float32)

        for c, p in enumerate(survival):
            
            if c < self.number_of_panels:

                if p - 1 <= abs(self.episode_len - 24):
                    self.broke[c] = 1

                    self.observation[c] = 0
        
        # DEGRADATION RATE
        # Applying degradation only to panels that are operational (above 0.1 efficiency)
        active_panels = self.observation[:self.number_of_panels] > 0.1
        degradations = np.random.normal(self.deg_mu, self.deg_std, size=self.number_of_panels) / 100
        self.observation[:self.number_of_panels][active_panels] -= degradations[active_panels]
        
        if not done: 
        
            self.next_cost_per_Wp = self.random_cost_per_Wp[abs(self.episode_len - 25)]
            self.next_import_price = self.random_import_price[abs(self.episode_len - 25)]
            self.next_efficency_develop = self.random_efficency_develop[abs(self.episode_len - 25)]
            next_FIT = self.calculate_FiT(self.episode_len, self.next_import_price)
        
            price_observation = (self.next_import_price - 0.00022499) / (0.0020798 - 0.00022499)
            self.observation[self.number_of_panels] = price_observation

            FiT_observation = (next_FIT - 0.00022499 * 0.33) / (0.0020798 - 0.00022499 * 0.33)
            self.observation[self.number_of_panels + 1] = FiT_observation

            eff_observation = (self.next_efficency_develop - 0.999) / (1.156 - 0.999)
            self.observation[self.number_of_panels + 2] = eff_observation

            cost_per_Wp_observation = (self.cost_per_Wp_df_at_zero - self.cost_per_Wp_df.min().min()) / (self.cost_per_Wp_df.max().max() - self.cost_per_Wp_df.min().min())
            self.observation[self.number_of_panels + 3] = cost_per_Wp_observation
        
        
        info = {"step financial balance (eur):": self.fin_balance,
               "total financial balance: (eur)": self.fin_balance_tot,
               "internal rate of return": irr_fin,
               "current_interest": current_penalty,
                "net present value": npv_fin}
         
        
        self.previous_observation = self.observation.copy()
        
        return self.observation, reward, done, False, info

In [13]:
env = TrainEnvironment(AC_OUTPUT_arr, elec_consum_arr, import_price_rate, import_price_train_arr, Eff_train_arr, CAPEX_JA_train_arr)
env_test = TestEnvironment(AC_OUTPUT_arr, elec_consum_arr, import_price_rate, import_price_test_arr, Eff_test_arr, CAPEX_JA_test_arr)

In [14]:
check_env(env)


In [15]:
test3(1, env)

(array([0.        , 0.8728532 , 0.9126229 , 0.8696934 , 0.9406177 ,
       0.90742123, 0.9843079 , 0.9951692 , 0.93203276, 0.89122355,
       0.        , 0.        , 0.06935652, 0.11512984, 0.00636943,
       0.55399454, 0.        , 0.        , 0.        ], dtype=float32), {}) 

STEP: 1
ACT 
 7
OBS 
 [0.99512255 0.99799323 0.90016794 0.98982257 0.9422331  0.9898271
 0.9715873  0.9866422  0.9248598  0.99628246 0.9998283  0.98735833
 0.04258632 0.05998355 0.00636947 0.55399454 0.19065562 0.05054863
 0.22933333]
step financial balance (eur): -102.77738519565537 current_interest 2214.9172


STEP: 2
ACT 
 11
OBS 
 [0.99277085 0.9841646  0.99094707 0.98467374 0.9970173  1.0011727
 0.9796923  0.98735785 0.9875257  1.00404    0.9896332  0.99598545
 0.05745011 0.06878136 0.02931093 0.55399454 0.28712252 0.13984679
 0.74666667]
step financial balance (eur): -2253.329629529107 current_interest 3380.0535


STEP: 3
ACT 
 8
OBS 
 [0.99407727 0.9931505  0.9937387  0.9915658  0.9993947  0.99140733
 1.

In [16]:
test1(1000, env)

Episode:1 Reward:-2.2678364935212074 

total financial balance: (eur) -52614.09412582126 

internal rate of return 0 

net present value 0 

Episode:2 Reward:0.3065969720685957 

total financial balance: (eur) -46529.322686900065 

internal rate of return 0 

net present value 0 

Episode:3 Reward:-0.6831915351295504 

total financial balance: (eur) -32001.340258122935 

internal rate of return 0 

net present value 0 

Episode:4 Reward:-0.624197517041491 

total financial balance: (eur) -37697.550935301755 

internal rate of return 0 

net present value 0 

Episode:5 Reward:-2.38703202340475 

total financial balance: (eur) -43695.57355802166 

internal rate of return 0 

net present value 0 

Episode:6 Reward:-2.4397693076585503 

total financial balance: (eur) -49309.20520249589 

internal rate of return 0 

net present value 0 

Episode:7 Reward:-2.083883005157868 

total financial balance: (eur) -54005.33181469097 

internal rate of return 0 

net present value 0 

Episode:8 Rewar

  value = annual_expense / self.current_budget_constraint


Episode:25 Reward:-2.7387774546522183 

total financial balance: (eur) -52809.576056429476 

internal rate of return 0 

net present value 0 

Episode:26 Reward:-3.194054906384542 

total financial balance: (eur) -65120.90209334994 

internal rate of return 0 

net present value 0 

Episode:27 Reward:-0.7314948436469272 

total financial balance: (eur) -50972.139797717406 

internal rate of return 0 

net present value 0 

Episode:28 Reward:-2.927799461805512 

total financial balance: (eur) -38988.49454046207 

internal rate of return 0 

net present value 0 

Episode:29 Reward:-1.1832872127038492 

total financial balance: (eur) -57752.98687698972 

internal rate of return 0 

net present value 0 

Episode:30 Reward:-2.1128183935644818 

total financial balance: (eur) -71642.64371134402 

internal rate of return 0 

net present value 0 

Episode:31 Reward:-0.9410187812211889 

total financial balance: (eur) -66180.93775541613 

internal rate of return 0 

net present value 0 

Episod

Episode:90 Reward:-2.970845994032722 

total financial balance: (eur) -34524.31827737688 

internal rate of return 0 

net present value 0 

Episode:91 Reward:-2.5873651123379973 

total financial balance: (eur) -62526.549813246966 

internal rate of return 0 

net present value 0 

Episode:92 Reward:-3.005976914731708 

total financial balance: (eur) -55494.666158220185 

internal rate of return 0 

net present value 0 

Episode:93 Reward:-1.230497134310356 

total financial balance: (eur) -43333.733824747615 

internal rate of return 0 

net present value 0 

Episode:94 Reward:-2.654814590775424 

total financial balance: (eur) -53541.749341634626 

internal rate of return 0 

net present value 0 

Episode:95 Reward:-0.563277772017332 

total financial balance: (eur) -27886.111773211273 

internal rate of return 0 

net present value 0 

Episode:96 Reward:-0.015402308475265044 

total financial balance: (eur) -35554.65438048335 

internal rate of return 0 

net present value 0 

Epis

Episode:159 Reward:-1.7187719826850814 

total financial balance: (eur) -52981.41009581659 

internal rate of return 0 

net present value 0 

Episode:160 Reward:-2.345973792729332 

total financial balance: (eur) -49914.507712245395 

internal rate of return 0 

net present value 0 

Episode:161 Reward:-2.8872211430110766 

total financial balance: (eur) -39925.3243842678 

internal rate of return 0 

net present value 0 

Episode:162 Reward:-1.9696082918882092 

total financial balance: (eur) -33656.31659986398 

internal rate of return 0 

net present value 0 

Episode:163 Reward:-3.845182924508904 

total financial balance: (eur) -59368.92240241868 

internal rate of return 0 

net present value 0 

Episode:164 Reward:-0.49567321919121377 

total financial balance: (eur) -35447.638350067136 

internal rate of return 0 

net present value 0 

Episode:165 Reward:-1.9629054823552146 

total financial balance: (eur) -39269.616297563036 

internal rate of return 0 

net present value 0 

Episode:231 Reward:-1.9132193476587933 

total financial balance: (eur) -51593.964572727484 

internal rate of return 0 

net present value 0 

Episode:232 Reward:0.2748308732835358 

total financial balance: (eur) -26552.325192975273 

internal rate of return 0 

net present value 0 

Episode:233 Reward:-2.1700418065186513 

total financial balance: (eur) -40292.51043996196 

internal rate of return 0 

net present value 0 

Episode:234 Reward:-0.4240524556616049 

total financial balance: (eur) -41605.94000515174 

internal rate of return 0 

net present value 0 

Episode:235 Reward:-2.6866907404632903 

total financial balance: (eur) -40938.18398040889 

internal rate of return 0 

net present value 0 

Episode:236 Reward:-0.6508611629151797 

total financial balance: (eur) -55361.96165807137 

internal rate of return 0 

net present value 0 

Episode:237 Reward:-1.5099430548069936 

total financial balance: (eur) -49761.26050516798 

internal rate of return 0 

net present value 0 

Episode:302 Reward:-2.530843308461159 

total financial balance: (eur) -54606.8421295095 

internal rate of return 0 

net present value 0 

Episode:303 Reward:-1.1766849089629596 

total financial balance: (eur) -45340.373345461434 

internal rate of return 0 

net present value 0 

Episode:304 Reward:-2.328658207604313 

total financial balance: (eur) -46596.52570161726 

internal rate of return 0 

net present value 0 

Episode:305 Reward:-1.6669838407575364 

total financial balance: (eur) -44763.57104993845 

internal rate of return 0 

net present value 0 

Episode:306 Reward:-2.0496977206519094 

total financial balance: (eur) -78433.56836367815 

internal rate of return 0 

net present value 0 

Episode:307 Reward:-4.27366670928763 

total financial balance: (eur) -48448.94234963985 

internal rate of return 0 

net present value 0 

Episode:308 Reward:-2.2327434993457045 

total financial balance: (eur) -48955.329740041656 

internal rate of return 0 

net present value 0 

Ep

Episode:370 Reward:-3.2424129784459765 

total financial balance: (eur) -75848.28712141051 

internal rate of return 0 

net present value 0 

Episode:371 Reward:-1.1075356751048049 

total financial balance: (eur) -38607.74348089043 

internal rate of return 0 

net present value 0 

Episode:372 Reward:-3.5618296088265176 

total financial balance: (eur) -62127.60479911581 

internal rate of return 0 

net present value 0 

Episode:373 Reward:-2.6176164617580993 

total financial balance: (eur) -29706.27840173103 

internal rate of return 0 

net present value 0 

Episode:374 Reward:0.047381730726181105 

total financial balance: (eur) -25020.90286104288 

internal rate of return 0 

net present value 0 

Episode:375 Reward:-2.7810415133321706 

total financial balance: (eur) -60557.77755747468 

internal rate of return 0 

net present value 0 

Episode:376 Reward:-1.052629238355762 

total financial balance: (eur) -26128.259019883222 

internal rate of return 0 

net present value 0 

Episode:437 Reward:-1.4394862161036757 

total financial balance: (eur) -46376.69108426686 

internal rate of return 0 

net present value 0 

Episode:438 Reward:-2.95206894518625 

total financial balance: (eur) -42957.14409792868 

internal rate of return 0 

net present value 0 

Episode:439 Reward:-1.2434213675828139 

total financial balance: (eur) -54651.82117312908 

internal rate of return 0 

net present value 0 

Episode:440 Reward:-1.2875352955101065 

total financial balance: (eur) -62656.73057253172 

internal rate of return 0 

net present value 0 

Episode:441 Reward:0.29203286533335837 

total financial balance: (eur) -27839.11061516553 

internal rate of return 0 

net present value 0 

Episode:442 Reward:-1.9403739392790957 

total financial balance: (eur) -51062.5764943357 

internal rate of return 0 

net present value 0 

Episode:443 Reward:-3.1958846044627407 

total financial balance: (eur) -41107.52120577441 

internal rate of return 0 

net present value 0 

Ep

Episode:507 Reward:-2.2114777682597935 

total financial balance: (eur) -51075.42736434659 

internal rate of return 0 

net present value 0 

Episode:508 Reward:-1.9908899786007688 

total financial balance: (eur) -48545.49842495238 

internal rate of return 0 

net present value 0 

Episode:509 Reward:-2.312470907916001 

total financial balance: (eur) -41708.15199884077 

internal rate of return 0 

net present value 0 

Episode:510 Reward:-1.8530771648210806 

total financial balance: (eur) -44897.44413533165 

internal rate of return 0 

net present value 0 

Episode:511 Reward:-2.9327653014239194 

total financial balance: (eur) -76065.32099329002 

internal rate of return 0 

net present value 0 

Episode:512 Reward:-2.5932956942980465 

total financial balance: (eur) -48325.03680162715 

internal rate of return 0 

net present value 0 

Episode:513 Reward:-0.7936605619786662 

total financial balance: (eur) -44849.98820797094 

internal rate of return 0 

net present value 0 



Episode:582 Reward:-2.4774964683924217 

total financial balance: (eur) -61401.86371371554 

internal rate of return 0 

net present value 0 

Episode:583 Reward:-1.5557899832036677 

total financial balance: (eur) -46473.575462917834 

internal rate of return 0 

net present value 0 

Episode:584 Reward:-3.16684546310908 

total financial balance: (eur) -61897.44583644661 

internal rate of return 0 

net present value 0 

Episode:585 Reward:-2.3119564952114957 

total financial balance: (eur) -69323.43110003967 

internal rate of return 0 

net present value 0 

Episode:586 Reward:-2.759135838947588 

total financial balance: (eur) -61801.151282609935 

internal rate of return 0 

net present value 0 

Episode:587 Reward:-0.8719858848568954 

total financial balance: (eur) -30781.257501601893 

internal rate of return 0 

net present value 0 

Episode:588 Reward:-2.002815663079212 

total financial balance: (eur) -30984.055734662987 

internal rate of return 0 

net present value 0 


Episode:655 Reward:-2.312106388222419 

total financial balance: (eur) -38337.59542101552 

internal rate of return 0 

net present value 0 

Episode:656 Reward:-1.6482120948581886 

total financial balance: (eur) -68123.72103898632 

internal rate of return 0 

net present value 0 

Episode:657 Reward:-4.0837279933488935 

total financial balance: (eur) -64447.878713175975 

internal rate of return 0 

net present value 0 

Episode:658 Reward:-1.5681809378607436 

total financial balance: (eur) -40913.36355033109 

internal rate of return 0 

net present value 0 

Episode:659 Reward:-1.039741323966503 

total financial balance: (eur) -59900.52585895384 

internal rate of return 0 

net present value 0 

Episode:660 Reward:0.31262983470562633 

total financial balance: (eur) -35776.63902916078 

internal rate of return 0 

net present value 0 

Episode:661 Reward:-2.023509452411052 

total financial balance: (eur) -57889.4212560387 

internal rate of return 0 

net present value 0 

Ep

Episode:717 Reward:-1.2103416308546615 

total financial balance: (eur) -53301.70267879609 

internal rate of return 0 

net present value 0 

Episode:718 Reward:-1.8722447782526621 

total financial balance: (eur) -61401.793188442076 

internal rate of return 0 

net present value 0 

Episode:719 Reward:-2.681474438980166 

total financial balance: (eur) -43627.70606624545 

internal rate of return 0 

net present value 0 

Episode:720 Reward:-1.7256028095836666 

total financial balance: (eur) -49496.627685842665 

internal rate of return 0 

net present value 0 

Episode:721 Reward:-2.8985345040226025 

total financial balance: (eur) -56957.21097449981 

internal rate of return 0 

net present value 0 

Episode:722 Reward:-3.571630350505849 

total financial balance: (eur) -46012.4921854124 

internal rate of return 0 

net present value 0 

Episode:723 Reward:-4.096719686470006 

total financial balance: (eur) -48017.641715237536 

internal rate of return 0 

net present value 0 



Episode:778 Reward:-0.08495325327584487 

total financial balance: (eur) -58627.22354631194 

internal rate of return 0 

net present value 0 

Episode:779 Reward:-2.362419083244519 

total financial balance: (eur) -45245.066232296886 

internal rate of return 0 

net present value 0 

Episode:780 Reward:-3.947852209977095 

total financial balance: (eur) -58070.544650494034 

internal rate of return 0 

net present value 0 

Episode:781 Reward:-2.217321423089561 

total financial balance: (eur) -42160.098435827276 

internal rate of return 0 

net present value 0 

Episode:782 Reward:-2.1890952062089104 

total financial balance: (eur) -40257.19223869479 

internal rate of return 0 

net present value 0 

Episode:783 Reward:-2.756345962743807 

total financial balance: (eur) -47094.76443150399 

internal rate of return 0 

net present value 0 

Episode:784 Reward:0.29324430055618267 

total financial balance: (eur) -39417.309837764566 

internal rate of return 0 

net present value 0 

Episode:845 Reward:-3.3852647304893737 

total financial balance: (eur) -54771.06387262527 

internal rate of return 0 

net present value 0 

Episode:846 Reward:-0.6846295388127142 

total financial balance: (eur) -54448.79583922288 

internal rate of return 0 

net present value 0 

Episode:847 Reward:-1.4350218092175637 

total financial balance: (eur) -41537.61882331149 

internal rate of return 0 

net present value 0 

Episode:848 Reward:-3.1769813344291062 

total financial balance: (eur) -57100.50889078217 

internal rate of return 0 

net present value 0 

Episode:849 Reward:-3.7194248808245516 

total financial balance: (eur) -58180.44385682404 

internal rate of return 0 

net present value 0 

Episode:850 Reward:-2.500761766423104 

total financial balance: (eur) -56134.30599529768 

internal rate of return 0 

net present value 0 

Episode:851 Reward:-2.6849440848184667 

total financial balance: (eur) -46495.420096473325 

internal rate of return 0 

net present value 0 


Episode:916 Reward:-2.1205378338458063 

total financial balance: (eur) -63403.967139620676 

internal rate of return 0 

net present value 0 

Episode:917 Reward:-1.661272171717002 

total financial balance: (eur) -41379.10117651374 

internal rate of return 0 

net present value 0 

Episode:918 Reward:-1.0189726493730704 

total financial balance: (eur) -31420.623135353475 

internal rate of return 0 

net present value 0 

Episode:919 Reward:-3.895228771759377 

total financial balance: (eur) -58881.808113827065 

internal rate of return 0 

net present value 0 

Episode:920 Reward:-2.2297168014153916 

total financial balance: (eur) -61516.858526437645 

internal rate of return 0 

net present value 0 

Episode:921 Reward:-3.052219836725204 

total financial balance: (eur) -51253.24257900636 

internal rate of return 0 

net present value 0 

Episode:922 Reward:-1.7776371873634316 

total financial balance: (eur) -60503.90625719632 

internal rate of return 0 

net present value 0 

Episode:987 Reward:-0.9718526931593837 

total financial balance: (eur) -37669.66387213419 

internal rate of return 0 

net present value 0 

Episode:988 Reward:-4.153487477579377 

total financial balance: (eur) -64857.57152623804 

internal rate of return 0 

net present value 0 

Episode:989 Reward:-2.7798356160586537 

total financial balance: (eur) -69008.71891319088 

internal rate of return 0 

net present value 0 

Episode:990 Reward:-1.260533106027987 

total financial balance: (eur) -62360.55705081051 

internal rate of return 0 

net present value 0 

Episode:991 Reward:-3.293046218260226 

total financial balance: (eur) -48526.69514593419 

internal rate of return 0 

net present value 0 

Episode:992 Reward:-2.7906748468486477 

total financial balance: (eur) -55719.012808769156 

internal rate of return 0 

net present value 0 

Episode:993 Reward:-0.8838678286865865 

total financial balance: (eur) -42560.16820964227 

internal rate of return 0 

net present value 0 

E

In [17]:
def make_env(rank: int, seed: int = 0) -> Callable:
    def _init() -> gym.Env:
        random.seed(seed + rank)
        np.random.seed(seed + rank) 
        env = TrainEnvironment(AC_OUTPUT_arr, elec_consum_arr, import_price_rate, import_price_train_arr, Eff_train_arr, CAPEX_JA_train_arr)
        env.reset(seed=seed + rank)
        return env

    return _init
# Number of environments to run in parallel
num_cpu = 16
env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])   

In [18]:
import math
def logarithmic_schedule(initial_value, final_value=0.00001):
    """
    Returns a function that computes a logarithmically decreasing value from initial_value to final_value.
    """
    def func(progress_remaining):
        # Avoid taking log of zero by setting a lower limit close to zero
        epsilon = 0.0001
        progress = max(epsilon, 1 - progress_remaining)
        # Calculate the decay factor using a logarithmic scale
        return final_value + (initial_value - final_value) * math.log(1/progress)
    return func


learning_rate = logarithmic_schedule(0.0001)

In [19]:
log_path = "./logs/"
eval_callback = EvalCallback(env_test, best_model_save_path = "C:/Users/kubaw/Desktop/DELFT/THESIS/CODE/TEST_MODELS/ja24_low_aa/",
                             log_path = log_path, n_eval_episodes = 750, eval_freq=5000,
                             deterministic=True, render=False)


In [20]:
policy_kwargs = dict(net_arch=dict(pi=[512, 512], vf=[512, 512]))

In [21]:
def linear_schedule(initial_value, final_value=0.00001):
    """
    Returns a function that computes a linearly decreasing value from initial_value to final_value.
    """
    def func(progress_remaining):
        # Calculate the decrease based on the remaining progress
        return final_value + (initial_value - final_value) * progress_remaining
    return func

# Define the learning rate using the linear schedule
learning_rate = linear_schedule(0.0003)

In [23]:
model = PPO("MlpPolicy", env, learning_rate = learning_rate, batch_size = 256, n_epochs = 24, policy_kwargs = policy_kwargs, gamma = 0.99,  verbose=1, tensorboard_log = "C:/Users/kubaw/Desktop/DELFT/THESIS\CODE/TEST_MODELS/LOGS/logs")
TIMESTEPS = 8000000
model.learn(total_timesteps = TIMESTEPS, callback=eval_callback)

Using cpu device
Logging to C:/Users/kubaw/Desktop/DELFT/THESIS\CODE/TEST_MODELS/LOGS/logs\PPO_422
------------------------------
| time/              |       |
|    fps             | 4939  |
|    iterations      | 1     |
|    time_elapsed    | 6     |
|    total_timesteps | 32768 |
------------------------------


  value = annual_expense / self.current_budget_constraint


Eval num_timesteps=47232, episode_reward=28.07 +/- 10.99
Episode length: 25.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 25          |
|    mean_reward          | 28.1        |
| time/                   |             |
|    total_timesteps      | 47232       |
| train/                  |             |
|    approx_kl            | 0.018956501 |
|    clip_fraction        | 0.295       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.2        |
|    explained_variance   | -0.0014     |
|    learning_rate        | 0.000299    |
|    loss                 | 26.5        |
|    n_updates            | 24          |
|    policy_gradient_loss | -0.0288     |
|    value_loss           | 81.2        |
-----------------------------------------
New best mean reward!
------------------------------
| time/              |       |
|    fps             | 1015  |
|    iterations      | 2     |
|    time_

Eval num_timesteps=367232, episode_reward=35.69 +/- 12.91
Episode length: 25.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 25          |
|    mean_reward          | 35.7        |
| time/                   |             |
|    total_timesteps      | 367232      |
| train/                  |             |
|    approx_kl            | 0.016713824 |
|    clip_fraction        | 0.24        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.09       |
|    explained_variance   | 0.439       |
|    learning_rate        | 0.000287    |
|    loss                 | 30.2        |
|    n_updates            | 264         |
|    policy_gradient_loss | -0.0182     |
|    value_loss           | 49.9        |
-----------------------------------------
-------------------------------
| time/              |        |
|    fps             | 645    |
|    iterations      | 12     |
|    time_elapsed    | 609 

-------------------------------
| time/              |        |
|    fps             | 620    |
|    iterations      | 21     |
|    time_elapsed    | 1109   |
|    total_timesteps | 688128 |
-------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 625         |
|    iterations           | 22          |
|    time_elapsed         | 1153        |
|    total_timesteps      | 720896      |
| train/                  |             |
|    approx_kl            | 0.008940687 |
|    clip_fraction        | 0.088       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.43       |
|    explained_variance   | 0.586       |
|    learning_rate        | 0.000275    |
|    loss                 | 25.8        |
|    n_updates            | 504         |
|    policy_gradient_loss | -0.00405    |
|    value_loss           | 44.5        |
-----------------------------------------
--------------------

--------------------------------
| time/              |         |
|    fps             | 604     |
|    iterations      | 31      |
|    time_elapsed    | 1681    |
|    total_timesteps | 1015808 |
--------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 606         |
|    iterations           | 32          |
|    time_elapsed         | 1727        |
|    total_timesteps      | 1048576     |
| train/                  |             |
|    approx_kl            | 0.008144934 |
|    clip_fraction        | 0.0855      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.19       |
|    explained_variance   | 0.628       |
|    learning_rate        | 0.000263    |
|    loss                 | 22.5        |
|    n_updates            | 744         |
|    policy_gradient_loss | -0.00374    |
|    value_loss           | 42.8        |
-----------------------------------------
-------------

--------------------------------
| time/              |         |
|    fps             | 601     |
|    iterations      | 41      |
|    time_elapsed    | 2234    |
|    total_timesteps | 1343488 |
--------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 603         |
|    iterations           | 42          |
|    time_elapsed         | 2279        |
|    total_timesteps      | 1376256     |
| train/                  |             |
|    approx_kl            | 0.007922545 |
|    clip_fraction        | 0.0763      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.906      |
|    explained_variance   | 0.681       |
|    learning_rate        | 0.000251    |
|    loss                 | 20          |
|    n_updates            | 984         |
|    policy_gradient_loss | -0.00405    |
|    value_loss           | 36.9        |
-----------------------------------------
Eval num_time


KeyboardInterrupt



In [21]:
#model.save(r"C:\Users\kubaw\Desktop\DELFT\THESIS\CODE\TEST_MODELS\FINAL24_ja_low")

In [10]:
model = PPO.load(r"C:\Users\kubaw\Desktop\DELFT\THESIS\CODE\TEST_MODELS\FINAL24_ja_high.zip")

In [11]:
evaluate1(1, env_test, model)

Act: 9 
 Obs: [0.8506809  0.8584513  0.9979332  0.9444468  0.94620717 0.8702701
 0.89418256 0.9858364  0.99208087 0.         0.         0.9894404
 0.9902745  0.9319078  0.9669751  0.930064   0.9864388  0.9897222
 0.8567538  0.99934334 0.9690382  0.9883636  0.98566294 0.96554
 0.07780816 0.08083121 0.04069865 0.6843568  0.22262634 0.06385198
 0.4304    ] 
 Balance 934.6484733204852
Act: 1 
 Obs: [0.83448714 0.84543145 0.9839316  0.92941517 0.9384754  0.8553898
 0.88254833 0.9720743  0.97176784 0.99386346 0.         0.98887855
 0.9821276  0.918809   0.9589939  0.9213087  0.971079   0.98773867
 0.8422795  0.9796777  0.9611708  0.97831863 0.9790401  0.9443
 0.0921257  0.08930569 0.04105865 0.6808497  0.09113253 0.0676831
 0.9064    ] 
 Balance 1145.615082726474
Act: 3 
 Obs: [0.98640937 0.8348458  0.97125524 0.91823715 0.92682475 0.8490568
 0.8809269  0.96460253 0.964308   0.98867255 0.9894532  0.97657263
 0.974911   0.9099361  0.9591434  0.9067026  0.9565509  0.9706006
 0.9933011  0.96924

In [12]:
def evaluate2(episodes, environment, model):
    
    mean_irr = 0
    mean_fin_balance = 0
    irr = 0
    fin_balance = 0
    count = 0
    npv = 0
    list_npv = []

    for ep in range(episodes):

        obs, _ = environment.reset()  # Unpack the tuple and ignore the info part
        done = False

        while not done:
            action, _ = model.predict(obs)  # Now obs is just the observation array
            obs, reward, done, truncated, info = environment.step(action)
            # Extracting the 2nd and 3rd key-value pairs
            keys = list(info.keys())
            values = list(info.values())

            # Getting the 2nd key-value pair
            second_value = values[1]

            # Getting the 3rd key-value pair
    
            third_value = values[2]
            fourth_value = values[4]
        
        fin_balance += second_value
        npv += fourth_value
        count += 1
        
        list_npv.append(fourth_value)
            
    mean_fin_balance = fin_balance/count
    mean_npv = npv/count

    #print(mean_npv)

    environment.close()
    
    return(list_npv)

In [18]:
values_eval = evaluate2(5000, env_test, model)

  value = annual_expense / self.current_budget_constraint
