# In [1]:
import numpy as np
import gymnasium as gym
import random
import math

### Create Environment

# In [None]:
class SmartCharging(gym.Env):
    """Toy smart-charging environment for a single electric vehicle.

    An episode consists of eight 15-minute charging slots (time steps 0-7,
    2 p.m. to 3:45 p.m.). In every slot the agent picks one of four charging
    powers. At time step 8 (4 p.m.) no charging happens; instead a random
    energy demand is drawn and taken from the battery, the episode
    terminates, and running out of energy is heavily penalised.

    Observations are dicts with the keys ``"battery status"`` (kWh) and
    ``"time step"``.
    """

    def __init__(self, render_mode=None):
        """Create the spaces and draw a random initial battery level.

        Args:
            render_mode: Stored on the instance only; this environment does
                not implement rendering.
        """
        super().__init__()
        self.render_mode = render_mode

        # action - kw: zero(0) - 0, low(1) - 7, medium(2) - 14, high(3) - 22
        self.actions_to_kw = [0, 7, 14, 22]
        self.action_space = gym.spaces.Discrete(len(self.actions_to_kw))

        # average EV capacity 72 kWh: https://ev-database.org/cheatsheet/useable-battery-capacity-electric-car
        self.battery_capacity = 72
        # time steps: 0 - 2 p.m., 1 - 2:15 p.m., ... , 7 - 3:45 p.m., 8 - 4 p.m.
        self.time_step = 0
        # time step at which the energy demand is applied and the episode ends
        self.final_time_step = 8
        # Keys mirror the dict returned by _get_obs().
        # NOTE(review): _get_obs() returns plain scalars, not shape-(1,)
        # arrays; wrap them if strict space conformance is required.
        self.observation_space = gym.spaces.Dict(
            {
                "battery status": gym.spaces.Box(0, self.battery_capacity, shape=(1,)),
                "time step": gym.spaces.Box(0, self.final_time_step, shape=(1,)),
            }
        )

        # 15 minute charging slots
        self.time_slot = 0.25
        # random start battery status in kWh to simulate energy left from previous shift
        self.battery_status = self._sample_start_battery()

    def _sample_start_battery(self):
        """Return a uniformly drawn integer battery level in [0, capacity] kWh."""
        # Use the environment's own generator so reset(seed=...) is honoured.
        return int(self.np_random.integers(0, self.battery_capacity, endpoint=True))

    def _get_obs(self):
        """Return the current observation as a dict of battery level and time step."""
        return {"battery status": self.battery_status, "time step": self.time_step}

    def step(self, action):
        """Advance the environment by one time step.

        Before the final time step, the selected charging power is applied
        for one slot (clipped so the battery never exceeds its capacity) and
        the reward is the negative cost ``time_coeff * exp(power_kw)``.

        At the final time step ``action`` is ignored: an energy demand is
        drawn from a normal distribution (mean 30, std 5) and taken from the
        battery. If the battery cannot cover it, the battery is set to 0 and
        a large negative reward is returned; otherwise the reward is 0.

        Args:
            action: Index into ``actions_to_kw`` (0-3).

        Returns:
            Tuple ``(observation, reward, terminated)``.
        """
        # NOTE(review): the standard Gymnasium step() contract is the 5-tuple
        # (obs, reward, terminated, truncated, info); the 3-tuple is kept here
        # so existing callers of this class keep working.
        if self.time_step == self.final_time_step:
            energy_demand = self.np_random.normal(loc=30, scale=5)
            penalty_factor = 10

            if energy_demand > self.battery_status:
                self.battery_status = 0
                # to really penalize running out of energy, make penalty as high as
                # maximum charging costs (all slots at the highest power), scaled by
                # penalty_factor. Still open what time coefficient is
                sum_of_time_coeffs = 2
                reward = (
                    (-1)
                    * sum_of_time_coeffs
                    * math.exp(self.actions_to_kw[3])
                    * penalty_factor
                )
            else:
                self.battery_status -= energy_demand
                reward = 0

            return self._get_obs(), reward, True

        power_kw = self.actions_to_kw[action]
        charged_kWh = self.time_slot * power_kw
        time_coeff = self.time_slot  # open question: What does the coefficient represent?

        if self.battery_status + charged_kWh > self.battery_capacity:
            # Battery would overflow: only charge up to capacity and shorten
            # the effective charging time accordingly. power_kw is non-zero
            # here because charging 0 kWh can never exceed the capacity.
            charged_kWh = self.battery_capacity - self.battery_status
            time_coeff = charged_kWh / power_kw

        self.battery_status += charged_kWh
        self.time_step += 1

        reward = (-1) * time_coeff * math.exp(power_kw)

        return self._get_obs(), reward, False

    def reset(self, seed=None, options=None):
        """Start a new episode with a fresh random battery level.

        Args:
            seed: Optional seed for the environment's random generator, which
                drives the start battery level and the final energy demand.
            options: Unused.

        Returns:
            The initial observation dict.
        """
        # Seeds self.np_random when a seed is given.
        super().reset(seed=seed)

        self.battery_status = self._sample_start_battery()
        self.time_step = 0

        # NOTE(review): the standard Gymnasium reset() contract returns
        # (obs, info); only the observation is returned here so existing
        # callers of this class keep working.
        return self._get_obs()
        