In [6]:
import sys
exe = sys.executable
if exe.startswith('\\\\?\\'):
    exe = executable[4:]
print(exe)
!"{executable}" -m pip install gymnasium


C:\Users\luthi\AppData\Roaming\jupyterlab-desktop\jlab_server\python.exe


In [None]:
import sys

# Show the path of the interpreter backing this kernel.
interpreter_path = sys.executable
print(interpreter_path)


In [3]:
import gymnasium as gym
import numpy as np

class AdvancedMicrogridEnv(gym.Env):
    """
    A microgrid environment inspired by research on DRL for microgrid energy management.

    The environment simulates a community battery, renewable generation, load demand,
    and grid pricing over an episode of ``max_steps`` steps (24 by default).

    Actions (``Discrete(5)``):
        0: do nothing
        1: charge battery
        2: discharge battery
        3: buy from grid
        4: sell to grid

    Observation (``Box(0, 1, shape=(5,), float32)``):
        [battery soc, renewable generation, load demand, normalised electricity
        price, time-of-day]

    All randomness is drawn from ``self.np_random`` so that ``reset(seed=...)``
    yields reproducible episodes.

    NOTE(review): ``step`` returns the 4-tuple ``(state, reward, done, info)`` that
    existing callers unpack; Gymnasium's ``Env.step`` contract is the 5-tuple
    ``(obs, reward, terminated, truncated, info)``, so Gymnasium wrappers and
    checkers will not accept this class as-is.
    """

    def __init__(self):
        """Set the spaces and the simulation parameters, then reset the episode."""
        super().__init__()

        self.action_space = gym.spaces.Discrete(5)
        self.observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=(5,), dtype=np.float32)

        # episode length and step counter
        self.max_steps = 24
        self.current_step = 0

        # battery parameters (arbitrary energy units)
        self.battery_capacity = 100.0
        # state of charge as a fraction of capacity; starts at 50%
        self.battery_soc = 0.5
        # maximum units moved into / out of the battery per step
        self.charge_rate = 10.0
        self.discharge_rate = 10.0
        # credit (cost reduction) per unit of energy actually discharged
        self.discharge_credit = 0.05

        # grid price parameters: base price per unit and amplitude of the daily swing
        self.base_price = 0.2
        self.price_amplitude = 0.1

        # cost parameters: selling earns less than buying costs
        self.grid_buy_cost_multiplier = 1.0
        self.grid_sell_revenue_multiplier = 0.8

        # state variables populated by reset(); all are normalised to [0, 1]
        self.renewable_generation = None
        self.load_demand = None
        self.time_of_day = None

        self.reset()

    def _simulate_renewable_generation(self):
        """Return generation in [0, 1]: a bell curve centred on time_of_day 0.5 plus noise."""
        peak_factor = np.exp(-((self.time_of_day - 0.5) ** 2) / 0.02)
        # use the environment's own generator so seeding via reset() takes effect
        noise = self.np_random.uniform(-0.1, 0.1)
        return float(np.clip(peak_factor + noise, 0.0, 1.0))

    def _simulate_load_demand(self):
        """Return demand in [0, 1]: a piecewise base level by time of day plus noise."""
        if self.time_of_day < 0.3:
            base_demand = 0.7
        elif self.time_of_day < 0.6:
            base_demand = 0.4
        else:
            base_demand = 0.8
        noise = self.np_random.uniform(-0.2, 0.2)
        return float(np.clip(base_demand + noise, 0.0, 1.0))

    def _simulate_price(self):
        """Return the electricity price normalised to [0, 1] for the current time of day."""
        price = self.base_price + self.price_amplitude * np.sin(np.pi * self.time_of_day)
        # map [base_price, base_price + price_amplitude] onto [0, 1]
        normalized_price = (price - self.base_price) / self.price_amplitude
        return float(np.clip(normalized_price, 0.0, 1.0))

    def _get_obs(self, price):
        """Assemble the 5-element float32 observation vector from the current state."""
        return np.array([
            self.battery_soc,
            self.renewable_generation,
            self.load_demand,
            price,
            self.time_of_day,
        ], dtype=np.float32)

    def step(self, action):
        """
        Advance the simulation by one step and apply ``action``.

        Time of day is derived from the step counter, new generation / demand /
        price values are sampled, and the action is then applied against those
        freshly sampled values.

        Returns:
            (state, reward, done, info): the observation vector, the negative of
            the step cost, whether ``max_steps`` has been reached, and an empty dict.
        """
        # each step is one hour of a 24-hour day
        self.time_of_day = (self.current_step % 24) / 24.0

        self.renewable_generation = self._simulate_renewable_generation()
        self.load_demand = self._simulate_load_demand()
        # NOTE(review): this is the normalised price (0 at time_of_day 0), and it is
        # what the cost terms below multiply by — confirm that is intended.
        price = self._simulate_price()

        capacity = self.battery_capacity
        renewable_energy = self.renewable_generation * capacity
        load_energy = self.load_demand * capacity

        cost = 0.0

        if action == 1:
            # charge: never move more energy than the battery has room for, so the
            # agent is not billed for energy that cannot be stored
            headroom = (1.0 - self.battery_soc) * capacity
            charge_amount = min(self.charge_rate, headroom)
            # renewable energy is free; only the uncovered remainder is bought
            missing = max(charge_amount - renewable_energy, 0.0)
            cost += missing * self.grid_buy_cost_multiplier * price
            self.battery_soc = min(self.battery_soc + charge_amount / capacity, 1.0)
        elif action == 2:
            # discharge: credit only the energy the battery actually holds, so an
            # empty battery cannot be "discharged" for free reward
            available = self.battery_soc * capacity
            discharge_amount = min(self.discharge_rate, available)
            self.battery_soc = max(self.battery_soc - discharge_amount / capacity, 0.0)
            cost -= discharge_amount * self.discharge_credit
        elif action == 3:
            # buy from grid: cover whatever load is left after half of the renewable
            # output and half of the stored energy are counted against it
            energy_from_renewable = renewable_energy * 0.5
            energy_from_battery = self.battery_soc * capacity * 0.5
            shortfall = max(load_energy - (energy_from_renewable + energy_from_battery), 0)
            cost += shortfall * self.grid_buy_cost_multiplier * price
        elif action == 4:
            # sell to grid: only renewable output in excess of the load is sold
            excess = max(renewable_energy - load_energy, 0)
            cost -= excess * self.grid_sell_revenue_multiplier * price
        # action 0 (and any unrecognised value) leaves cost and battery untouched

        # lower cost is better
        reward = float(-cost)

        state = self._get_obs(price)

        self.current_step += 1
        done = self.current_step >= self.max_steps

        info = {}
        return state, reward, done, info

    def reset(self, seed=None, options=None):
        """
        Start a new episode.

        Seeds the environment's generator through ``gym.Env.reset``, restores the
        battery to 50% and the clock to 0, and samples initial generation and demand.

        Returns:
            (state, info): the initial observation vector and an empty dict.
        """
        super().reset(seed=seed)
        self.current_step = 0
        self.battery_soc = 0.5
        self.time_of_day = 0.0
        self.renewable_generation = self._simulate_renewable_generation()
        self.load_demand = self._simulate_load_demand()
        price = self._simulate_price()
        return self._get_obs(price), {}

    def render(self, mode='human'):
        """Print a one-line summary of the current state; ``mode`` is not used."""
        print(f"step: {self.current_step} | battery soc: {self.battery_soc:.2f} | renewable: {self.renewable_generation:.2f} | "
              f"demand: {self.load_demand:.2f} | price: {self._simulate_price():.2f} | time: {self.time_of_day:.2f}")

if __name__ == "__main__":
    # Smoke test: drive one episode with random actions, printing each step.
    env = AdvancedMicrogridEnv()
    state, _ = env.reset()
    done = False
    steps_taken = 0
    # stop at the first finished episode or after 24 steps, whichever comes first
    while not done and steps_taken < 24:
        action = env.action_space.sample()
        state, reward, done, _ = env.step(action)
        env.render()
        steps_taken += 1

step: 1 | battery soc: 0.50 | renewable: 0.00 | demand: 0.66 | price: 0.00 | time: 0.00
step: 2 | battery soc: 0.40 | renewable: 0.00 | demand: 0.73 | price: 0.13 | time: 0.04
step: 3 | battery soc: 0.40 | renewable: 0.09 | demand: 0.56 | price: 0.26 | time: 0.08
step: 4 | battery soc: 0.40 | renewable: 0.04 | demand: 0.58 | price: 0.38 | time: 0.12
step: 5 | battery soc: 0.50 | renewable: 0.09 | demand: 0.56 | price: 0.50 | time: 0.17
step: 6 | battery soc: 0.50 | renewable: 0.11 | demand: 0.54 | price: 0.61 | time: 0.21
step: 7 | battery soc: 0.40 | renewable: 0.01 | demand: 0.68 | price: 0.71 | time: 0.25
step: 8 | battery soc: 0.40 | renewable: 0.13 | demand: 0.71 | price: 0.79 | time: 0.29
step: 9 | battery soc: 0.40 | renewable: 0.20 | demand: 0.33 | price: 0.87 | time: 0.33
step: 10 | battery soc: 0.30 | renewable: 0.50 | demand: 0.55 | price: 0.92 | time: 0.38
step: 11 | battery soc: 0.20 | renewable: 0.66 | demand: 0.34 | price: 0.97 | time: 0.42
step: 12 | battery soc: 0.20 |