In [1]:
# import libaries
import geopandas as gpd
import pandas as pd
import json
import numpy as np
from sqlalchemy import create_engine
import time
from datetime import date, timedelta
from rl_v2g import CarsharingEnv
import math
import stable_baselines3
from stable_baselines3 import PPO
from gym.utils.env_checker import check_env as checker_gym
from stable_baselines3.common.env_checker import check_env as checker_baselines3
import torch
import torch.nn as nn
from stable_baselines3.common.logger import configure
import random
# load the database credentials from the JSON file
with open('config/credentials.json') as f:
    credentials = json.load(f)

# create connection string
connection_string = f"postgresql://{credentials['username']}:{credentials['password']}@{credentials['host']}:{credentials['port']}/{credentials['database_name']}"

# create the engine with the connection string
engine = create_engine(connection_string)


# Application of Car-sharing Simulation Environment 

Choose the timespan for the simulation. The simulation will be executed chronologically, starting from the first day (2019-1-1) and continuing for subsequent days (2019-1-2, 2019-1-3, etc.). If a start date other than 2019-1-1 is selected, the "Start simulation" cell below may need to be modified.

In [2]:
# set learn period
start_date = date(2019, 1, 8)
end_date = date(2020, 7, 26)
#end_date = date(2019, 1, 10)
start_week = 1

# set simulation period
start_date_simulation = date(2019, 1, 1)
end_date_simulation = date(2019, 1, 8)
start_week_simulation = 0

# calculate number of days to learn
nr_iterations = (end_date - start_date).days

# calculate number of days to simulate
nr_iterations_simulation = (end_date_simulation - start_date_simulation).days

# Load data for simulation

### Car-sharing stations

In [3]:
# get station geodata, create spatial index
sql = " SELECT * FROM msc_2023_dominik.distinct_stations"
stations = gpd.read_postgis(sql, engine, geom_col='geom',crs = "EPSG:2056")
stations.sindex
stations.head()

Unnamed: 0,station_no,geom
0,2901,POINT (2555501.836 1145060.068)
1,2905,POINT (2752963.411 1260089.916)
2,2910,POINT (2501877.645 1126218.900)
3,2913,POINT (2682234.096 1243208.370)
4,2918,POINT (2736874.744 1253090.505)


### Vehicle information

In [4]:
# get vehicle data
sql = "SELECT * FROM msc_2023_dominik.vehicle_information ORDER BY vehicle_no limit 10"
vehicles = pd.read_sql(sql, engine)
vehicles.head()

Unnamed: 0,index,vehicle_category,vehicle_no,model_name,brand_name,charge_power,battery_capacity,range
0,2962,Minivan,106516,eVito 129KB Tourer Pro 3200,Mercedes-Benz,11.0,100.0,378.0
1,2963,Minivan,106517,eVito 129KB Tourer Pro 3200,Mercedes-Benz,11.0,100.0,378.0
2,2964,Minivan,106518,eVito 129KB Tourer Pro 3200,Mercedes-Benz,11.0,100.0,378.0
3,2965,Minivan,106519,eVito 129KB Tourer Pro 3200,Mercedes-Benz,11.0,100.0,378.0
4,2966,Combi,106526,Enyaq iV80,Skoda,11.0,82.0,420.0


### Reservations

In [5]:
# get daily reservations, save in dict for fast data access
delta = timedelta(days=1)
reservations_dict = {}
start_date_reservations = start_date
while start_date_reservations <= end_date:
    sql = """SELECT reservation_no, start_station_no, vehicle_no, reservationfrom_time_discrete, drive_firststart_time_discrete, 
            drive_lastend_time_discrete, reservation_duration, revenue_distance, required_soc, revenue_duration, drive_km, 
            (floor(EXTRACT(epoch FROM (date_trunc('hour', TO_TIMESTAMP(drive_lastend, 'YYYY-MM-DD HH24:MI:SS.MS')) + 
                                floor(EXTRACT(minute FROM TO_TIMESTAMP(drive_lastend, 'YYYY-MM-DD HH24:MI:SS.MS')) / 15) * interval '15 minutes' 
                                - date_trunc('hour', TO_TIMESTAMP(drive_firststart, 'YYYY-MM-DD HH24:MI:SS.MS')) - 
                                floor(EXTRACT(minute FROM TO_TIMESTAMP(drive_firststart, 'YYYY-MM-DD HH24:MI:SS.MS')) / 15) * interval '15 minutes'
                               )) / 900) * 900 + 900) / 900 AS drive_duration
            FROM msc_2023_dominik.reservations_long_time 
            WHERE  DATE(reservationfrom_discrete_date) = '{}' or  DATE(drive_firststart_discrete_date) = '{}' 
            ORDER BY reservationfrom_discrete""".format(start_date_reservations, start_date_reservations)
    reservations = pd.read_sql(sql, engine)
    reservations_dict[start_date_reservations.strftime('%Y-%m-%d')] = reservations
    start_date_reservations += delta
reservations_dict[(start_date).strftime('%Y-%m-%d')].head()

Unnamed: 0,reservation_no,start_station_no,vehicle_no,reservationfrom_time_discrete,drive_firststart_time_discrete,drive_lastend_time_discrete,reservation_duration,revenue_distance,required_soc,revenue_duration,drive_km,drive_duration
0,24281745,4714,114572,72.0,715.0,715.0,1804.0,0.0,0.0,0.0,1.0,1.0
1,24281754,4714,114582,72.0,716.0,716.0,1804.0,0.0,0.0,0.0,1.0,1.0
2,22919882,4173,116242,576.0,734.0,925.0,672.0,0.0,9.5,0.0,19.0,191.0
3,24340023,2902,116881,604.0,701.0,982.0,380.0,178.4,53.095238,301.5,223.0,283.0
4,24154068,3057,113667,604.0,706.0,924.0,424.0,68.5,51.5,265.0,103.0,219.0


In [6]:
reservations_dict.keys()

dict_keys(['2019-01-08', '2019-01-09', '2019-01-10', '2019-01-11', '2019-01-12', '2019-01-13', '2019-01-14', '2019-01-15', '2019-01-16', '2019-01-17', '2019-01-18', '2019-01-19', '2019-01-20', '2019-01-21', '2019-01-22', '2019-01-23', '2019-01-24', '2019-01-25', '2019-01-26', '2019-01-27', '2019-01-28', '2019-01-29', '2019-01-30', '2019-01-31', '2019-02-01', '2019-02-02', '2019-02-03', '2019-02-04', '2019-02-05', '2019-02-06', '2019-02-07', '2019-02-08', '2019-02-09', '2019-02-10', '2019-02-11', '2019-02-12', '2019-02-13', '2019-02-14', '2019-02-15', '2019-02-16', '2019-02-17', '2019-02-18', '2019-02-19', '2019-02-20', '2019-02-21', '2019-02-22', '2019-02-23', '2019-02-24', '2019-02-25', '2019-02-26', '2019-02-27', '2019-02-28', '2019-03-01', '2019-03-02', '2019-03-03', '2019-03-04', '2019-03-05', '2019-03-06', '2019-03-07', '2019-03-08', '2019-03-09', '2019-03-10', '2019-03-11', '2019-03-12', '2019-03-13', '2019-03-14', '2019-03-15', '2019-03-16', '2019-03-17', '2019-03-18', '2019-03-

In [7]:
# get daily reservations, save in dict for fast data access
delta = timedelta(days=1)
reservations_dict_simulation = {}
start_date_reservations = start_date_simulation
while start_date_reservations <= end_date_simulation:
    sql = """SELECT reservation_no, start_station_no, vehicle_no, reservationfrom_time_discrete, drive_firststart_time_discrete, 
            drive_lastend_time_discrete, reservation_duration, revenue_distance, required_soc, revenue_duration, drive_km, 
            (floor(EXTRACT(epoch FROM (date_trunc('hour', TO_TIMESTAMP(drive_lastend, 'YYYY-MM-DD HH24:MI:SS.MS')) + 
                                floor(EXTRACT(minute FROM TO_TIMESTAMP(drive_lastend, 'YYYY-MM-DD HH24:MI:SS.MS')) / 15) * interval '15 minutes' 
                                - date_trunc('hour', TO_TIMESTAMP(drive_firststart, 'YYYY-MM-DD HH24:MI:SS.MS')) - 
                                floor(EXTRACT(minute FROM TO_TIMESTAMP(drive_firststart, 'YYYY-MM-DD HH24:MI:SS.MS')) / 15) * interval '15 minutes'
                               )) / 900) * 900 + 900) / 900 AS drive_duration
            FROM msc_2023_dominik.reservations_long_time 
            WHERE  DATE(reservationfrom_discrete_date) = '{}' or  DATE(drive_firststart_discrete_date) = '{}' 
            ORDER BY reservationfrom_discrete""".format(start_date_reservations, start_date_reservations)
    reservations = pd.read_sql(sql, engine)
    reservations_dict_simulation[start_date_reservations.strftime('%Y-%m-%d')] = reservations
    start_date_reservations += delta
reservations_dict_simulation[(start_date_simulation).strftime('%Y-%m-%d')].head()

Unnamed: 0,reservation_no,start_station_no,vehicle_no,reservationfrom_time_discrete,drive_firststart_time_discrete,drive_lastend_time_discrete,reservation_duration,revenue_distance,required_soc,revenue_duration,drive_km,drive_duration
0,24134345,2938,114034,0.0,1.0,19.0,22.0,44.85,34.5,13.75,69.0,20.0
1,24519097,4407,113833,0.0,2.0,9.0,10.0,13.0,10.0,6.25,20.0,7.0
2,24519221,3165,116525,0.0,0.0,96.0,96.0,0.0,0.0,0.0,0.0,97.0
3,24519174,1557,115969,0.0,0.0,49.0,50.0,45.75,30.5,43.75,61.0,50.0
4,24514447,2702,114871,0.0,4.0,5.0,6.0,2.66,1.538462,7.5,4.0,2.0


In [8]:
reservations_dict_simulation.keys()

dict_keys(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04', '2019-01-05', '2019-01-06', '2019-01-07', '2019-01-08'])

### Electicity prices for charging

In [9]:
# get charging costs data
prices = ""
for i in range(0, 480, 5):
    price = i / 20
    prices += '"Price_chf_kwh_{}", '.format(price)

sql = """SELECT {} "Delivery day" FROM msc_2023_dominik.charging_costs WHERE "Delivery day" >=  '{}' and "Delivery day" <=  '{}' ORDER BY "Delivery day" """.format(prices, start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))

charging_costs = pd.read_sql(sql, engine)
charging_costs.head()

Unnamed: 0,Price_chf_kwh_0.0,Price_chf_kwh_0.25,Price_chf_kwh_0.5,Price_chf_kwh_0.75,Price_chf_kwh_1.0,Price_chf_kwh_1.25,Price_chf_kwh_1.5,Price_chf_kwh_1.75,Price_chf_kwh_2.0,Price_chf_kwh_2.25,...,Price_chf_kwh_21.75,Price_chf_kwh_22.0,Price_chf_kwh_22.25,Price_chf_kwh_22.5,Price_chf_kwh_22.75,Price_chf_kwh_23.0,Price_chf_kwh_23.25,Price_chf_kwh_23.5,Price_chf_kwh_23.75,Delivery day
0,0.055468,0.055468,0.055468,0.055468,0.050245,0.050245,0.050245,0.050245,0.049484,0.049484,...,0.065208,0.065165,0.065165,0.065165,0.065165,0.061071,0.061071,0.061071,0.061071,2019-01-08
1,0.061907,0.061907,0.061907,0.061907,0.057564,0.057564,0.057564,0.057564,0.057086,0.057086,...,0.067586,0.067641,0.067641,0.067641,0.067641,0.064112,0.064112,0.064112,0.064112,2019-01-09
2,0.062939,0.062939,0.062939,0.062939,0.058443,0.058443,0.058443,0.058443,0.05815,0.05815,...,0.072842,0.070312,0.070312,0.070312,0.070312,0.061842,0.061842,0.061842,0.061842,2019-01-10
3,0.068889,0.068889,0.068889,0.068889,0.061907,0.061907,0.061907,0.061907,0.060854,0.060854,...,0.072212,0.071756,0.071756,0.071756,0.071756,0.069389,0.069389,0.069389,0.069389,2019-01-11
4,0.067521,0.067521,0.067521,0.067521,0.061951,0.061951,0.061951,0.061951,0.059366,0.059366,...,0.058595,0.061353,0.061353,0.061353,0.061353,0.058606,0.058606,0.058606,0.058606,2019-01-12


In [10]:
# save in dict for fast data access
delta = timedelta(days=1)
charging_costs_dict = {}
start_date_electricity = start_date
while start_date_electricity <= end_date:
    electricity_price_day = charging_costs[charging_costs["Delivery day"].dt.date == start_date_electricity].drop(["Delivery day"],axis = 1).iloc[0].values
    charging_costs_dict[start_date_electricity.strftime('%Y-%m-%d')] = electricity_price_day
    start_date_electricity += delta

In [11]:
charging_costs_dict.keys()

dict_keys(['2019-01-08', '2019-01-09', '2019-01-10', '2019-01-11', '2019-01-12', '2019-01-13', '2019-01-14', '2019-01-15', '2019-01-16', '2019-01-17', '2019-01-18', '2019-01-19', '2019-01-20', '2019-01-21', '2019-01-22', '2019-01-23', '2019-01-24', '2019-01-25', '2019-01-26', '2019-01-27', '2019-01-28', '2019-01-29', '2019-01-30', '2019-01-31', '2019-02-01', '2019-02-02', '2019-02-03', '2019-02-04', '2019-02-05', '2019-02-06', '2019-02-07', '2019-02-08', '2019-02-09', '2019-02-10', '2019-02-11', '2019-02-12', '2019-02-13', '2019-02-14', '2019-02-15', '2019-02-16', '2019-02-17', '2019-02-18', '2019-02-19', '2019-02-20', '2019-02-21', '2019-02-22', '2019-02-23', '2019-02-24', '2019-02-25', '2019-02-26', '2019-02-27', '2019-02-28', '2019-03-01', '2019-03-02', '2019-03-03', '2019-03-04', '2019-03-05', '2019-03-06', '2019-03-07', '2019-03-08', '2019-03-09', '2019-03-10', '2019-03-11', '2019-03-12', '2019-03-13', '2019-03-14', '2019-03-15', '2019-03-16', '2019-03-17', '2019-03-18', '2019-03-

In [12]:
# get charging costs data
prices = ""
for i in range(0, 480, 5):
    price = i / 20
    prices += '"Price_chf_kwh_{}", '.format(price)

sql = """SELECT {} "Delivery day" FROM msc_2023_dominik.charging_costs WHERE "Delivery day" >=  '{}' and "Delivery day" <=  '{}' ORDER BY "Delivery day" """.format(prices, start_date_simulation.strftime('%Y-%m-%d'), end_date_simulation.strftime('%Y-%m-%d'))

charging_costs = pd.read_sql(sql, engine)
charging_costs.head()

Unnamed: 0,Price_chf_kwh_0.0,Price_chf_kwh_0.25,Price_chf_kwh_0.5,Price_chf_kwh_0.75,Price_chf_kwh_1.0,Price_chf_kwh_1.25,Price_chf_kwh_1.5,Price_chf_kwh_1.75,Price_chf_kwh_2.0,Price_chf_kwh_2.25,...,Price_chf_kwh_21.75,Price_chf_kwh_22.0,Price_chf_kwh_22.25,Price_chf_kwh_22.5,Price_chf_kwh_22.75,Price_chf_kwh_23.0,Price_chf_kwh_23.25,Price_chf_kwh_23.5,Price_chf_kwh_23.75,Delivery day
0,0.054577,0.054577,0.054577,0.054577,0.052927,0.052927,0.052927,0.052927,0.051298,0.051298,...,0.055055,0.059855,0.059855,0.059855,0.059855,0.059844,0.059844,0.059844,0.059844,2019-01-01
1,0.054262,0.054262,0.054262,0.054262,0.05284,0.05284,0.05284,0.05284,0.04513,0.04513,...,0.066175,0.06523,0.06523,0.06523,0.06523,0.064036,0.064036,0.064036,0.064036,2019-01-02
2,0.057585,0.057585,0.057585,0.057585,0.054838,0.054838,0.054838,0.054838,0.052471,0.052471,...,0.068433,0.066262,0.066262,0.066262,0.066262,0.062276,0.062276,0.062276,0.062276,2019-01-03
3,0.057107,0.057107,0.057107,0.057107,0.05613,0.05613,0.05613,0.05613,0.054241,0.054241,...,0.070214,0.070301,0.070301,0.070301,0.070301,0.065914,0.065914,0.065914,0.065914,2019-01-04
4,0.068477,0.068477,0.068477,0.068477,0.063503,0.063503,0.063503,0.063503,0.059431,0.059431,...,0.065045,0.066381,0.066381,0.066381,0.066381,0.06637,0.06637,0.06637,0.06637,2019-01-05


In [13]:
# save in dict for fast data access
delta = timedelta(days=1)
charging_costs_dict_simulation = {}
start_date_electricity = start_date_simulation
while start_date_electricity <= end_date_simulation:
    electricity_price_day = charging_costs[charging_costs["Delivery day"].dt.date == start_date_electricity].drop(["Delivery day"],axis = 1).iloc[0].values
    charging_costs_dict_simulation[start_date_electricity.strftime('%Y-%m-%d')] = electricity_price_day
    start_date_electricity += delta

In [14]:
charging_costs_dict_simulation.keys()

dict_keys(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04', '2019-01-05', '2019-01-06', '2019-01-07', '2019-01-08'])

### Secondary energy prices (for V2G)

In [15]:
# get v2g price data
sql = """SELECT "Timestamp", "Secondary_positive_v2g_prices_chf_kwh", "Secondary_negative_v2g_prices_chf_kwh" FROM msc_2023_dominik.v2g_prices WHERE "Timestamp" >=  '{}' and "Timestamp" <=  '{}' ORDER BY "Timestamp" """.format(start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))
v2g_prices = pd.read_sql(sql, engine)
v2g_prices.head()

Unnamed: 0,Timestamp,Secondary_positive_v2g_prices_chf_kwh,Secondary_negative_v2g_prices_chf_kwh
0,2019-01-08 00:00:00,0.052801,0.035201
1,2019-01-08 00:15:00,0.052687,0.035132
2,2019-01-08 00:30:00,0.052687,0.035132
3,2019-01-08 00:45:00,0.052687,0.035132
4,2019-01-08 01:00:00,0.052687,0.035132


In [16]:
# save in dict for fast data access
delta = timedelta(days=1)
v2g_price_dict = {}
start_date_v2g = start_date
while start_date_v2g <= end_date:
    v2g_price_day_positive = v2g_prices[v2g_prices['Timestamp'].dt.date == pd.Timestamp(start_date_v2g).date()].drop(["Timestamp"],axis = 1)["Secondary_positive_v2g_prices_chf_kwh"].values
    v2g_price_day_negative = v2g_prices[v2g_prices['Timestamp'].dt.date == pd.Timestamp(start_date_v2g).date()].drop(["Timestamp"],axis = 1)["Secondary_negative_v2g_prices_chf_kwh"].values
    v2g_price_dict[start_date_v2g.strftime('%Y-%m-%d')] = [v2g_price_day_positive, v2g_price_day_negative]
    start_date_v2g += delta

In [17]:
v2g_price_dict.keys()

dict_keys(['2019-01-08', '2019-01-09', '2019-01-10', '2019-01-11', '2019-01-12', '2019-01-13', '2019-01-14', '2019-01-15', '2019-01-16', '2019-01-17', '2019-01-18', '2019-01-19', '2019-01-20', '2019-01-21', '2019-01-22', '2019-01-23', '2019-01-24', '2019-01-25', '2019-01-26', '2019-01-27', '2019-01-28', '2019-01-29', '2019-01-30', '2019-01-31', '2019-02-01', '2019-02-02', '2019-02-03', '2019-02-04', '2019-02-05', '2019-02-06', '2019-02-07', '2019-02-08', '2019-02-09', '2019-02-10', '2019-02-11', '2019-02-12', '2019-02-13', '2019-02-14', '2019-02-15', '2019-02-16', '2019-02-17', '2019-02-18', '2019-02-19', '2019-02-20', '2019-02-21', '2019-02-22', '2019-02-23', '2019-02-24', '2019-02-25', '2019-02-26', '2019-02-27', '2019-02-28', '2019-03-01', '2019-03-02', '2019-03-03', '2019-03-04', '2019-03-05', '2019-03-06', '2019-03-07', '2019-03-08', '2019-03-09', '2019-03-10', '2019-03-11', '2019-03-12', '2019-03-13', '2019-03-14', '2019-03-15', '2019-03-16', '2019-03-17', '2019-03-18', '2019-03-

In [18]:
# get v2g price data
sql = """SELECT "Timestamp", "Secondary_positive_v2g_prices_chf_kwh", "Secondary_negative_v2g_prices_chf_kwh" FROM msc_2023_dominik.v2g_prices WHERE "Timestamp" >=  '{}' and "Timestamp" <=  '{}' ORDER BY "Timestamp" """.format(start_date_simulation.strftime('%Y-%m-%d'), end_date_simulation.strftime('%Y-%m-%d'))
v2g_prices = pd.read_sql(sql, engine)
v2g_prices.head()

Unnamed: 0,Timestamp,Secondary_positive_v2g_prices_chf_kwh,Secondary_negative_v2g_prices_chf_kwh
0,2019-01-01 00:00:00,0.06837,0.045588
1,2019-01-01 00:15:00,0.068838,0.045896
2,2019-01-01 00:30:00,0.068838,0.045896
3,2019-01-01 00:45:00,0.068838,0.045896
4,2019-01-01 01:00:00,0.068838,0.045896


In [19]:
# save in dict for fast data access
delta = timedelta(days=1)
v2g_price_dict_simulation = {}
start_date_v2g = start_date_simulation
while start_date_v2g <= end_date_simulation:
    v2g_price_day_positive = v2g_prices[v2g_prices['Timestamp'].dt.date == pd.Timestamp(start_date_v2g).date()].drop(["Timestamp"],axis = 1)["Secondary_positive_v2g_prices_chf_kwh"].values
    v2g_price_day_negative = v2g_prices[v2g_prices['Timestamp'].dt.date == pd.Timestamp(start_date_v2g).date()].drop(["Timestamp"],axis = 1)["Secondary_negative_v2g_prices_chf_kwh"].values
    v2g_price_dict_simulation[start_date_v2g.strftime('%Y-%m-%d')] = [v2g_price_day_positive, v2g_price_day_negative]
    start_date_v2g += delta

In [20]:
v2g_price_dict_simulation.keys()

dict_keys(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04', '2019-01-05', '2019-01-06', '2019-01-07', '2019-01-08'])

# Check environment

In [44]:
# check if enviornment fullfils requirements of gym and stable-baselines3

# load discrete table
sql =  "SELECT * FROM discrete.discrete_weeks_{} ORDER BY vehicle_no".format(0)
data = pd.read_sql(sql, engine)
    
# load discrete planned reservation table
sql =  "SELECT * FROM msc_2023_dominik.planned_reservations_discrete_{} ORDER BY vehicle_no".format(0)
planned_reservations = pd.read_sql(sql, engine)
    
# load discrete planned reservation duration table
sql =  "SELECT * FROM msc_2023_dominik.planned_durations_discrete_{} ORDER BY vehicle_no".format(0)
planned_durations = pd.read_sql(sql, engine)
end = time.time()

# get number of vehicles
nr_vehicles = len(vehicles)

# maximal simulation length
if nr_iterations > 577:
    nr_iterations = 577
    
count = 0
# iterate over weeks (for loading weekly discrete data)
for week_nr in range(start_week, 1):
    # iteration for each day
    for day in range(98,99,96):
        
        # calculate number of timesteps since first day of simulation
        timesteps_since_start = count * 96
        
        # all requested days are simulated
        if count == nr_iterations:
            break
            
        # get date
        date_day = pd.to_datetime(data.columns[day-97]).date()
        date_day_string = date_day.strftime('%Y-%m-%d')
        
        # load reservations
        reservations = reservations_dict[date_day_string]
        
        # load electricity prices for charging
        electricity_price = charging_costs_dict[date_day_string]
        
        # load secondary energy prices for v2g
        v2g_price = v2g_price_dict[date_day_string]
    
        # select discrete data of day
        daily_data = data.iloc[:,day-97:day-1]
        planned_reservations_day = planned_reservations.iloc[:,day-97 + 1:day + 1] 
        planned_durations_day = planned_durations.iloc[:,day-97 + 1:day + 1] 
        
        # create environment
        env = CarsharingEnv(stations, vehicles, planned_bookings = True, v2g_penalty = 10000, penalty_per_kwh = 0, daily_data = daily_data, reservations = reservations,
                           electricity_price = electricity_price, timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day,
                           planned_durations = planned_durations_day, RL = True)
        
        # check implementation 
        checker_gym(env)
        checker_baselines3(env)
        # count number of simulated days
        count += 1
        



# Train Agent

In [21]:
# check support of GPU
stable_baselines3.common.utils.get_device(device='cpu')

device(type='cpu')

Start simulation by running the following cell:

In [22]:
def validation(model):
    nr_vehicles = len(vehicles)
    
    global nr_iterations_simulation

    # maximal simulation length
    if nr_iterations_simulation > 577:
        nr_iterations_simulation = 577
        
    total_reward = 0

    count = 0
    # iterate over weeks (for loading weekly discrete data)
    for week_nr in range(start_week_simulation, math.ceil((start_week_simulation * 7 + nr_iterations_simulation) / 7)):
        # load discrete car-sharing table
        sql =  "SELECT * FROM discrete.discrete_weeks_{} ORDER BY vehicle_no limit 10".format(week_nr)
        data = pd.read_sql(sql, engine)

        # load discrete planned reservation table
        sql =  "SELECT * FROM msc_2023_dominik.planned_reservations_discrete_{} ORDER BY vehicle_no limit 10".format(week_nr)
        planned_reservations = pd.read_sql(sql, engine)

        # load discrete planned reservation duration table
        sql =  "SELECT * FROM msc_2023_dominik.planned_durations_discrete_{} ORDER BY vehicle_no limit 10".format(week_nr)
        planned_durations = pd.read_sql(sql, engine)
        

        # iteration for each day
        for day in range(98,676,96):

            # calculate number of timesteps since first day of simulation
            timesteps_since_start = count * 96

            # all requested days are simulated
            if count == nr_iterations:
                break

            # get date
            date_day = pd.to_datetime(data.columns[day-97]).date()
            date_day_string = date_day.strftime('%Y-%m-%d')

            # load reservations
            reservations = reservations_dict_simulation[date_day_string]

            # load electricity prices for charging
            electricity_price = charging_costs_dict_simulation[date_day_string]

            # load secondary energy prices for v2g
            v2g_price = v2g_price_dict_simulation[date_day_string]

            # select discrete data of day
            daily_data = data.iloc[:,day-97:day-1]
            planned_reservations_day = planned_reservations.iloc[:,day-97 + 1:day + 1] 
            planned_durations_day = planned_durations.iloc[:,day-97 + 1:day + 1] 

            # reset environment at beginnning of simulation
            if count == 0:
                environment = CarsharingEnv(stations, vehicles, planned_bookings = True, 
                                   daily_data = daily_data, reservations = reservations, electricity_price = electricity_price,
                                    timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day, 
                                    planned_durations = planned_durations_day, random_seed_number = 42, v2g_demand_event_min = 10, v2g_demand_event_max = 10, v2g_penalty = 100, RL = True)
                s = environment.reset()

            # beginn new day without reseting environemnt 
            else:
                environment.next_day(daily_data, reservations, electricity_price, timesteps_since_start, v2g_price, planned_reservations_day, planned_durations_day)
                
            # simulate day in 15 min steps
            done = False
            counter = 0
            while not done:

                # get your action 
                act, _states = model.predict(s)

                # proceed one time step
                s, rew, done, _ = environment.step(act)
                
                total_reward += rew

                counter +=1

            # plot summary statistics of episode (day)
            #environment.daily_summary_statistics()

            # plot summary statistic over full simulation period
            #if count == nr_iterations - 1:
              #  environment.episode_summary_statistics(nr_iterations)

            # count number of simulated days
            count += 1
            
    return total_reward

In [30]:
#%%snakeviz
# get number of vehicles
nr_vehicles = len(vehicles)

# maximal simulation length
if nr_iterations > 577:
    nr_iterations = 577

reward_list = []
count_list = []
count = 0
model = None
# iterate over weeks (for loading weekly discrete data)
for j in range(0,4286):
    week_nr = random.randrange(1,80)
    # load discrete car-sharing table
    sql =  "SELECT * FROM discrete.discrete_weeks_{} ORDER BY vehicle_no limit 10".format(0)
    data = pd.read_sql(sql, engine)

    # load discrete planned reservation table
    sql =  "SELECT * FROM msc_2023_dominik.planned_reservations_discrete_{} ORDER BY vehicle_no limit 10".format(0)
    planned_reservations = pd.read_sql(sql, engine)

    # load discrete planned reservation duration table
    sql =  "SELECT * FROM msc_2023_dominik.planned_durations_discrete_{} ORDER BY vehicle_no limit 10".format(0)
    planned_durations = pd.read_sql(sql, engine)
    counter = 0
    # iteration for each day
    for day in range(98,676,96):

        # calculate number of timesteps since first day of simulation
        timesteps_since_start = counter * 96 + week_nr*7*96

        # get date
        date_day = pd.to_datetime(data.columns[day-97]).date()
        date_day_string = date_day.strftime('%Y-%m-%d')

        # load reservations
        reservations = reservations_dict[date_day_string]

        # load electricity prices for charging
        electricity_price = charging_costs_dict[date_day_string]

        # load secondary energy prices for v2g
        v2g_price = v2g_price_dict[date_day_string]

        # select discrete data of day
        daily_data = data.iloc[:,day-97:day-1]
        planned_reservations_day = planned_reservations.iloc[:,day-97 + 1:day + 1] 
        planned_durations_day = planned_durations.iloc[:,day-97 + 1:day + 1] 

        # reset environment at beginnning of simulation
        if count == 0:

            # create environment
            env = CarsharingEnv(stations, vehicles, planned_bookings = True, v2g_penalty = 100, penalty_per_kwh = 1.0, soc_initial_low=0.0, soc_initial_high=0.0, daily_data = daily_data, reservations = reservations,
                           electricity_price = electricity_price, timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day,
                           planned_durations = planned_durations_day, max_distance_car_assingment=300, random_seed_number = 234, RL = True, v2g_demand_event_min = 10, v2g_demand_event_max = 10)

            # create RL model
            model = PPO("MlpPolicy",env, verbose=0, gamma = 1.0, stats_window_size = 1, n_epochs = 1,  learning_rate=0.0003, n_steps=95, batch_size = 95,device ="cpu")
            model.policy.mlp_extractor.policy_net = nn.Sequential(
                nn.Linear(model.policy.mlp_extractor.policy_net[0].in_features, 42),  # Modify the first layer
                nn.Tanh(),
                nn.Linear(42, 42),
                model.policy.mlp_extractor.policy_net[3]
            )
            model.policy.action_net =  nn.Linear(42, 30)
            model.policy.mlp_extractor.value_net = nn.Sequential(
                nn.Linear(model.policy.mlp_extractor.value_net[0].in_features, 42),  # Modify the first layer
                nn.Tanh(),
                nn.Linear(42, 42),
                model.policy.mlp_extractor.value_net[3]
            )
            model.policy.value_net =  nn.Linear(42, 1)
            new_logger = configure( "/tmp/sb3_log/", ["csv", "tensorboard"])
            model.set_logger(new_logger)

            #model.policy.to(device)

            r = validation(model)
            print("Validation reward: ", r, " CHF   Episodes learned: ",count)
            reward_list.append(r)
            count_list.append(count)

        else: 
            #env.load_new_data(daily_data = daily_data, reservations = reservations,
                #          electricity_price = electricity_price, timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day,
                #          planned_durations = planned_durations_day)
            s = env.reset()


        # learn one episode
        model.learn(total_timesteps=95, reset_num_timesteps=False)


        if count in range(5,30000,100):
            r = validation(model)
            print("Validation reward: ", r, " CHF   Episodes learned: ",count)
            reward_list.append(r)
            count_list.append(count)

        counter += 1

        count += 1
    if count == 5000:
        model.save("car_sharing_v2g_model_small")
        np.save("reward_list_model_small", reward_list)
        np.save("count_list_model_small", count_list)
    if count == 10000:
        model.save("car_sharing_v2g_model_small")
        np.save("reward_list_model_small", reward_list)
        np.save("count_list_model_small", count_list)
    if count == 20000:
        model.save("car_sharing_v2g_model_small")
        np.save("reward_list_model_small", reward_list)
        np.save("count_list_model_small", count_list)
    if count == 29990:
        model.save("car_sharing_v2g_model_small")
        np.save("reward_list_model_small", reward_list)
        np.save("count_list_model_small", count_list)

    

KeyError: '2019-01-01'

In [22]:
def validation(model):
    nr_vehicles = len(vehicles)
    
    global nr_iterations_simulation

    # maximal simulation length
    if nr_iterations_simulation > 577:
        nr_iterations_simulation = 577
        
    total_reward = 0

    count = 0
    # iterate over weeks (for loading weekly discrete data)
    for week_nr in range(start_week_simulation, math.ceil((start_week_simulation * 7 + nr_iterations_simulation) / 7)):
        # load discrete car-sharing table
        sql =  "SELECT * FROM discrete.discrete_weeks_{} ORDER BY vehicle_no limit 10".format(week_nr)
        data = pd.read_sql(sql, engine)

        # load discrete planned reservation table
        sql =  "SELECT * FROM msc_2023_dominik.planned_reservations_discrete_{} ORDER BY vehicle_no limit 10".format(week_nr)
        planned_reservations = pd.read_sql(sql, engine)

        # load discrete planned reservation duration table
        sql =  "SELECT * FROM msc_2023_dominik.planned_durations_discrete_{} ORDER BY vehicle_no limit 10".format(week_nr)
        planned_durations = pd.read_sql(sql, engine)
        

        # iteration for each day
        for day in range(98,676,96):
            day = 98
            # calculate number of timesteps since first day of simulation
            timesteps_since_start = count * 96

            # all requested days are simulated
            if count == nr_iterations:
                break

            # get date
            date_day = pd.to_datetime(data.columns[day-97]).date()
            date_day_string = date_day.strftime('%Y-%m-%d')

            # load reservations
            reservations = reservations_dict_simulation[date_day_string]

            # load electricity prices for charging
            electricity_price = charging_costs_dict_simulation[date_day_string]

            # load secondary energy prices for v2g
            v2g_price = v2g_price_dict_simulation[date_day_string]

            # select discrete data of day
            daily_data = data.iloc[:,day-97:day-1]
            planned_reservations_day = planned_reservations.iloc[:,day-97 + 1:day + 1] 
            planned_durations_day = planned_durations.iloc[:,day-97 + 1:day + 1] 

            # reset environment at beginnning of simulation
            if count == 0:
                environment = CarsharingEnv(stations, vehicles, planned_bookings = True, 
                                   daily_data = daily_data, reservations = reservations, electricity_price = electricity_price, soc_initial_low=0.0, soc_initial_high=0.0,
                                    timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day, 
                                    planned_durations = planned_durations_day, penalty_per_kwh = 1.0, random_seed_number = 42, v2g_demand_event_min = 10, v2g_demand_event_max = 10, v2g_penalty = 100, RL = True)
                s = environment.reset()

            # beginn new day without reseting environemnt 
            else:
                environment.next_day(daily_data, reservations, electricity_price, timesteps_since_start, v2g_price, planned_reservations_day, planned_durations_day)
                s = environment.reset()
            # simulate day in 15 min steps
            done = False
            counter = 0
            while not done:

                # get your action 
                act, _states = model.predict(s)

                # proceed one time step
                s, rew, done, _ = environment.step(act)
                
                total_reward += rew

                counter +=1

            # plot summary statistics of episode (day)
            #environment.daily_summary_statistics()

            # plot summary statistic over full simulation period
            #if count == nr_iterations - 1:
              #  environment.episode_summary_statistics(nr_iterations)

            # count number of simulated days
            count += 1
            
    return total_reward

In [23]:
#%%snakeviz
# get number of vehicles
nr_vehicles = len(vehicles)

# maximal simulation length
if nr_iterations > 577:
    nr_iterations = 577

reward_list = []
count_list = []
count = 0
model = None
# iterate over weeks (for loading weekly discrete data)
for j in range(0,4286):
    week_nr = 0
    # load discrete car-sharing table
    sql =  "SELECT * FROM discrete.discrete_weeks_{} ORDER BY vehicle_no limit 10".format(0)
    data = pd.read_sql(sql, engine)

    # load discrete planned reservation table
    sql =  "SELECT * FROM msc_2023_dominik.planned_reservations_discrete_{} ORDER BY vehicle_no limit 10".format(0)
    planned_reservations = pd.read_sql(sql, engine)

    # load discrete planned reservation duration table
    sql =  "SELECT * FROM msc_2023_dominik.planned_durations_discrete_{} ORDER BY vehicle_no limit 10".format(0)
    planned_durations = pd.read_sql(sql, engine)
    counter = 0
    # iteration for each day
    for day in range(98,676,96):

        # calculate number of timesteps since first day of simulation
        timesteps_since_start = counter * 96 

        # get date
        date_day = pd.to_datetime(data.columns[day-97]).date()
        date_day_string = date_day.strftime('%Y-%m-%d')

        # load reservations
        reservations = reservations_dict_simulation[date_day_string]

        # load electricity prices for charging
        electricity_price = charging_costs_dict_simulation[date_day_string]

        # load secondary energy prices for v2g
        v2g_price = v2g_price_dict_simulation[date_day_string]

        # select discrete data of day
        daily_data = data.iloc[:,day-97:day-1]
        planned_reservations_day = planned_reservations.iloc[:,day-97 + 1:day + 1] 
        planned_durations_day = planned_durations.iloc[:,day-97 + 1:day + 1] 

        # reset environment at beginnning of simulation
        if count == 0:
            environment = CarsharingEnv(stations, vehicles, planned_bookings = True, 
                               daily_data = daily_data, reservations = reservations, electricity_price = electricity_price, soc_initial_low=0.0, soc_initial_high=0.0,
                                timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day, 
                                planned_durations = planned_durations_day, random_seed_number = 42,penalty_per_kwh = 1.0, v2g_demand_event_min = 10, v2g_demand_event_max = 10, v2g_penalty = 100, RL = True)
            s = environment.reset()

            # create RL model
            model = PPO("MlpPolicy",environment, verbose=0, normalize_advantage  = False, gamma = 1.0, stats_window_size = 1, n_epochs = 10,  learning_rate=0.0003, n_steps=96, batch_size = 1,device ="cpu")
            model.policy.mlp_extractor.policy_net = nn.Sequential(
                nn.Linear(model.policy.mlp_extractor.policy_net[0].in_features, 42),  # Modify the first layer
                nn.Tanh(),
                nn.Linear(42, 42),
                model.policy.mlp_extractor.policy_net[3]
            )
            model.policy.action_net =  nn.Linear(42, 30)
            model.policy.mlp_extractor.value_net = nn.Sequential(
                nn.Linear(model.policy.mlp_extractor.value_net[0].in_features, 42),  # Modify the first layer
                nn.Tanh(),
                nn.Linear(42, 42),
                model.policy.mlp_extractor.value_net[3]
            )
            model.policy.value_net =  nn.Linear(42, 1)
            new_logger = configure( "/tmp/sb3_log/", ["csv", "tensorboard"])
            model.set_logger(new_logger)

            #model.policy.to(device)

            r = validation(model)
            print("Validation reward: ", r, " CHF   Episodes learned: ",count)
            reward_list.append(r)
            count_list.append(count)

        else: 
            #env.load_new_data(daily_data = daily_data, reservations = reservations,
                #          electricity_price = electricity_price, timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day,
                #          planned_durations = planned_durations_day)
            s = environment.reset()


        # learn one episode
        model.learn(total_timesteps=95, reset_num_timesteps=False)


        if count in range(5,30000,100):
            r = validation(model)
            print("Validation reward: ", r, " CHF   Episodes learned: ",count)
            reward_list.append(r)
            count_list.append(count)

        counter += 1

        count += 1
    if count == 5000:
        model.save("car_sharing_v2g_model_small")
        np.save("reward_list_model_small", reward_list)
        np.save("count_list_model_small", count_list)
    if count == 10000:
        model.save("car_sharing_v2g_model_small")
        np.save("reward_list_model_small", reward_list)
        np.save("count_list_model_small", count_list)
    if count == 20000:
        model.save("car_sharing_v2g_model_small")
        np.save("reward_list_model_small", reward_list)
        np.save("count_list_model_small", count_list)
    if count == 29990:
        model.save("car_sharing_v2g_model_small")
        np.save("reward_list_model_small", reward_list)
        np.save("count_list_model_small", count_list)

    


1
[0 1 1 1 0 2 0 2 2 2]
charging
-0.4502630055000001
trips
0
cancellations
0
rew_energy_difference
0
cancelled_revenue
0.0
v2g_reward
0
rew
-0.4502630055000001
[ 1.856e+03  1.856e+03  4.215e+03  4.215e+03  1.856e+03  1.856e+03
 -1.000e+00  3.234e+03  3.234e+03  1.766e+03  0.000e+00  2.750e-02
  2.750e-02  2.750e-02  0.000e+00  0.000e+00  0.000e+00  0.000e+00
  0.000e+00  0.000e+00 -1.000e+00 -1.000e+00 -1.000e+00 -1.000e+00
 -1.000e+00 -1.000e+00 -1.000e+00 -1.000e+00 -1.000e+00 -1.000e+00
 -1.000e+00 -1.000e+00 -1.000e+00 -1.000e+00 -1.000e+00 -1.000e+00
 -1.000e+00 -1.000e+00 -1.000e+00 -1.000e+00  0.000e+00  1.000e+00]

2
[2 1 1 0 0 0 1 0 0 2]
charging
-0.30017533700000004
trips
0
cancellations
0
rew_energy_difference
0
cancelled_revenue
36.0
v2g_reward
0
rew
35.699824663
[ 1.856e+03  1.856e+03  4.215e+03  4.215e+03  1.856e+03  1.856e+03
 -1.000e+00  3.234e+03  3.234e+03  1.766e+03  0.000e+00  5.500e-02
  5.500e-02  2.750e-02  0.000e+00  0.000e+00  0.000e+00  0.000e+00
  0.000e+00 


KeyboardInterrupt



In [32]:
model2 = None
count_next_start = 0
# iterate over weeks (for loading weekly discrete data)
for j in range(0,70):
    print(j)
    week_nr = random.randrange(1,80)
    # load discrete car-sharing table
    sql =  "SELECT * FROM discrete.discrete_weeks_{} ORDER BY vehicle_no limit 10".format(week_nr)
    data = pd.read_sql(sql, engine)

    # load discrete planned reservation table
    sql =  "SELECT * FROM msc_2023_dominik.planned_reservations_discrete_{} ORDER BY vehicle_no limit 10".format(week_nr)
    planned_reservations = pd.read_sql(sql, engine)

    # load discrete planned reservation duration table
    sql =  "SELECT * FROM msc_2023_dominik.planned_durations_discrete_{} ORDER BY vehicle_no limit 10".format(week_nr)
    planned_durations = pd.read_sql(sql, engine)
    counter = 0
    # iteration for each day
    for day in range(98,676,96):

        # calculate number of timesteps since first day of simulation
        timesteps_since_start = counter * 96 + week_nr*7*96

        # all requested days are simulated
        if count == nr_iterations:
            break

        # get date
        date_day = pd.to_datetime(data.columns[day-97]).date()
        date_day_string = date_day.strftime('%Y-%m-%d')

        # load reservations
        reservations = reservations_dict[date_day_string]

        # load electricity prices for charging
        electricity_price = charging_costs_dict[date_day_string]

        # load secondary energy prices for v2g
        v2g_price = v2g_price_dict[date_day_string]

        # select discrete data of day
        daily_data = data.iloc[:,day-97:day-1]
        planned_reservations_day = planned_reservations.iloc[:,day-97 + 1:day + 1] 
        planned_durations_day = planned_durations.iloc[:,day-97 + 1:day + 1] 

        # reset environment at beginnning of simulation
        if count_next_start == 0:

            model2 = PPO("MlpPolicy",env, verbose=0, stats_window_size = 1, n_epochs = 1, n_steps=95, batch_size = 95,device ="cpu")
            model2.policy.mlp_extractor.policy_net = nn.Sequential(
                nn.Linear(model.policy.mlp_extractor.policy_net[0].in_features, 64),  # Modify the first layer
                nn.Tanh(),
                nn.Linear(64, 64),
                model.policy.mlp_extractor.policy_net[3]
            )
            model2.policy.action_net =  nn.Linear(64, 30)
            model2.policy.mlp_extractor.value_net = nn.Sequential(
                nn.Linear(model.policy.mlp_extractor.value_net[0].in_features, 64),  # Modify the first layer
                nn.Tanh(),
                nn.Linear(64, 64),
                model.policy.mlp_extractor.value_net[3]
            )
            model2.policy.value_net =  nn.Linear(64, 1)
            new_logger = configure( "/tmp/sb3_log/", ["csv", "tensorboard"])
            model2.set_logger(new_logger)
            model2.set_parameters("car_sharing_v2g_model_small", device = "cpu")

            new_logger = configure( "/tmp/sb3_log/", ["csv", "tensorboard"])
            model2.set_logger(new_logger)

            #model.policy.to(device)

            r = validation(model2)
            print("Validation reward: ", r, " CHF   Episodes learned: ",count)
            reward_list.append(r)
            count_list.append(count)

        else: 
            env.load_new_data(daily_data = daily_data, reservations = reservations,
                          electricity_price = electricity_price, timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day,
                          planned_durations = planned_durations_day)
            s = env.reset()


        # learn one episode
        model2.learn(total_timesteps=95, reset_num_timesteps=False)


        if count in range(5,30000,100):
            r = validation(model)
            print("Validation reward: ", r, " CHF   Episodes learned: ",count)
            reward_list.append(r)
            count_list.append(count)

        counter += 1

        count += 1
        
        count_next_start += 1

model2.save("car_sharing_v2g_model_small")
np.save("reward_list_model_small", reward_list)
np.save("count_list_model_small", count_list)

    

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24



KeyboardInterrupt



In [None]:
model = PPO("MlpPolicy",env, verbose=0, stats_window_size = 1, n_epochs = 1, n_steps=95, batch_size = 95,device ="cpu")
    model.policy.mlp_extractor.policy_net = nn.Sequential(
        nn.Linear(model.policy.mlp_extractor.policy_net[0].in_features, 64),  # Modify the first layer
        nn.Tanh(),
        nn.Linear(64, 64),
        model.policy.mlp_extractor.policy_net[3]
    )
    model.policy.action_net =  nn.Linear(64, 30)
    model.policy.mlp_extractor.value_net = nn.Sequential(
        nn.Linear(model.policy.mlp_extractor.value_net[0].in_features, 64),  # Modify the first layer
        nn.Tanh(),
        nn.Linear(64, 64),
        model.policy.mlp_extractor.value_net[3]
    )
    model.policy.value_net =  nn.Linear(64, 1)
    new_logger = configure( "/tmp/sb3_log/", ["csv", "tensorboard"])
    model.set_logger(new_logger)
    model.set_parameters("car_sharing_v2g_model_small", device = "cpu")




In [None]:
#%%snakeviz
# get number of vehicles
nr_vehicles = len(vehicles)

# maximal simulation length
if nr_iterations > 577:
    nr_iterations = 577

reward_list = []
count_list = []
count = 0

model = None
# iterate over weeks (for loading weekly discrete data)
for i in range(0,60):
    print(i)
    for j in range(0,70):
        print(j)
        week_nr = random.randrange(1,80)
        # load discrete car-sharing table
        sql =  "SELECT * FROM discrete.discrete_weeks_{} ORDER BY vehicle_no limit 10".format(week_nr)
        data = pd.read_sql(sql, engine)

        # load discrete planned reservation table
        sql =  "SELECT * FROM msc_2023_dominik.planned_reservations_discrete_{} ORDER BY vehicle_no limit 10".format(week_nr)
        planned_reservations = pd.read_sql(sql, engine)

        # load discrete planned reservation duration table
        sql =  "SELECT * FROM msc_2023_dominik.planned_durations_discrete_{} ORDER BY vehicle_no limit 10".format(week_nr)
        planned_durations = pd.read_sql(sql, engine)
        counter = 0
        # iteration for each day
        for day in range(98,676,96):

            # calculate number of timesteps since first day of simulation
            timesteps_since_start = counter * 96 + week_nr*7*96

            # all requested days are simulated
            if count == nr_iterations:
                break

            # get date
            date_day = pd.to_datetime(data.columns[day-97]).date()
            date_day_string = date_day.strftime('%Y-%m-%d')

            # load reservations
            reservations = reservations_dict[date_day_string]

            # load electricity prices for charging
            electricity_price = charging_costs_dict[date_day_string]

            # load secondary energy prices for v2g
            v2g_price = v2g_price_dict[date_day_string]

            # select discrete data of day
            daily_data = data.iloc[:,day-97:day-1]
            planned_reservations_day = planned_reservations.iloc[:,day-97 + 1:day + 1] 
            planned_durations_day = planned_durations.iloc[:,day-97 + 1:day + 1] 

            # reset environment at beginnning of simulation
            if count == 0:

                # create environment
                env = CarsharingEnv(stations, vehicles, planned_bookings = True, v2g_penalty = 100, penalty_per_kwh = 1.0, daily_data = daily_data, reservations = reservations,
                               electricity_price = electricity_price, timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day,
                               planned_durations = planned_durations_day, max_distance_car_assingment=300, random_seed_number = 234, RL = True, v2g_demand_event_min = 10, v2g_demand_event_max = 10)

                # create RL model
                model = PPO("MlpPolicy",env, verbose=0, stats_window_size = 1, n_epochs = 1, n_steps=95, batch_size = 95,device ="cpu")
                model.policy.mlp_extractor.policy_net = nn.Sequential(
                    nn.Linear(model.policy.mlp_extractor.policy_net[0].in_features, 64),  # Modify the first layer
                    nn.Tanh(),
                    nn.Linear(64, 64),
                    model.policy.mlp_extractor.policy_net[3]
                )
                model.policy.action_net =  nn.Linear(64, 30)
                model.policy.mlp_extractor.value_net = nn.Sequential(
                    nn.Linear(model.policy.mlp_extractor.value_net[0].in_features, 64),  # Modify the first layer
                    nn.Tanh(),
                    nn.Linear(64, 64),
                    model.policy.mlp_extractor.value_net[3]
                )
                model.policy.value_net =  nn.Linear(64, 1)
                new_logger = configure( "/tmp/sb3_log/", ["csv", "tensorboard"])
                model.set_logger(new_logger)

                #model.policy.to(device)

                r = validation(model)
                print("Validation reward: ", r, " CHF   Episodes learned: ",count)
                reward_list.append(r)
                count_list.append(count)

            else: 
                env.load_new_data(daily_data = daily_data, reservations = reservations,
                              electricity_price = electricity_price, timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day,
                              planned_durations = planned_durations_day)
                s = env.reset()
                
            print("day")


            # learn one episode
            model.learn(total_timesteps=95, reset_num_timesteps=False)


            if count in range(5,30000,100):
                r = validation(model)
                print("Validation reward: ", r, " CHF   Episodes learned: ",count)
                reward_list.append(r)
                count_list.append(count)

            counter += 1

            count += 1
   
    model.save("car_sharing_v2g_model_small")
    np.save("reward_list_model_small", reward_list)
    np.save("count_list_model_small", count_list)
    
    # create environment
    env = CarsharingEnv(stations, vehicles, planned_bookings = True, v2g_penalty = 100, penalty_per_kwh = 1.0, daily_data = daily_data, reservations = reservations,
                               electricity_price = electricity_price, timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day,
                               planned_durations = planned_durations_day, max_distance_car_assingment=300, random_seed_number = 234, RL = True, v2g_demand_event_min = 10, v2g_demand_event_max = 10)

    
    
    model = PPO("MlpPolicy",env, verbose=0, stats_window_size = 1, n_epochs = 1, n_steps=95, batch_size = 95,device ="cpu")
    model.policy.mlp_extractor.policy_net = nn.Sequential(
        nn.Linear(model.policy.mlp_extractor.policy_net[0].in_features, 64),  # Modify the first layer
        nn.Tanh(),
        nn.Linear(64, 64),
        model.policy.mlp_extractor.policy_net[3]
    )
    model.policy.action_net =  nn.Linear(64, 30)
    model.policy.mlp_extractor.value_net = nn.Sequential(
        nn.Linear(model.policy.mlp_extractor.value_net[0].in_features, 64),  # Modify the first layer
        nn.Tanh(),
        nn.Linear(64, 64),
        model.policy.mlp_extractor.value_net[3]
    )
    model.policy.value_net =  nn.Linear(64, 1)
    new_logger = configure( "/tmp/sb3_log/", ["csv", "tensorboard"])
    model.set_logger(new_logger)
    model.set_parameters("car_sharing_v2g_model_small", device = "cpu")



    

In [None]:
#%%snakeviz
# get number of vehicles
nr_vehicles = len(vehicles)

# maximal simulation length
if nr_iterations > 577:
    nr_iterations = 577

reward_list = []
count_list = []
count = 0
# iterate over weeks (for loading weekly discrete data)
for week_nr in range(start_week, math.ceil((start_week * 7 + nr_iterations) / 7)):
    # load discrete car-sharing table
    sql =  "SELECT * FROM discrete.discrete_weeks_{} ORDER BY vehicle_no".format(week_nr)
    data = pd.read_sql(sql, engine)
    
    # load discrete planned reservation table
    sql =  "SELECT * FROM msc_2023_dominik.planned_reservations_discrete_{} ORDER BY vehicle_no".format(week_nr)
    planned_reservations = pd.read_sql(sql, engine)
    
    # load discrete planned reservation duration table
    sql =  "SELECT * FROM msc_2023_dominik.planned_durations_discrete_{} ORDER BY vehicle_no".format(week_nr)
    planned_durations = pd.read_sql(sql, engine)
        
    # iteration for each day
    for day in range(98,676,96):
        
        # calculate number of timesteps since first day of simulation
        timesteps_since_start = count * 96 + week_nr*7*96
        
        # all requested days are simulated
        if count == nr_iterations:
            break
            
        # get date
        date_day = pd.to_datetime(data.columns[day-97]).date()
        date_day_string = date_day.strftime('%Y-%m-%d')
        
        # load reservations
        reservations = reservations_dict[date_day_string]
        
        # load electricity prices for charging
        electricity_price = charging_costs_dict[date_day_string]
        
        # load secondary energy prices for v2g
        v2g_price = v2g_price_dict[date_day_string]
    
        # select discrete data of day
        daily_data = data.iloc[:,day-97:day-1]
        planned_reservations_day = planned_reservations.iloc[:,day-97 + 1:day + 1] 
        planned_durations_day = planned_durations.iloc[:,day-97 + 1:day + 1] 
        
        # reset environment at beginnning of simulation
        if count == 0:
            
            # create environment
            env = CarsharingEnv(stations, vehicles, planned_bookings = True, v2g_penalty = 499, penalty_per_kwh = 1.0, daily_data = daily_data, reservations = reservations,
                           electricity_price = electricity_price, timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day,
                           planned_durations = planned_durations_day, max_distance_car_assingment=300, random_seed_number = 15451)
            model = PPO("MlpPolicy",env, verbose=1, n_steps=95, batch_size = 95,device ="cpu")
            model.policy.mlp_extractor.policy_net = nn.Sequential(
                nn.Linear(model.policy.mlp_extractor.policy_net[0].in_features, 1024),  # Modify the first layer
                nn.Tanh(),
                nn.Linear(1024, 5120),
                model.policy.mlp_extractor.policy_net[3]
            )
            model.policy.action_net =  nn.Linear(5120, 13260)
            model.policy.mlp_extractor.value_net = nn.Sequential(
                nn.Linear(model.policy.mlp_extractor.value_net[0].in_features, 1028),  # Modify the first layer
                nn.Tanh(),
                nn.Linear(1028, 128),
                model.policy.mlp_extractor.value_net[3]
            )
            model.policy.value_net =  nn.Linear(128, 1)
            
            # load parameters 
            model.set_parameters("car_sharing_v2g_model1", device = "cpu")
            
            #model.policy.to(device)
            
            r = validation(model)
            print("Validation reward: ", r, " CHF   Episodes learned: ",count)
            reward_list.append(r)
            count_list.append(count)
            
        else: 
            env.load_new_data(daily_data = daily_data, reservations = reservations,
                           electricity_price = electricity_price, timesteps_since_start = timesteps_since_start, v2g_price = v2g_price, planned_reservations = planned_reservations_day,
                           planned_durations = planned_durations_day)
            s = env.reset()
            
        
        # learn one episode
        model.learn(total_timesteps=95, reset_num_timesteps=False)
            
          
        print("")
        print("Learned episode: ",count)
        print("")
        
        if count in range(5,nr_iterations + 1,10):
            r = validation(model)
            print("Validation reward: ", r, " CHF   Episodes learned: ",count)
            reward_list.append(r)
            count_list.append(count)

            
        count += 1

In [27]:
model.save("car_sharing_v2g_model2")

In [28]:
model.save("car_sharing_v2g_model2")
np.save("reward_list_model2", reward_list)
np.save("count_list_model2", count_list)

In [94]:
print(count_list)

[0, 5, 105, 205, 305, 405, 505]


In [34]:
model.save("car_sharing_v2g_model1")

In [36]:
model.policy.mlp_extractor.policy_net

Sequential(
  (0): Linear(in_features=17682, out_features=1024, bias=True)
  (1): Tanh()
  (2): Linear(in_features=1024, out_features=5120, bias=True)
  (3): Tanh()
)

In [56]:
model.policy.mlp_extractor.policy_net = nn.Sequential(
    nn.Linear(model.policy.mlp_extractor.policy_net[0].in_features, 1024),  # Modify the first layer
    nn.Tanh(),
    nn.Linear(1024, 5120),
    model.policy.mlp_extractor.policy_net[3]
)
model.policy.action_net =  nn.Linear(5120, 13260)
model.policy.mlp_extractor.value_net = nn.Sequential(
    nn.Linear(model.policy.mlp_extractor.value_net[0].in_features, 1028),  # Modify the first layer
    nn.Tanh(),
    nn.Linear(1028, 128),
    model.policy.mlp_extractor.value_net[3]
)
model.policy.value_net =  nn.Linear(128, 1)


# print network
model.policy

ActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (pi_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (vf_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (policy_net): Sequential(
      (0): Linear(in_features=17682, out_features=1024, bias=True)
      (1): Tanh()
      (2): Linear(in_features=1024, out_features=5120, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=17682, out_features=1028, bias=True)
      (1): Tanh()
      (2): Linear(in_features=1028, out_features=128, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=5120, out_features=13260, bias=True)
  (value_net): Linear(in_features=128, out_features=1, bias=True)
)

In [58]:
model.set_parameters("car_sharing_v2g_model1", device = "cpu")

In [55]:
from stable_baselines3.common.policies import ActorCriticPolicy
from stable_baselines3.common.net_util import FlattenExtractor, MlpExtractor

class CustomPolicy(ActorCriticPolicy):
    def __init__(self, *args, **kwargs):
        super(CustomPolicy, self).__init__(*args, **kwargs,
                                            net_arch=[dict(pi=[64, 64], vf=[64, 64])],
                                            features_extractor_class=FlattenExtractor,
                                            features_extractor_kwargs=dict(flatten_dim=1),
                                            )
        self.mlp_extractor = MlpExtractor(
            self.features_extractor.features_dim,
            net_arch=[1024, 5120],
            activation_fn=torch.nn.Tanh
        )

model = PPO(CustomPolicy, env)

ModuleNotFoundError: No module named 'stable_baselines3.common.net_util'

In [48]:
model1 = model.policy

In [53]:
model1.weights

AttributeError: 'ActorCriticPolicy' object has no attribute 'weights'

In [47]:
model.load("car_sharing_v2g_model1")

RuntimeError: Error(s) in loading state_dict for ActorCriticPolicy:
	size mismatch for mlp_extractor.policy_net.0.weight: copying a param with shape torch.Size([1024, 17682]) from checkpoint, the shape in current model is torch.Size([64, 17682]).
	size mismatch for mlp_extractor.policy_net.0.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for mlp_extractor.policy_net.2.weight: copying a param with shape torch.Size([5120, 1024]) from checkpoint, the shape in current model is torch.Size([64, 64]).
	size mismatch for mlp_extractor.policy_net.2.bias: copying a param with shape torch.Size([5120]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for mlp_extractor.value_net.0.weight: copying a param with shape torch.Size([1028, 17682]) from checkpoint, the shape in current model is torch.Size([64, 17682]).
	size mismatch for mlp_extractor.value_net.0.bias: copying a param with shape torch.Size([1028]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for mlp_extractor.value_net.2.weight: copying a param with shape torch.Size([128, 1028]) from checkpoint, the shape in current model is torch.Size([64, 64]).
	size mismatch for mlp_extractor.value_net.2.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for action_net.weight: copying a param with shape torch.Size([13260, 5120]) from checkpoint, the shape in current model is torch.Size([13260, 64]).
	size mismatch for value_net.weight: copying a param with shape torch.Size([1, 128]) from checkpoint, the shape in current model is torch.Size([1, 64]).