In [1]:
# This is important when we want to run this notebook as a plain Python script,
# because Jupyter naturally has a higher recursion depth than the default interpreter.
import sys
sys.setrecursionlimit(3000)

# Print the process ID; useful when the script is started with nohup.
import os
from icecream import ic
ic(os.getpid())

ic| os.getpid(): 263055


263055

In [2]:
from gymportal.data.ev_generators import get_standard_generator, RealWorldGenerator
from acnportal.acnsim import Linear2StageBattery
from gymportal.data.battery_generators import CustomizableBatteryGenerator
from gymportal.sim import get_charging_network, Recomputer, EvaluationSimulator, SimGenerator
from datetime import datetime, timedelta

import pytz
timezone = pytz.timezone("America/Los_Angeles")

# Shared simulation settings. They are named once here so that every component
# below is guaranteed to be configured with the same values.
VOLTAGE = 208  # `voltage` of both the charging network and the battery generator
PERIOD = 1  # `period` of both the battery generator and the EV generator
TRANSFORMER_CAP = 150  # `transformer_cap` handed to the charging network
RECOMPUTE_INTERVAL = 10  # `recompute_interval` of every Recomputer


# charging_network = get_charging_network('simple_acn', basic_evse=True, voltage=208,
#                                         network_kwargs={
#                                             'station_ids': ['CA-504', 'CA-503', 'CA-502', 'CA-501'],
#                                              #'station_ids': ['CA-501'],
# "aggregate_cap": 32 * 208 / 1000})

charging_network = get_charging_network('caltech', basic_evse=True, voltage=VOLTAGE,
                                        network_kwargs={"transformer_cap": TRANSFORMER_CAP})

battery_generator = CustomizableBatteryGenerator(
    voltage=VOLTAGE,
    period=PERIOD,
    battery_types=[
        Linear2StageBattery],
    max_power_function="normal",
)

ev_generator = RealWorldGenerator(battery_generator=battery_generator, site='caltech', period=PERIOD)
# ev_generator = get_standard_generator(
#     'caltech', battery_generator, seed=42, frequency_multiplicator=frequency_multiplicator, duration_multiplicator=2)


def make_sim_generator(start_date, simulation_days):
    """
    Build a SimGenerator on the shared charging network and EV generator.

    Args:
        start_date: Start date handed to the SimGenerator.
        simulation_days: Number of days handed to the SimGenerator.

    Returns:
        A SimGenerator with a single interval, its own Recomputer instance and
        EvaluationSimulator as simulator class.
    """
    return SimGenerator(
        charging_network=charging_network,
        simulation_days=simulation_days,
        n_intervals=1,
        start_date=start_date,
        ev_generator=ev_generator,
        # A fresh Recomputer per generator, exactly as in the hand-written version.
        recomputer=Recomputer(recompute_interval=RECOMPUTE_INTERVAL, sparse=True),
        sim_class=EvaluationSimulator,
    )


# The three data splits are chained: each one starts one day after the end date
# of the previous split.
train_generator = make_sim_generator(
    start_date=timezone.localize(datetime(2019, 1, 1)),
    simulation_days=7,
)

ic(train_generator.end_date + timedelta(days=1))

eval_generator = make_sim_generator(
    start_date=train_generator.end_date + timedelta(days=1),
    simulation_days=7,
)

ic(eval_generator.end_date + timedelta(days=1))

validation_generator = make_sim_generator(
    start_date=eval_generator.end_date + timedelta(days=1),
    simulation_days=14,
)

ic(validation_generator.end_date + timedelta(days=1))
# End on a statement so the cell does not additionally echo the value of the last ic() call.
pass

ic| train_generator.end_date + timedelta(days=1): datetime.datetime(2019, 1, 9, 0, 0, tzinfo=<DstTzInfo 'America/Los_Angeles' PST-1 day, 16:00:00 STD>)
ic| eval_generator.end_date + timedelta(days=1): datetime.datetime(2019, 1, 17, 0, 0, tzinfo=<DstTzInfo 'America/Los_Angeles' PST-1 day, 16:00:00 STD>)
ic| validation_generator.end_date + timedelta(days=1): datetime.datetime(2019, 2, 1, 0, 0, tzinfo=<DstTzInfo 'America/Los_Angeles' PST-1 day, 16:00:00 STD>)


In [3]:
from src.pv.pv import read_pv_data

# Load the PV data from "pv_time.csv" with the project helper and display summary
# statistics of its columns as the cell output.
df_pv = read_pv_data("pv_time.csv")
df_pv.describe()

Unnamed: 0,P,G(i),H_sun,T2m,WS10m,Int
count,140256.0,140256.0,140256.0,140256.0,140256.0,140256.0
mean,30395.798319,267.983145,16.748906,17.931872,1.654366,0.0
std,40648.543625,356.157287,22.166668,7.394042,0.867817,0.0
min,0.0,0.0,-5.88,-2.23,0.0,0.0
25%,0.0,0.0,0.0,12.36,0.97,0.0
50%,87.0,7.26,0.52,17.31,1.59,0.0
75%,65764.5,564.28,31.9825,23.01,2.28,0.0
max,132601.5,1143.22,78.09,46.46,7.93,0.0


In [4]:
from gymportal.environment import *

In [14]:
from src.run_simulation import metrics
from gymportal.environment import *
from src.extra import unplug_penalty
from src.pv.metrics import pv_utilization_mean
from src.pv.observations import pv_observation_mean
from src.pv.rewards import pv_utilization
import numpy as np
from src.pv.utils import pv_to_A
from src.pv.pv import most_recent_P
from acnportal.acnsim import Simulator


def soft_charging_reward() -> SimReward:
    """
    Build the reward for charge delivered since the previous step.

    Per station, the reward is the sum of its charging rates over the elapsed
    timesteps divided by (maximum pilot signal * number of elapsed timesteps).
    The single-agent reward is the average of the per-station rewards.
    """

    def multi_reward(env: BaseSimInterface) -> MultiAgentDict:
        rates = env.interface.charging_rates

        stop = env.timestep
        start = env.prev_timestep
        elapsed = stop - start

        # One entry per station, in the order reported by the interface; the row
        # index into the charging-rate matrix follows that same order.
        return {
            station_id: np.sum(rates[row, start:stop])
            / (env.interface.max_pilot_signal(station_id) * elapsed)
            for row, station_id in enumerate(env.interface.station_ids)
        }

    def single_reward(env: BaseSimInterface) -> float:
        per_station = multi_reward(env)
        total = float(np.sum(list(per_station.values())))

        return total / len(per_station)

    return SimReward(
        single_reward_function=single_reward,
        multi_reward_function=multi_reward,
        name="soft_charging_reward",
    )



def soft_charging_reward_pv_weighted(df_pv, transformer_cap) -> SimReward:
    """
    Rewards charge delivered since the previous step, weighted by PV production.

    The multi-agent reward is the plain per-station soft charging reward: the sum
    of a station's charging rates over the elapsed timesteps divided by
    (maximum pilot signal * number of elapsed timesteps).

    The single-agent reward weights the aggregate charging rate of every elapsed
    timestep by ``PV power / transformer_cap`` at that timestep and returns the
    mean of these weighted values over the elapsed timesteps as a scalar.

    Args:
        df_pv: PV data handed to ``most_recent_P`` to look up the PV power for a
            given datetime.
        transformer_cap: Divisor used to normalise the PV power.
            NOTE(review): the PV values are treated as W here while the notebook
            passes 150 for this argument, which looks like kW -- confirm that the
            units match.

    Returns:
        A ``SimReward`` named "soft_charging_reward_pv_weighted".
    """

    def multi_reward(env: BaseSimInterface) -> MultiAgentDict:
        charging_rates = env.interface.charging_rates

        timestep = env.timestep
        prev_timestep = env.prev_timestep

        soft_reward = {
            station_id: 0 for station_id in env.interface.station_ids}

        for idx, station_id in enumerate(env.interface.station_ids):
            soft_reward[station_id] = np.sum(
                charging_rates[idx, prev_timestep: timestep]) / (env.interface.max_pilot_signal(station_id) * (
                    timestep - prev_timestep))

        return soft_reward

    def single_reward(env: BaseSimInterface) -> float:
        timestep_now = env.timestep
        timestep_prev = env.prev_timestep

        # No timestep elapsed: there is nothing to weight, and averaging an empty
        # array would yield NaN.
        if timestep_now <= timestep_prev:
            return 0.0

        # Exactly one PV lookup per elapsed timestep, so the weights line up 1:1
        # with the columns of charging_rates[:, timestep_prev:timestep_now]
        # regardless of the simulation period.
        pvs_in_W = np.array([
            most_recent_P(df_pv, env.interface.timestep_to_datetime(t))
            for t in range(timestep_prev, timestep_now)
        ])

        ratio = pvs_in_W / transformer_cap

        # Aggregate charging rate over all stations, one value per elapsed timestep.
        aggregate_rates = np.sum(
            env.interface.charging_rates[:, timestep_prev: timestep_now], axis=0)

        # Collapse to a scalar: the single-agent reward must be a float, not one
        # value per timestep.
        return float(np.mean(ratio * aggregate_rates))

        # pvs_in_A = [pv_to_A(x, sim.network._voltages) for x in pvs_in_W]

    return SimReward(single_reward_function=single_reward,
                     multi_reward_function=multi_reward, name="soft_charging_reward_pv_weighted")


# Observation objects for the environment configuration: everything except the
# last entry comes from the gymportal.environment wildcard import; the last entry
# is the project's PV observation built on the loaded PV data.
observation_objects = [
    charging_rates_observation_normalized(),
    percentage_of_magnitude_observation(),
    diff_pilots_charging_rates_observation_normalized(),
    cyclical_minute_observation(),
    cyclical_day_observation(),
    cyclical_month_observation(),
    cyclical_minute_observation_stay(),
    energy_delivered_observation_normalized(),
    num_active_stations_observation_normalized(),
    pilot_signals_observation_normalized(),
    cyclical_minute_observation_arrival(),
    cyclical_day_observation_arrival(),
    cyclical_month_observation_arrival(),
    pv_observation_mean(df_pv),
]

# Active reward terms; the commented-out entries are alternatives that are
# currently disabled.
reward_objects = [
    # current_constraint_violation(),
    # 150 repeats the transformer_cap given to get_charging_network; keep both in sync.
    soft_charging_reward_pv_weighted(df_pv, 150),
    # constraint_charging_reward(),
    # unplug_penalty(),
    # pilot_charging_rate_difference_penalty(),
    pv_utilization(df_pv),
]

# Register an additional metric in the `metrics` dict imported from
# src.run_simulation (this mutates that shared dict); df_pv is bound via closure.
metrics["pv_utilization_mean"] = lambda sim: pv_utilization_mean(sim, df_pv)

In [15]:
from src.actions import ranking_schedule_plus, zero_centered_single_charging_schedule_normalized_clip

# Environment configuration used for training.
train_config = {
    "observation_objects": observation_objects,
    "action_object": zero_centered_single_charging_schedule_normalized_clip(),
    "reward_objects": reward_objects,
    "simgenerator": train_generator,
    "meet_constraints": True,
}

# Evaluation and validation reuse the training configuration and swap only the
# simulation generator.
eval_config = {**train_config, "simgenerator": eval_generator}
validation_config = {**train_config, "simgenerator": validation_generator}

In [16]:
# import dill as pickle
# with open("caltech.pkl",'rb') as file:
#     env = pickle.load(file)

In [17]:
from src.utils import FlattenSimEnv


# Build the environment from the training configuration.
env = FlattenSimEnv(train_config)

In [18]:
# Reset the environment and record how many events are queued in the resulting
# simulation; the summed queue length is later used to size a training epoch.
lengths_load = []

for i in range(1):
    ic(f"preparing simulation {i}")
    env.reset()
    # Reads the event queue through the interface's private simulator attribute.
    length = len(env.interface._simulator.event_queue.queue)
    lengths_load.append(length)
    
# NOTE(review): with range(1) this sums the queue length of a single simulation
# only, not "all 46 weeks" as an earlier comment claimed -- raise the loop count
# if more simulations per epoch are intended.
steps_per_epoch = np.sum(lengths_load)

ic| f"preparing simulation {i}": 'preparing simulation 0'


In [None]:
from src.ppo_custom.ppo_model import PPO

# Create the PPO model on the training environment with unbounded episode length.
# NOTE(review): the reason for the "+ 1" on steps_per_epoch is not visible here -- confirm.
algo = PPO(env, max_episode_len=np.inf, steps_per_epoch=steps_per_epoch + 1)

In [None]:
from gymportal.evaluation import ACNSchedule, RllibSchedule
from acnportal.algorithms import UncontrolledCharging, SortedSchedulingAlgo, last_come_first_served, \
    first_come_first_served
from src.utils import CustomSchedule


# Schedules handed to the evaluation metrics callback. Every entry is currently
# commented out, so the dict is empty.
models = {
    # "PPO": CustomSchedule(algo),
    # "FCFS": ACNSchedule(SortedSchedulingAlgo(first_come_first_served)),
    # "LCFS": ACNSchedule(SortedSchedulingAlgo(last_come_first_served)),
    # "Uncontrolled": ACNSchedule(UncontrolledCharging()),
}

# Display the dict as the cell output.
models

In [None]:
import wandb
# Start the Weights & Biases run that the logger and the evaluation callbacks report to.
# The run name is a plain string: it has no placeholders, so no f-string is needed.
run = wandb.init(project="ppo_x", group="PV", name="soft_charging_weighted")

In [None]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint

from src.ppo_custom.callbacks import EvaluationMetricsCallback, EvaluationFigureCallback

# Lightning logger for Weights & Biases with model logging set to "all".
wandb_logger = WandbLogger(log_model="all")

# CPU-only trainer for six epochs with three callbacks:
#   * evaluation metrics on the evaluation configuration (fixed seed 42),
#   * a checkpoint at the end of every training epoch (save_top_k=-1 keeps them all),
#   * evaluation figures built from the training configuration.
trainer = Trainer(
    max_epochs=6,
    logger=wandb_logger,
    accelerator="cpu",
    callbacks=[
        EvaluationMetricsCallback(models, metrics, eval_config, seed=42, run=run),
        ModelCheckpoint(save_top_k=-1, every_n_epochs=1,
                        save_on_train_epoch_end=True),
        EvaluationFigureCallback(charging_network, timezone, ev_generator, train_config, run=run),
    ]
)

# NOTE(review): Trainer.fit is documented to return None in current Lightning
# versions, so `res` presumably carries no information -- confirm before relying on it.
res = trainer.fit(algo)
res

In [None]:
# Save the trainer's final state to "last_checkpoint.pkl" in the working directory.
# NOTE(review): Lightning checkpoints conventionally use the ".ckpt" suffix; the name is left unchanged here.
trainer.save_checkpoint("last_checkpoint.pkl")

In [None]:
# Close the Weights & Biases run once training and checkpointing are done.
run.finish()