In [None]:
# Jupyter setup
# Load the autoreload extension and switch it to mode 2 so that edits to
# imported modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Turn on the "greedy" option of the IPython tab completer.
%config IPCompleter.greedy=True

In [None]:
import ray
import ray.rllib
import ray.tune 
import solara.envs.creator

# (Re)start Ray. Shutting down first makes this cell safe to re-run: any Ray
# runtime left over from an earlier execution is stopped before a new one
# (a background process for distributed computing) is launched.
ray.shutdown()
ray.init(
    logging_level="WARNING",
    object_store_memory=10**9,
)

# Make the environment creator known to Ray under the name "battery_control",
# which is the name the agent configuration uses for its "env" entry.
ray.tune.registry.register_env(
    "battery_control",
    solara.envs.creator.create_env,
)

In [None]:
from solara.constants import PROJECT_PATH

# Environment configuration. It is passed on to the environment through the
# "env_config" entry of AGENT_CONFIG.
ENV_CONFIG = {
    # Settings of the environment itself.
    "general": {
        "type": "battery_control.BatteryControlEnv",
        "infeasible_control_penalty": True,
        "grid_charging": True,
        "logging_level": "WARNING",  # if using RLlib, set to 'RAY'
    },
    # One entry per component the environment is assembled from.
    "components": {
        "battery": {
            "type": "LithiumIonBattery",
            "size": 10,
            "chemistry": "NMC",
            "time_step_len": 1,
        },
        "solar": {
            "type": "DataPV",
            "data_path": PROJECT_PATH + "/data/solar_trace_data/PV_5796.txt",
            # Optional key, left unset here; the visualisation cell sets it
            # on its own copy of the config.
            # "fixed_sample_num": 12,
        },
        "load": {
            "type": "DataLoad",
            "data_path": PROJECT_PATH + "/data/solar_trace_data/load_5796.txt",
            # Optional key, left unset here (see the note on "solar").
            # "fixed_sample_num": 12,
        },
        "grid": {
            "type": "PeakGrid",
            "peak_threshold": 1.0,
        },
    },
}

# Trainer configuration handed to the RLlib agent class below (both when
# training through Tune and when instantiating the agent for visualisation).
AGENT_CONFIG = {
    "framework": "torch",
    # Must match the name the environment creator was registered under.
    "env": "battery_control",
    "env_config": ENV_CONFIG,
    "gamma": 0.9999999,
    "log_level": "WARNING",
    "lr": 5e-5,
    "model": {
        "fcnet_hiddens": [256, 256, 256, 256],
        "fcnet_activation": "relu",
        "post_fcnet_activation": "tanh",
    },
}

# Import the PPO submodule explicitly. `import ray.rllib` on its own does not
# promise that `ray.rllib.agents.ppo` is bound as an attribute; the lookup
# below would otherwise only work as a side effect of RLlib's internal
# trainer registration.
import ray.rllib.agents.ppo

agent_class = ray.rllib.agents.ppo.PPOTrainer

In [None]:
# Train the agent with Ray Tune and keep the returned analysis object so the
# results can be inspected afterwards.
analysis = ray.tune.run(
    agent_class,
    config=AGENT_CONFIG,
    # Stop once 500 training iterations have been reported.
    stop={"training_iteration": 500},
    # Checkpoint frequency of 1, i.e. a checkpoint per training iteration.
    checkpoint_freq=1,
    # Results (and checkpoints) go below this directory.
    local_dir="./tmp/tune/",
    log_to_file=True,
    # Notebook-friendly progress table that overwrites itself in place.
    progress_reporter=ray.tune.JupyterNotebookReporter(overwrite=True),
)

In [6]:
import copy
import os

import solara.plot.widgets
import solara.utils.rllib

# We visualise on fixed load and solar data.
# Work on a deep copy so that AGENT_CONFIG (and the ENV_CONFIG it references)
# stays untouched for training and for the other cells.
FIXED_AGENT_CONFIG = copy.deepcopy(AGENT_CONFIG)
FIXED_AGENT_CONFIG["env_config"]["components"]["solar"]['fixed_sample_num'] = 12
FIXED_AGENT_CONFIG["env_config"]["components"]["load"]['fixed_sample_num'] = 12

agent_instance = agent_class(FIXED_AGENT_CONFIG)

# Directory whose checkpoints are replayed.
# Previously this was hard-coded to one dated run, so after re-training the
# notebook kept showing the old run (or failed if that directory was gone).
# Now: if the training cell has been run in this kernel, use the log directory
# of its best trial; otherwise fall back to the saved run so the notebook can
# still be used for visualisation without re-training.
_SAVED_RUN_DIR = './tmp/tune/PPO_2021-06-07_18-21-37/PPO_battery_control_d0b10_00000_0_2021-06-07_18-21-37/'
_tune_analysis = globals().get("analysis")
_best_logdir = (
    _tune_analysis.get_best_logdir("episode_reward_mean", mode="max")
    if _tune_analysis is not None
    else None
)
if _best_logdir:
    # The saved path ends with a separator; keep that property in case the
    # helper builds checkpoint paths by plain string concatenation.
    checkpoint_path = os.path.join(_best_logdir, "")
else:
    checkpoint_path = _SAVED_RUN_DIR

episodes_data = solara.utils.rllib.run_episodes_from_checkpoints(
    agent=agent_instance,
    check_save_path=checkpoint_path,
)

# Traces that are switched on when the widget is first shown.
initial_visibility = ['load','pv_gen','energy_cont','net_load',
                      'charging_power','cost','price_threshold',
                      'actions']

# Last expression of the cell, so the widget is rendered as the cell output.
solara.plot.widgets.InteractiveEpisodes(episodes_data, initial_visibility=initial_visibility)



InteractiveEpisodes(children=(VBox(children=(HBox(children=(Play(value=1, interval=400, max=282, min=1), IntSl…

In [7]:
# Same visualisation as the previous cell, but the agent is built from the
# original AGENT_CONFIG, i.e. without the 'fixed_sample_num' overrides.
agent_instance = agent_class(AGENT_CONFIG)

# `checkpoint_path` is not set here; the value assigned in the previous
# visualisation cell is reused.
episodes_data = solara.utils.rllib.run_episodes_from_checkpoints(
    agent=agent_instance,
    check_save_path=checkpoint_path,
)

# Traces that are switched on when the widget is first shown.
initial_visibility = [
    'load', 'pv_gen', 'energy_cont', 'net_load',
    'charging_power', 'cost', 'price_threshold',
    'actions',
]

# Last expression of the cell, so the widget is rendered as the cell output.
solara.plot.widgets.InteractiveEpisodes(
    episodes_data,
    initial_visibility=initial_visibility,
)



InteractiveEpisodes(children=(VBox(children=(HBox(children=(Play(value=1, interval=400, max=282, min=1), IntSl…