# Experiment Collection #01

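This notebook contains experiments on the effect of a penalty term for infeasible control actions (`infeasible_control_penalty`), with and without grid charging (`grid_charging`), when training an RL agent on the battery control environment.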

## 1. Basic Setup

In [1]:
# Jupyter setup
# Load the autoreload extension and switch it to mode 2 so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Turn on the `greedy` option of IPython's tab completer.
%config IPCompleter.greedy=True

In [2]:
import ray
# Shut ray down so that the initialisation cell below starts from a clean state
# when the notebook is re-run in the same kernel.
ray.shutdown()

In [3]:
import ray
import ray.rllib
import ray.tune 
import solara.envs.creator

# (Re)start ray, which launches the background processes used for distributed
# computing. Shutting down first makes this cell safe to re-run.
OBJECT_STORE_MEMORY_BYTES = 25 * 10**9
ray.shutdown()
ray.init(
    logging_level="WARNING",
    object_store_memory=OBJECT_STORE_MEMORY_BYTES,
)

# Make the environment creator available to ray under the name "battery_control".
ray.tune.registry.register_env(
    "battery_control",
    solara.envs.creator.create_env,
)

## 2. Experiment Definition

In [4]:
from solara.constants import PROJECT_PATH

# RL environment configuration, assembled from its two sections.

# General settings. The two `grid_search` entries are placeholders that ray tune
# expands into one trial per combination of values.
general_settings = {
    'type': 'battery_control.BatteryControlEnv',
    'infeasible_control_penalty': ray.tune.grid_search([False, True]),
    'grid_charging': ray.tune.grid_search([True, False]),
    'logging_level': "RAY", # if using RLlib, set to 'RAY'
}

# Component settings: battery, solar generation, load and grid.
component_settings = {
    'battery': {
        'type': 'LithiumIonBattery',
        'size': 10,
        'chemistry': 'NMC',
        'time_step_len': 1,
    },
    'solar': {
        'type': 'DataPV',
        'data_path': PROJECT_PATH + "/data/solar_trace_data/PV_5796.txt",
        'fixed_sample_num': 12,
    },
    'load': {
        'type': 'DataLoad',
        'data_path': PROJECT_PATH + "/data/solar_trace_data/load_5796.txt",
        'fixed_sample_num': 12,
    },
    'grid': {
        'type': 'PeakGrid',
        'peak_threshold': 1.0,
    },
}

ENV_CONFIG = {
    'general': general_settings,
    'components': component_settings,
}

# RL agent configuration.
# Resource settings ("num_workers", "num_gpus") are not part of this literal;
# they are assigned to AGENT_CONFIG in the parallelisation cell further down.
model_settings = {
    "fcnet_hiddens": [256, 256, 256, 256],
    "fcnet_activation": "relu",
    "post_fcnet_activation": "tanh",
}

AGENT_CONFIG = {
    "framework": "torch",
    "env": "battery_control",  # name under which the env creator was registered
    "env_config": ENV_CONFIG,
    "gamma": 0.9999999,
    "log_level": "WARNING",
    "lr": 5e-5,
    "model": model_settings,
}

# Complete experiment definition: the RL algorithm plus everything else that is
# later unpacked as keyword arguments into `ray.tune.run`.
EXPERIMENT_CONFIG = dict(
    run_or_experiment="PPO",
    config=AGENT_CONFIG,
    stop={"training_iteration": 2},
    local_dir="./tmp/tune/",
    log_to_file=True,
    checkpoint_freq=1,
)

In [5]:
# Parallelisation setup
#
# Two alternative resource layouts are supported. Exactly one of them is written
# into AGENT_CONFIG, so neither can silently overwrite the other:
#   * fractional GPU sharing: one GPU is split between the driver and the workers
#   * default: 10 workers, with one full GPU assigned via "num_gpus"
USE_FRACTIONAL_GPU_SHARING = False

if USE_FRACTIONAL_GPU_SHARING:
    num_workers = 4
    gpu_count = 1
    reserved_capacity = 0.01  # GPU fraction kept for the driver
    # The remaining GPU capacity is divided evenly between the workers.
    num_gpus_per_worker = (gpu_count - reserved_capacity) / num_workers

    AGENT_CONFIG["num_workers"] = num_workers
    # The earlier version assigned an undefined name `num_gpus` here and never
    # stored `num_gpus_per_worker`, so enabling this branch raised a NameError.
    AGENT_CONFIG["num_gpus"] = reserved_capacity
    AGENT_CONFIG["num_gpus_per_worker"] = num_gpus_per_worker
    AGENT_CONFIG["num_envs_per_worker"] = 8
else:
    AGENT_CONFIG["num_workers"] = 10
    AGENT_CONFIG["num_gpus"] = 1

In [6]:
from ray.rllib.evaluation import RolloutWorker

from ray.rllib.env import BaseEnv
from ray.rllib.policy import Policy
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.evaluation import MultiAgentEpisode
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.deprecation import deprecation_warning
from ray.rllib.utils.typing import AgentID, PolicyID

from typing import Dict, Optional, TYPE_CHECKING

import numpy as np


class MyCallbacks(ray.rllib.agents.callbacks.DefaultCallbacks):
    """Turn selected per-step info values into per-episode custom metrics.

    The info of every step is collected in ``episode.user_data["infos"]``. When
    the episode ends, the values of each key listed in ``info_keys`` are summed
    over the episode and stored in ``episode.custom_metrics`` under that key.
    Keys that never appear in any step info produce no metric.
    """

    # Step-info keys that are summed over an episode when present.
    info_keys = ["cost", "power_diff", "battery_cont"]

    def on_episode_start(self, *, worker: RolloutWorker, base_env: BaseEnv,
                         policies: Dict[str, Policy],
                         episode: MultiAgentEpisode, env_index: int, **kwargs):
        """Reset the list of step infos collected for this episode."""
        episode.user_data["infos"] = []

    def on_episode_step(self, *, worker: RolloutWorker, base_env: BaseEnv,
                        episode: MultiAgentEpisode, env_index: int, **kwargs):
        """Append the info returned by ``episode.last_info_for()`` for this step."""
        episode.user_data["infos"].append(episode.last_info_for())

    def on_episode_end(self, *, worker: RolloutWorker, base_env: BaseEnv,
                       policies: Dict[str, Policy], episode: MultiAgentEpisode,
                       env_index: int, **kwargs):
        """Sum each tracked info key over the episode into ``custom_metrics``.

        Step infos that are missing/empty or that do not contain a key are skipped
        for that key, so an episode without any collected infos, or with a key
        present in only some steps, no longer raises.
        """
        step_infos = episode.user_data.get("infos", [])
        for key in self.info_keys:
            # `if info` drops None and empty infos before the membership test.
            values = [info[key] for info in step_infos if info and key in info]
            if values:
                episode.custom_metrics[key] = sum(values)
        
# Register the metric callbacks with the agent configuration.
# (To go back to the default callbacks, remove the key again with
# `AGENT_CONFIG.pop("callbacks")`.)
AGENT_CONFIG.update(callbacks=MyCallbacks)

In [None]:
# Progress table rendered inside the notebook, extended by the custom metrics
# that the callbacks add.
reporter = ray.tune.JupyterNotebookReporter(overwrite=True)
for metric_column in (
    "custom_metrics/cost_mean",
    "custom_metrics/power_diff_mean",
):
    reporter.add_metric_column(metric_column)

# Run the experiment; pass `resume=True` as well to continue an earlier run.
analysis = ray.tune.run(**EXPERIMENT_CONFIG, progress_reporter=reporter)

Trial name,status,loc,env_config/general/grid_charging,env_config/general/infeasible_control_penalty,iter,total time (s),ts,reward,custom_metrics/cost_mean,custom_metrics/power_diff_mean
PPO_battery_control_90ecf_00002,RUNNING,192.168.34.159:121627,True,True,2.0,20.5995,8000.0,-103.68,37.9455,65.7343
PPO_battery_control_90ecf_00003,PENDING,,False,True,,,,,,
PPO_battery_control_90ecf_00000,TERMINATED,,True,False,2.0,19.6626,8000.0,-28.6974,28.6974,
PPO_battery_control_90ecf_00001,TERMINATED,,False,False,2.0,20.3672,8000.0,-1.25425,1.25425,


In [None]:
# Show which columns the notebook reporter displays by default.
default_columns_reporter = ray.tune.JupyterNotebookReporter(overwrite=True)
default_columns_reporter.DEFAULT_COLUMNS

In [None]:
# NOTE(review): this cell previously read `ray.rllibJupyterNotebookReporterainer`,
# a garbled attribute path that raises AttributeError. It presumably was meant to
# inspect the PPO trainer class used in the last cell of this notebook — confirm.
import ray.rllib.agents.ppo

ray.rllib.agents.ppo.PPOTrainer

In [None]:
# Inspect the `results` attribute of the object returned by `ray.tune.run` above.
analysis.results

In [None]:
import copy

import ray.rllib.agents.ppo

# AGENT_CONFIG["env_config"] still contains the `ray.tune.grid_search`
# placeholders, which are only expanded by `ray.tune.run`. A trainer built
# directly from it would hand those placeholders to the environment instead of
# real values. Work on a deep copy so the shared experiment configuration stays
# untouched, and pin one concrete variant.
trainer_config = copy.deepcopy(AGENT_CONFIG)
trainer_config["env_config"]["general"].update(
    infeasible_control_penalty=True,
    grid_charging=True,
)

ray.rllib.agents.ppo.PPOTrainer(config=trainer_config)