# Experiment Collection #01

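This notebook contains experiments on the effect of two environment settings: a penalty term for infeasible control actions (`infeasible_control_penalty`) and enabling charging from the grid (`grid_charging`).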

## 1. Basic Setup

In [None]:
# Jupyter setup
# Load IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Switch on IPython's "greedy" tab-completion option.
%config IPCompleter.greedy=True

In [None]:
import ray
# Stop any Ray instance that may still be attached to this kernel
# (presumably left over from an earlier run of the notebook).
ray.shutdown()

In [None]:
import ray
import ray.rllib
import ray.tune 
import solara.envs.creator

# (Re)start Ray, which launches the background process used for distributed
# computing. Any instance that is still running is shut down first.
ray.shutdown()
ray.init(
    object_store_memory=25 * 10**9,
    logging_level="WARNING",
)

# Register the environment creator function with Ray under the name
# "battery_control" so that agent configs can refer to it by that name.
ray.tune.registry.register_env(
    "battery_control",
    solara.envs.creator.create_env,
)

## 2. Experiment Definition

In [None]:
from solara.constants import PROJECT_PATH
import solara.utils.rllib

EXPERIMENT_NAME = "experiment_01_penalty_grid"

# --- RL environment configuration -------------------------------------------
# General environment options. The two boolean flags are passed to Tune as
# grid searches rather than as fixed values.
_ENV_GENERAL = {
    "type": "battery_control.BatteryControlEnv",
    "infeasible_control_penalty": ray.tune.grid_search([False, True]),
    "grid_charging": ray.tune.grid_search([True, False]),
    "logging_level": "RAY",  # if using RLlib, set to 'RAY'
}

# Per-component settings (battery, solar, load, grid)
_ENV_COMPONENTS = {
    "battery": {
        "type": "LithiumIonBattery",
        "size": 10,
        "chemistry": "NMC",
        "time_step_len": 1,
    },
    "solar": {
        "type": "DataPV",
        "data_path": PROJECT_PATH + "/data/solar_trace_data/PV_5796.txt",
        "fixed_sample_num": 12,
    },
    "load": {
        "type": "DataLoad",
        "data_path": PROJECT_PATH + "/data/solar_trace_data/load_5796.txt",
        "fixed_sample_num": 12,
    },
    "grid": {
        "type": "PeakGrid",
        "peak_threshold": 1.0,
    },
}

ENV_CONFIG = {
    "general": _ENV_GENERAL,
    "components": _ENV_COMPONENTS,
}

# --- RL agent configuration -------------------------------------------------
# Network used by the agent: four fully connected hidden layers of width 256
_MODEL_CONFIG = {
    "fcnet_hiddens": [256] * 4,
    "fcnet_activation": "relu",
    "post_fcnet_activation": "tanh",
}

AGENT_CONFIG = {
    "env": "battery_control",
    "env_config": ENV_CONFIG,
    "gamma": 0.9999999,
    "lr": 5e-5,
    "model": _MODEL_CONFIG,
    # Utilities settings (worker and GPU counts are assigned in a later cell)
    "framework": "torch",
    "log_level": "WARNING",
    "callbacks": solara.utils.rllib.InfoCallback,
}

# --- Full experiment configuration, including the RL algorithm type ---------
# These entries are unpacked as keyword arguments of ray.tune.run.
EXPERIMENT_CONFIG = dict(
    run_or_experiment="PPO",
    config=AGENT_CONFIG,
    stop={"training_iteration": 400},
    name=EXPERIMENT_NAME,
    local_dir="./tmp/tune/",
    log_to_file=True,
    checkpoint_freq=1,
)

# --- Other settings ---------------------------------------------------------
# Directory that the figures of this notebook are written to
PLOT_DIR = PROJECT_PATH + "/figures/experiments/"

In [None]:
# Parallelisation Setup
#
# Two alternative resource layouts. The flag replaces the former `if False:`
# switch; leaving it False reproduces the settings that were active before
# (10 workers, 1 GPU).
SHARE_GPU_WITH_WORKERS = False

if SHARE_GPU_WITH_WORKERS:
    # Split a single GPU between the driver and the rollout workers.
    num_workers = 4
    gpu_count = 1
    reserved_capacity = 0.01  # Driver GPU
    num_gpus_per_worker = (gpu_count - reserved_capacity) / num_workers

    AGENT_CONFIG["num_workers"] = num_workers
    # NOTE(review): this line used to assign an undefined name `num_gpus`
    # (NameError once the branch is enabled), and `num_gpus_per_worker` was
    # computed but never used. Giving the reserved share to the driver and
    # the remainder to the workers follows the variable names and the
    # "Driver GPU" comment -- confirm this is the intended split.
    AGENT_CONFIG["num_gpus"] = reserved_capacity
    AGENT_CONFIG["num_gpus_per_worker"] = num_gpus_per_worker
    AGENT_CONFIG["num_envs_per_worker"] = 8
else:
    AGENT_CONFIG["num_workers"] = 10
    AGENT_CONFIG["num_gpus"] = 1
    # Drop keys that only the other layout sets, so that re-running this cell
    # after toggling the flag leaves no stale settings in the config.
    AGENT_CONFIG.pop("num_gpus_per_worker", None)
    AGENT_CONFIG.pop("num_envs_per_worker", None)
    # Other options that were tried here and are currently disabled:
    # num_envs_per_worker=8, remote_worker_envs=True

## 3. Running Experiment

In [None]:
import os

# Setting visualisation in notebook: progress table that overwrites itself,
# extended by the two custom metrics of interest
reporter = ray.tune.JupyterNotebookReporter(overwrite=True)
reporter.add_metric_column("custom_metrics/cost_mean")
reporter.add_metric_column("custom_metrics/power_diff_mean")

# Only ask Tune to resume when an earlier run left an experiment state file in
# the experiment directory (local_dir/name). Requesting a resume without any
# saved state makes the run fail before it starts, which would break a first
# run of this notebook.
experiment_dir = os.path.join(
    EXPERIMENT_CONFIG["local_dir"], EXPERIMENT_CONFIG["name"]
)
has_previous_state = os.path.isdir(experiment_dir) and any(
    "experiment_state" in filename for filename in os.listdir(experiment_dir)
)

# Running experiment
analysis = ray.tune.run(
    progress_reporter=reporter,
    **EXPERIMENT_CONFIG,
    resume=has_previous_state,
)

## 4. Visualisation

In [None]:
import os
import solara.plot.pyplot
import matplotlib.pyplot as plt

# Directory of this experiment: <local_dir>/<name>, taken from the experiment
# configuration.
# NOTE(review): a hard-coded "./tmp/tune/PPO/" used to overwrite this path,
# which does not match the configured experiment name. If results of an
# older, unnamed run should be analysed instead, point exp_path there.
exp_path = os.path.join(EXPERIMENT_CONFIG["local_dir"], EXPERIMENT_CONFIG["name"])

state_files = [
    filename for filename in os.listdir(exp_path)
    if "experiment_state" in filename
]
if not state_files:
    raise FileNotFoundError(
        "No experiment state file found in '" + exp_path + "'. "
        "Run the experiment before loading its results."
    )

# Pick the state file whose name sorts last (presumably the most recent one,
# assuming the file names embed a sortable timestamp -- TODO confirm).
last_state_file = max(state_files)

analysis = ray.tune.ExperimentAnalysis(
    experiment_checkpoint_path=os.path.join(exp_path, last_state_file)
)
trials = analysis.fetch_trial_dataframes()
trials = {key: trials[key] for key in sorted(trials.keys())}  # Sort trials

In [None]:
# Creating helper function for plotting

def _trial_label(trial_name, other_conditions):
    """Build the legend label of one trial from the conditions found in its name."""
    parts = [
        cond_label if condition in trial_name else "no " + cond_label
        for cond_label, condition in other_conditions
    ]
    return ", ".join(parts).capitalize()


def plot_trials(trials,
                necessary_cond=None,
                other_conditions=None,
                experiment_name="default_experiment",
                plot_name="plot_00_default",
                plot_dir="./figures",
                trace_len=150,
                ticks_gap=25):
    """Plot progress over iterations for experiments.

    Draws the "custom_metrics/cost_mean" column of every selected trial on a
    logarithmic y-axis and saves the figure as
    ``<plot_dir>/<experiment_name>_<plot_name>.png``.

    Args:
        trials: Mapping from trial name to that trial's data. Each value must
            support ``value["custom_metrics/cost_mean"][0:trace_len]``.
        necessary_cond: Optional ``[label, substring]`` pair. Only trials whose
            name contains ``substring`` are plotted. ``None`` plots all trials.
        other_conditions: Optional iterable of ``[label, substring]`` pairs
            used to build the legend entry of each trial: ``label`` if the
            trial name contains ``substring``, otherwise ``"no " + label``.
            ``None`` is treated as no conditions (empty labels).
        experiment_name: First part of the output file name.
        plot_name: Second part of the output file name.
        plot_dir: Directory the figure is written to, created if missing. A
            trailing path separator is optional.
        trace_len: Maximum number of iterations plotted per trial.
        ticks_gap: Distance between x-axis ticks. The first and last plotted
            iteration always get a tick.
    """
    import os  # local import keeps the function usable in a fresh kernel

    solara.plot.pyplot.default_setup()

    # The default of None used to crash in list(None)
    other_conditions = list(other_conditions) if other_conditions is not None else []

    # Defined before the loop so that the axis setup below also works when no
    # trial matches the necessary condition. The set removes the duplicate
    # tick that appears when trace_len is a multiple of ticks_gap.
    x_ticks = sorted({1, trace_len, *range(ticks_gap, trace_len + 1, ticks_gap)})

    for trial_name, trial_data in trials.items():
        if necessary_cond is not None and necessary_cond[1] not in trial_name:
            continue

        trace = trial_data["custom_metrics/cost_mean"][0:trace_len]
        # Match the x values to the available data: a trial with fewer than
        # trace_len iterations would otherwise give a length mismatch.
        x_values = list(range(1, len(trace) + 1))
        plt.plot(x_values, trace, label=_trial_label(trial_name, other_conditions))

    plt.semilogy()
    plt.legend()
    plt.xlabel("Training iteration")
    plt.ylabel("Average cost per episode ($)")
    plt.xticks(x_ticks)

    # Join path components properly: plain concatenation turned the default
    # "./figures" into "./figuresdefault_experiment_...".
    os.makedirs(plot_dir, exist_ok=True)
    file_name = experiment_name + "_" + plot_name + ".png"
    plt.savefig(fname=os.path.join(plot_dir, file_name))

In [None]:
# Penalty vs. no penalty, restricted to the trials without grid charging
plot_trials(
    trials,
    necessary_cond=["grid charging", "grid_charging=False"],
    other_conditions=[["penalty", "infeasible_control_penalty=True"]],
    plot_name="plot_01_penalty",
    plot_dir=PLOT_DIR,
    experiment_name=EXPERIMENT_NAME,
)

In [None]:
# Grid charging vs. no grid charging, restricted to the trials without penalty
plot_trials(
    trials,
    necessary_cond=["penalty", "infeasible_control_penalty=False"],
    other_conditions=[["grid charging", "grid_charging=True"]],
    plot_name="plot_02_grid_charging_no_penalty",
    plot_dir=PLOT_DIR,
    experiment_name=EXPERIMENT_NAME,
)

In [None]:
# Grid charging vs. no grid charging, restricted to the trials with penalty
plot_trials(
    trials,
    necessary_cond=["penalty", "infeasible_control_penalty=True"],
    other_conditions=[["grid charging", "grid_charging=True"]],
    plot_name="plot_03_grid_charging_with_penalty",
    plot_dir=PLOT_DIR,
    experiment_name=EXPERIMENT_NAME,
)

In [None]:
# All trials in one figure, each labelled by both of its settings
other_conditions = [
    ["penalty", "infeasible_control_penalty=True"],
    ["grid charging", "grid_charging=True"],
]

plot_trials(
    trials,
    other_conditions=other_conditions,
    necessary_cond=None,
    plot_name="plot_04_all_trials",
    plot_dir=PLOT_DIR,
    experiment_name=EXPERIMENT_NAME,
)