# Experiment Collection #02

This notebook contains experiments on how the individual components of the observation space affect performance. All experiments use the stochastic version of the problem.

## 1. Basic Setup

In [None]:
# Jupyter setup
# Load IPython's autoreload extension and switch it to mode 2, so edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Turn on the "greedy" option of the IPython tab-completer.
%config IPCompleter.greedy=True

In [None]:
import ray
import ray.rllib
import ray.tune 
import solara.envs.creator

## Initialising ray (starts background process for distributed computing)
# Shut down any Ray instance that is still around first, so that re-running
# this cell always starts from a fresh Ray runtime.
ray.shutdown()
ray.init(
    logging_level="WARNING",
    object_store_memory=25 * 10**9,
)

# Adding environment creator function to ray under the name "battery_control"
ray.tune.registry.register_env(
    "battery_control",
    solara.envs.creator.create_env,
)

# Output format of figures (used as the file extension when saving plots)
OUT_FORMAT = ".svg"

## 2. Experiment Definition

In [None]:
from solara.constants import PROJECT_PATH
import solara.utils.rllib

EXPERIMENT_NAME = "experiment_02_obs_space_ausgrid"

# Observation keys that make up the full observation space in this experiment
OBS_KEYS = [
    "load",
    "pv_gen",
    "battery_cont",
    "time_step",
    #"time_step_cont", "cum_load", "cum_pv_gen"
]

# Observation-space variants to compare:
#   * one leave-one-out variant per key (the key at position `dropped` removed),
#   * the battery content on its own,
#   * the full observation space.
OBS_KEYS_OPTIONS = [
    [key for position, key in enumerate(OBS_KEYS) if position != dropped]
    for dropped in range(len(OBS_KEYS))
]
OBS_KEYS_OPTIONS += [["battery_cont"], OBS_KEYS]

# RL environment configuration
ENV_CONFIG = {
    # Settings of the environment as a whole
    "general": {
        "type": "battery_control.BatteryControlEnv",
        "infeasible_control_penalty": True,
        "grid_charging": True,
        # Grid search: Tune creates trials for every entry of OBS_KEYS_OPTIONS
        "obs_keys": ray.tune.grid_search(OBS_KEYS_OPTIONS),
        "logging_level": "RAY",  # if using RLlib, set to 'RAY'
    },
    # Settings of the individual components of the environment
    "components": {
        "battery": {
            "type": "LithiumIonBattery",
            "size": 10,
            "chemistry": "NMC",
            "time_step_len": 1,
        },
        # PV generation read from the processed Ausgrid data (house 2)
        "solar": {
            "type": "DataPV",
            "data_path": PROJECT_PATH + "/data/ausgrid/processed/house2_solar_gen.txt",
            "fixed_sample_num": None,
        },
        # Load read from the processed Ausgrid data (house 2)
        "load": {
            "type": "DataLoad",
            "data_path": PROJECT_PATH + "/data/ausgrid/processed/house2_combined_load.txt",
            "fixed_sample_num": None,
        },
        "grid": {
            "type": "PeakGrid",
            "peak_threshold": 1.0,
        },
    },
}

# RL agent configuration
AGENT_CONFIG = {
    # Environment the agent is trained on
    "env": "battery_control",
    "env_config": ENV_CONFIG,
    # Learning settings
    "gamma": 0.9999999,
    "lr": 5e-5,
    "model": {
        "fcnet_hiddens": [256, 256, 256, 256],
        "fcnet_activation": "relu",
        "post_fcnet_activation": "tanh",
    },
    # Utilities settings
    "framework": "torch",
    "log_level": "WARNING",
    #"num_workers": 9,
    #"num_gpus": 1,
    "callbacks": solara.utils.rllib.InfoCallback,
    # Seed drawn by Tune from [0, 10000000)
    "seed": ray.tune.randint(0, 10000000),
    # NOTE(review): presumably a dummy key whose only purpose is to make the
    # grid search run every configuration twice -- confirm that RLlib accepts
    # this extra config key.
    "rerun_num": ray.tune.grid_search([1, 2]),
}

# Full experiment configuration including RL algorithm type.
# The entries are passed as keyword arguments to `ray.tune.run` further below.
EXPERIMENT_CONFIG = {
    "run_or_experiment": "PPO",
    "config": AGENT_CONFIG,
    "stop": {
        "training_iteration": 250,
    },
    "name": EXPERIMENT_NAME,
    "local_dir": "./tmp/tune/",
    "log_to_file": True,
    "checkpoint_freq": 5,
}

# Other settings
# Directory the figures of this notebook are written to
PLOT_DIR = PROJECT_PATH + "/figures/experiments/"

In [None]:
# Parallelisation Setup

# Set to True to split a single GPU between the driver and the rollout
# workers (fractional GPU allocation) instead of using the default setup.
USE_FRACTIONAL_GPUS = False

if USE_FRACTIONAL_GPUS:
    num_workers = 4
    gpu_count = 1
    reserved_capacity = 0.01  # GPU share kept for the driver
    # Remaining GPU capacity is divided evenly between the workers
    num_gpus_per_worker = (gpu_count - reserved_capacity) / num_workers

    AGENT_CONFIG["num_workers"] = num_workers
    # The original branch assigned an undefined name `num_gpus` here and never
    # used `num_gpus_per_worker`; the driver gets the reserved share and the
    # workers get their fraction.
    AGENT_CONFIG["num_gpus"] = reserved_capacity
    AGENT_CONFIG["num_gpus_per_worker"] = num_gpus_per_worker
    AGENT_CONFIG["num_envs_per_worker"] = 8
else:
    # Default setup: 10 rollout workers, one GPU for the driver
    AGENT_CONFIG["num_workers"] = 10
    AGENT_CONFIG["num_gpus"] = 1

# Further options that are currently switched off:
#AGENT_CONFIG["num_envs_per_worker"]= 8
#AGENT_CONFIG["remote_worker_envs"]= True

## 3. Running Experiment

In [None]:
# Setting visualisation in notebook
import os  # needed in this cell; the notebook only imports it further below

reporter = ray.tune.JupyterNotebookReporter(overwrite=True)
reporter.add_metric_column("custom_metrics/cost_mean")
reporter.add_metric_column("custom_metrics/power_diff_mean")

# Only ask Tune to resume if an earlier run of this experiment left an
# experiment state file behind. Requesting a resume unconditionally makes the
# very first run on a clean results directory depend on state that does not
# exist yet; with this check a fresh kernel + fresh directory starts a new run.
_experiment_dir = os.path.join(EXPERIMENT_CONFIG["local_dir"], EXPERIMENT_CONFIG["name"])
_has_previous_state = os.path.isdir(_experiment_dir) and any(
    "experiment_state" in filename for filename in os.listdir(_experiment_dir)
)

# Running experiment
analysis = ray.tune.run(
    progress_reporter=reporter,
    **EXPERIMENT_CONFIG,
    resume=_has_previous_state,
)

## 4. Visualisation

In [None]:
import os
import solara.plot.pyplot
import matplotlib.pyplot as plt

# Directory holding the results of this experiment
exp_path = EXPERIMENT_CONFIG["local_dir"] + EXPERIMENT_CONFIG["name"] + "/"
#exp_path = "./tmp/tune/PPO/"

# Of all experiment state files in that directory, use the one whose name
# sorts last (presumably the most recent one -- verify against the file names).
state_files = list(
    filter(lambda filename: "experiment_state" in filename, os.listdir(exp_path))
)
last_state_file = sorted(state_files)[-1]

analysis = ray.tune.ExperimentAnalysis(
    experiment_checkpoint_path=exp_path + last_state_file
)

# Per-trial dataframes, re-inserted in sorted order of their keys
trials = analysis.fetch_trial_dataframes()
trials = dict(sorted(trials.items(), key=lambda item: item[0]))  # Sort trials

In [None]:
# Creating helper function for plotting
import numpy as np

def plot_trials(trials, 
                necessary_cond=None,
                other_conditions=None,
                selected_labels=None,
                experiment_name="default_experiment",
                plot_name = "plot_00_default",
                plot_dir = "./figures",
                trace_start=50,
                trace_len=100):
    """Plot the mean episode cost over training iterations and save the figure.

    One line is drawn per selected trial, using the trial's
    ``"custom_metrics/cost_mean"`` column restricted to the rows
    ``trace_start:trace_len``.

    Args:
        trials: Mapping from trial name to that trial's progress data; the
            data must be indexable by ``"custom_metrics/cost_mean"``.
        necessary_cond: Optional iterable of ``(label, pattern)`` entries. A
            trial is only considered if every ``pattern`` is a substring of
            its name. ``None`` applies no filter.
        other_conditions: Optional iterable of ``(label, pattern)`` pairs used
            to build the line label: ``label`` if ``pattern`` occurs in the
            trial name, ``"no " + label`` otherwise, joined by ``", "`` and
            capitalised. ``None`` is treated as no conditions (empty label).
        selected_labels: Optional collection of labels; if given, only trials
            whose generated label is contained in it are plotted.
        experiment_name: First part of the output file name.
        plot_name: Second part of the output file name.
        plot_dir: Directory the figure is written to (created if missing).
        trace_start: Index of the first row of each trial that is plotted.
        trace_len: Index one past the last row of each trial that is plotted.

    The figure is saved as
    ``<plot_dir>/<experiment_name>_<plot_name><OUT_FORMAT>``.
    """
    # Local import: the notebook imports `os` in a different cell only.
    import os

    # Default settings
    solara.plot.pyplot.default_setup()

    # Tick positions: first plotted iteration, every multiple of `ticks_gap`
    # inside the window, and the last plotted iteration.
    ticks_gap = 25
    first_regular_tick = (trace_start // ticks_gap + 1) * ticks_gap
    x_ticks = ([trace_start + 1]
               + list(np.arange(first_regular_tick, trace_len + 1, ticks_gap))
               + [trace_len])

    # Treat a missing condition list as "no conditions" instead of failing on
    # list(None); materialise both so that one-shot iterables (e.g. zip
    # objects) can be re-used for every trial.
    necessary_cond = [] if necessary_cond is None else list(necessary_cond)
    other_conditions = [] if other_conditions is None else list(other_conditions)

    for trial_name, trial_data in trials.items():
        # Skip trials that miss any of the required patterns
        if not all(necessary[1] in trial_name for necessary in necessary_cond):
            continue

        label_parts = [
            cond_label if condition in trial_name else "no " + cond_label
            for cond_label, condition in other_conditions
        ]
        label = ", ".join(label_parts).capitalize()

        if selected_labels is not None and label not in selected_labels:
            continue

        trace = trial_data["custom_metrics/cost_mean"][trace_start:trace_len]
        if len(trace) == 0:
            # Trial has not reached `trace_start` iterations: nothing to draw
            continue

        # Size the x-axis from the trace itself so that trials with fewer than
        # `trace_len` iterations can still be plotted (a fixed-length x vector
        # makes plt.plot fail on a length mismatch).
        x_values = np.arange(trace_start + 1, trace_start + 1 + len(trace))
        plt.plot(x_values, trace, label=label)

    #plt.semilogy()
    #plt.legend()
    plt.xlabel("Training iteration")
    # Raw string: "\$" is not a valid escape sequence in a normal string
    # literal. The resulting text is unchanged.
    plt.ylabel(r"Average cost per episode (\$)")
    plt.xticks(x_ticks)

    # os.path.join also works when `plot_dir` has no trailing separator
    # (as is the case for the default value).
    if plot_dir:
        os.makedirs(plot_dir, exist_ok=True)
    plt.savefig(fname=os.path.join(plot_dir, experiment_name + "_" + plot_name + OUT_FORMAT))

In [None]:
# Number of trials that were loaded from the experiment state
len(trials)

In [None]:
# Each observation key in single quotes, i.e. the form that is searched for in
# the trial names.
OBS_KEYS_Q = [f"'{obs_key}'" for obs_key in OBS_KEYS]
# (label, pattern) pairs used to build the line labels
conditions = [(obs_key, quoted_key) for obs_key, quoted_key in zip(OBS_KEYS, OBS_KEYS_Q)]

# Ablation study: only trials whose name contains "seed=" are considered
plot_trials(
    trials,
    necessary_cond=[["", "seed="]],
    other_conditions=conditions,
    experiment_name=EXPERIMENT_NAME,
    plot_name="plot_01_obs_ablation_study",
    plot_dir=PLOT_DIR,
)

In [None]:
# Comparison of the time step representations.
# NOTE(review): with `conditions` built from the four entries of OBS_KEYS,
# every generated label has four comma-separated parts, so neither of the
# labels selected below can match and no line is drawn -- confirm which
# conditions/labels are intended here.
plot_trials(
    trials,
    other_conditions=conditions,
    selected_labels=["No time_step", "No time_step_cont"],
    experiment_name=EXPERIMENT_NAME,
    plot_name="plot_02_time_step_type",
    plot_dir=PLOT_DIR,
)

In [None]:
# Helper functions

def get_episode_data_from_checkpoint(exp_path: str, iteration_num: int):
    """Get episode data from loading policy from certain iteration of experiment.

    Builds a PPO trainer from the configuration stored for the trial at
    ``exp_path`` (looked up in the notebook-level ``analysis`` object) and
    passes it to ``solara.utils.rllib.run_episodes_from_checkpoints``.

    Args:
        exp_path: Path of the trial; must be a key of
            ``analysis.get_all_configs()`` and is also used as the checkpoint
            location.
        iteration_num: Passed on as ``check_range`` to select the
            checkpoint(s) to load.

    Returns:
        The single episode's data if exactly one episode was produced,
        otherwise the full collection of episode data.

    Raises:
        KeyError: If ``exp_path`` is not a known trial of ``analysis``.
    """
    # Work on a copy so that the configuration held by `analysis` is not
    # modified as a side effect of calling this helper.
    trial_agent_config = dict(analysis.get_all_configs()[exp_path])

    # Remove some unnecessary configs that may stop re-loading. A missing key
    # is fine, so no KeyError is raised for configs that lack one of them.
    for unneeded_key in ("callbacks", "num_gpus"):
        trial_agent_config.pop(unneeded_key, None)

    agent = ray.rllib.agents.ppo.PPOTrainer(config=trial_agent_config)
    try:
        episodes_data = solara.utils.rllib.run_episodes_from_checkpoints(
            agent=agent,
            check_save_path=exp_path,
            check_range=iteration_num,
        )
    finally:
        # Release the trainer (and its workers) again; otherwise every call
        # of this helper leaves another trainer running.
        agent.stop()

    if len(episodes_data) == 1:
        return episodes_data[0]
    return episodes_data

def get_experiment_path(trials, obs_keys):
    """Return the first trial path that contains ``obs_keys``.

    Args:
        trials: Mapping whose keys are trial paths (strings).
        obs_keys: Text that has to occur in the trial path, e.g. the string
            form of an observation key list.

    Returns:
        The first key of ``trials`` (in iteration order) containing
        ``obs_keys``.

    Raises:
        IndexError: If no trial path contains ``obs_keys``. The exception type
            is the same as before; it now carries a message naming the text
            that was searched for.
    """
    for trial_path in trials.keys():
        if obs_keys in trial_path:
            return trial_path
    raise IndexError(
        "No trial path contains {!r} ({} trial(s) searched).".format(obs_keys, len(trials))
    )

In [None]:
# Plotting configuration

## Lines to draw in policy plot, plus axis limits and grid setting.
## The entries are passed as keyword arguments to the episode plot.
POLICY_PLOT_CONF = {
    "selected_keys": [
        "load",
        "pv_gen",
        "energy_cont",
        "net_load",
        "charging_power",
        "cost",
        "price_threshold",
        "actions",
    ],
    "y_min": -1.3,
    "y_max": 1.4,
    "show_grid": False,
}

In [None]:

# Select the trial that was trained on the full observation space.
# The search text is derived from OBS_KEYS instead of being hard-coded: the
# previous literal also listed 'cum_load' and 'cum_pv_gen', which are commented
# out in OBS_KEYS, so no trial of this experiment's grid could match it.
exp_path = get_experiment_path(trials, obs_keys=str(OBS_KEYS))
episode_data = get_episode_data_from_checkpoint(exp_path, iteration_num=150)

# Plot the policy of that trial for one episode and save the figure.
# NOTE(review): the file name still says "no_grid_no_penalty_failure" although
# ENV_CONFIG enables grid charging and the infeasible-control penalty --
# confirm the intended name.
solara.plot.pyplot.plot_episode(episode_data, title=None, **POLICY_PLOT_CONF)
plt.savefig(fname=PLOT_DIR + EXPERIMENT_NAME + "_plot_05_policy_iter150_no_grid_no_penalty_failure" + OUT_FORMAT,
            bbox_inches='tight')