# Experiment Collection #01

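This notebook contains experiments on the effect of a penalty term for infeasible control actions (`infeasible_control_penalty`) in the `battery_control` environment, each run with and without grid charging.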

## 1. Basic Setup

In [None]:
# Jupyter setup
# Load IPython's autoreload extension and select mode 2.
# NOTE(review): mode 2 presumably re-imports all modules before each cell runs,
# so edits to project code are picked up without a kernel restart — confirm.
%load_ext autoreload
%autoreload 2
# Enable the "greedy" option of IPython's tab completer.
%config IPCompleter.greedy=True

In [None]:
import ray
# Shut down any Ray runtime still attached to this kernel (e.g. from an earlier
# run of the notebook) before Ray is initialised again further down.
ray.shutdown()

In [None]:
import ray
import ray.rllib
import ray.tune
import solara.envs.creator

# Size of Ray's object store in bytes (0.5 GB). Written as an exact integer
# because ray.init documents object_store_memory as an integer byte count;
# the previous expression 0.5 * 10**9 evaluated to the float 500000000.0.
OBJECT_STORE_MEMORY_BYTES = 500 * 10**6

## Initialising ray (starts background process for distributed computing)
# Shut down first so this cell can be re-run on a kernel where Ray is
# already initialised.
ray.shutdown()
ray.init(logging_level="WARNING", object_store_memory=OBJECT_STORE_MEMORY_BYTES)

# Adding environment creator function to ray
# "battery_control" is the name the agent configuration uses in its "env" entry.
ray.tune.registry.register_env("battery_control", solara.envs.creator.create_env)

In [None]:
# Display the resources Ray reports as currently available (sanity check after init).
ray.available_resources()

## 2. Experiment Definition

In [None]:
from solara.constants import PROJECT_PATH

# ---------------------------------------------------------------------------
# RL environment configuration, assembled from one dict per section.
# ---------------------------------------------------------------------------

# General environment options. Both grid_search entries are swept by Tune,
# each over the values False and True.
GENERAL_ENV_CONFIG = {
    "type": "battery_control.BatteryControlEnv",
    "infeasible_control_penalty": ray.tune.grid_search([False, True]),
    "grid_charging": ray.tune.grid_search([True, False]),
    "logging_level": "RAY",  # if using RLlib, set to 'RAY'
}

# Battery component.
BATTERY_CONFIG = {
    "type": "LithiumIonBattery",
    "size": 10,
    "chemistry": "NMC",
    "time_step_len": 1,
}

# Solar generation, read from a trace file inside the project directory.
SOLAR_CONFIG = {
    "type": "DataPV",
    "data_path": PROJECT_PATH + "/data/solar_trace_data/PV_5796.txt",
    "fixed_sample_num": 12,
}

# Load, read from the matching trace file (same fixed sample number).
LOAD_CONFIG = {
    "type": "DataLoad",
    "data_path": PROJECT_PATH + "/data/solar_trace_data/load_5796.txt",
    "fixed_sample_num": 12,
}

# Grid component.
GRID_CONFIG = {
    "type": "PeakGrid",
    "peak_threshold": 1.0,
}

ENV_CONFIG = {
    "general": GENERAL_ENV_CONFIG,
    "components": {
        "battery": BATTERY_CONFIG,
        "solar": SOLAR_CONFIG,
        "load": LOAD_CONFIG,
        "grid": GRID_CONFIG,
    },
}

# ---------------------------------------------------------------------------
# RL agent configuration. "env" is the name under which the environment
# creator is registered; ENV_CONFIG is passed along as "env_config".
# ---------------------------------------------------------------------------
AGENT_CONFIG = dict(
    framework="torch",
    num_workers=1,
    env="battery_control",
    env_config=ENV_CONFIG,
    gamma=0.9999999,
    log_level="WARNING",
    lr=5e-5,
    model=dict(
        fcnet_hiddens=[256, 256, 256, 256],
        fcnet_activation="relu",
        post_fcnet_activation="tanh",
    ),
)

# ---------------------------------------------------------------------------
# Full experiment configuration including RL algorithm type. The keys are
# unpacked as keyword arguments of ray.tune.run.
# ---------------------------------------------------------------------------
EXPERIMENT_CONFIG = dict(
    run_or_experiment="PPO",
    config=AGENT_CONFIG,
    stop={"training_iteration": 2},
    local_dir="./tmp/tune/",
    log_to_file=True,
    checkpoint_freq=1,
)

In [None]:
# The reporter only defines what the progress output looks like in this
# notebook. To restrict the displayed columns, metric_columns (e.g. ["cost"])
# could additionally be passed here.
notebook_reporter = ray.tune.JupyterNotebookReporter(overwrite=True)

# Launch the experiment; every other argument comes from EXPERIMENT_CONFIG.
analysis = ray.tune.run(
    **EXPERIMENT_CONFIG,
    progress_reporter=notebook_reporter,
)

In [None]:
# Scratch cell: displays the reporter class itself; it has no effect on the experiment.
ray.tune.JupyterNotebookReporter

In [None]:
# Scratch cell: displays the PPO trainer class itself; it has no effect on the experiment.
ray.rllib.agents.ppo.PPOTrainer

In [None]:
# Display the `results` attribute of the object returned by ray.tune.run.
analysis.results