# Exploring RLlib for solar agent problems

This notebook explores the use of RLlib with the solar agent environment. It is partly based on the [CartPole tutorial notebook by Anyscale](https://github.com/anyscale/academy/blob/9317775c393aff06cff06ae58c88f85ce201940d/ray-rllib/explore-rllib/01-Application-Cart-Pole.ipynb).

## 0. Setup

In [None]:
# Load the autoreload extension and switch it to mode 2, so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Enable IPython's greedy tab completion.
%config IPCompleter.greedy=True

In [None]:
import ray
import ray.tune
import ray.rllib
import json
import glob
import os
import pandas as pd
import numpy as np
import ipywidgets as widgets
import matplotlib.pyplot as plt
import gif

from solara.constants import PROJECT_PATH
import solara.envs.components.solar
import solara.envs.components.load
import solara.envs.components.grid
import solara.envs.battery_control
import solara.envs.components.battery

In [None]:
## Initialising ray (starts background process for distributed computing)
# shutdown() is called first, presumably so that re-running this cell does not
# fail on a Ray instance that is still alive from an earlier run.
ray.shutdown()
ray.init()

## 1. Setting up the solar agent environment

In [None]:
# To make the environment usable with RLlib,
# we wrap its creation in a function.

import solara.utils.logging
import logging

def battery_env_creator(env_config):
    """Build the battery-control environment used for training and evaluation.

    The environment combines a lithium-ion battery model, a PV trace, a load
    trace and a peak-priced grid. Load and PV are both pinned to the same
    episode of their data traces, so every episode replays the same sample.

    Args:
        env_config: Optional mapping of overrides (RLlib passes the trainer's
            ``env_config`` here). ``None`` or an empty mapping reproduces the
            fixed default setup. Recognised keys:

            * ``"episode_num"`` (default ``12``): trace episode that load and
              PV are fixed to.
            * ``"battery_size"`` (default ``10``): ``size`` passed to the
              battery model.
            * ``"peak_threshold"`` (default ``1.0``): ``peak_threshold``
              passed to the grid model.
            * ``"logging_level"`` (default ``"RAY"``): ``logging_level``
              passed to the environment.

    Returns:
        A configured ``solara.envs.battery_control.BatteryControlEnv``.
    """
    # Only .get() is used below, so a falsy config can safely become {}.
    env_config = env_config or {}

    pv_data_path = PROJECT_PATH + "/data/solar_trace_data/PV_5796.txt"
    load_data_path = PROJECT_PATH + "/data/solar_trace_data/load_5796.txt"

    # Setting up components of environment
    battery_model = solara.envs.components.battery.LithiumIonBattery(
        size=env_config.get("battery_size", 10),
        chemistry="NMC",
        time_step_len=1,
    )
    pv_model = solara.envs.components.solar.DataPV(data_path=pv_data_path)
    load_model = solara.envs.components.load.DataLoad(data_path=load_data_path)
    grid_model = solara.envs.components.grid.PeakGrid(
        peak_threshold=env_config.get("peak_threshold", 1.0),
    )

    # Fixing load and PV trace to the same single sample
    episode_num = env_config.get("episode_num", 12)
    load_model.fix_start(episode_num)
    pv_model.fix_start(episode_num)

    env = solara.envs.battery_control.BatteryControlEnv(
        battery=battery_model,
        pv_system=pv_model,
        grid=grid_model,
        load=load_model,
        logging_level=env_config.get("logging_level", "RAY"),
    )

    return env

# Register the creator under the name "battery_control"; the trainer below
# refers to the environment by this string.
ray.tune.registry.register_env("battery_control", battery_env_creator)

## 2. Setting up the RLlib agent

In [None]:
# All artefacts of this run are kept below a single directory.
save_path = "./tmp/ppo/battery-control-4"
check_save_path = f"{save_path}/training_checkpoints"
out_save_path = f"{save_path}/outputs"

# PPO settings passed to the trainer.
ppo_config = {
    "framework": "torch",
    "env_config": {},
    "output": out_save_path,
    "output_compress_columns": [],
    # The discount factor is deliberately set very close to 1.
    "gamma": 0.9999999,
    # Verbose logging; use "WARNING" here for quieter output.
    "log_level": "DEBUG",
}

trainer = ray.rllib.agents.ppo.PPOTrainer(env="battery_control", config=ppo_config)

## 3. Training agent on environment

In [None]:
num_iterations = 40
iteration_string = "Training iteration: {}, Min reward: {:.3f}, Mean reward: {:.3f}, Max reward: {:.3f}."

for i in range(num_iterations):
    iteration_out = trainer.train()

    # Report the spread of episode rewards seen in this iteration.
    reward_stats = [
        iteration_out[key]
        for key in ('episode_reward_min', 'episode_reward_mean', 'episode_reward_max')
    ]
    print(iteration_string.format(i, *reward_stats))

    # A checkpoint is written after every iteration so that the policy of
    # each training stage can be restored later on.
    file_name = trainer.save(check_save_path)

print("Training completed")

## 4. Visualising Policy

In [None]:
def plot_episode(load, pv_gen, energy_cont, actions, rewards, iteration, episode_reward):
    """Draw the traces of one battery-control episode on a fresh figure.

    All five series are plotted against the same 24-point x axis, so each of
    them must hold 24 values.

    Args:
        load: Load values, labelled "load (kW)".
        pv_gen: PV generation values, labelled "pv_gen (kW)".
        energy_cont: Battery energy content, labelled "energy_cont (kWh)".
        actions: Actions taken by the agent.
        rewards: Rewards received, labelled "rewards ($)".
        iteration: Training iteration shown in the title.
        episode_reward: Total episode reward; its negative is shown in the
            title as the overall cost.

    Returns:
        None. The figure is left as the current matplotlib figure.
    """
    import matplotlib.pyplot as plt

    steps = np.arange(0, 24)
    major_ticks = [0, 5, 10, 15, 20, 23]

    # fig is unused here but handy if the figure should be saved to disk.
    fig, ax = plt.subplots(figsize=(6, 4), dpi=200)

    # Labelled major ticks every few steps, unlabelled minor ticks on every step.
    ax.set_xticks(major_ticks, minor=False)
    ax.set_xticks(steps, minor=True)
    ax.set_xticklabels(major_ticks, minor=False)
    for axis, which in ((ax.yaxis, 'major'), (ax.xaxis, 'major'), (ax.xaxis, 'minor')):
        axis.grid(True, which=which)
    ax.set_prop_cycle('color', ["blue", "green", "black", "red", "purple"])

    series = {
        "load (kW)": load,
        "pv_gen (kW)": pv_gen,
        "energy_cont (kWh)": energy_cont,
        "actions": actions,
        "rewards ($)": rewards,
    }
    for label, values in series.items():
        ax.plot(steps, values, label=label, marker='.')

    ax.set_title("Iteration {}      (Overall cost {:.3f})".format(iteration, -episode_reward))
    ax.set_ylabel("kW / kWh / other")
    ax.legend()

    # Fixed y range keeps frames of different iterations comparable.
    ax.set_ylim(bottom=-1.2, top=2.5)
    

def run_episode(agent, env_config=None):
    """Run a single episode with an RLlib agent.

    A new environment is created through ``agent.env_creator`` and stepped
    with the agent's non-exploring actions until the environment reports
    ``done``.

    Args:
        agent: Object providing ``env_creator(env_config)`` and
            ``compute_action(obs, explore=...)``, e.g. an RLlib trainer.
        env_config: Configuration handed to ``agent.env_creator``. ``None``
            (the default) means a fresh empty dict for every call, so a
            creator that mutates its config cannot leak state between calls.

    Returns:
        Tuple ``(observations, actions, rewards)`` of lists. ``observations``
        starts with the reset observation and is therefore one element
        longer than ``actions`` and ``rewards``.
    """
    if env_config is None:
        env_config = {}

    env = agent.env_creator(env_config)
    obs = env.reset()

    observations = [obs]
    actions = []
    rewards = []

    done = False
    while not done:
        # explore=False: evaluate the learned policy without exploration.
        action = agent.compute_action(obs, explore=False)
        obs, reward, done, _info = env.step(action)
        actions.append(action)
        observations.append(obs)
        rewards.append(reward)

    return (observations, actions, rewards)


# NOTE: the slider allows iterations up to 80; only iterations for which a
# checkpoint has actually been saved under check_save_path can be restored.
@widgets.interact(iteration=(1,80))
@gif.frame
def plot_agent(iteration=10):
    """Plot one episode played by the agent as it was after a training iteration.

    Restores the module-level ``trainer`` from the checkpoint of the given
    iteration, runs one episode and plots it with ``plot_episode``.

    Args:
        iteration: Training iteration whose checkpoint is loaded (1-based).
    """
    iteration = int(iteration)

    # One six-digit zero-padded field instead of a literal "0000" prefix plus
    # two digits: same path for 1-99, and still six digits from 100 onwards.
    checkpoint_file = '{}/checkpoint_{:06d}/checkpoint-{}'.format(
        check_save_path, iteration, iteration)
    trainer.restore(checkpoint_file)

    observations, actions, rewards = run_episode(agent=trainer)
    episode_reward = sum(rewards)

    # There is one more observation than steps (the reset observation comes
    # first); only the first 24 are plotted.
    observations = np.array(observations)
    load = observations[:24,0]
    pv_gen = observations[:24,1]
    energy_cont = observations[:24,2]

    plot_episode(load, pv_gen, energy_cont, actions, rewards, iteration, episode_reward)


In [None]:
# Creating GIF: one frame for each of the training iterations 1 to 30

frames = [plot_agent(i) for i in range(1, 31)]

In [None]:
# Write the collected frames to example4.gif, with 400 ms between frames.
gif.save(frames, 'example4.gif', duration=400, unit="ms", between="frames")