Training the model for post-fault operation
1. Uncomment line 90 in openDSSenv34.py, which reads L_OUT='L24'.
2. Set L_OUT to the failed line of interest, chosen from the candidate set, to obtain the results for that failure. Candidate_Lines=['L7','L9','L15','L16','L18','L19','L21','L22','L23','L24']
3. Run the corresponding sections below to obtain results for PPO, A2C, and TRPO.

In [11]:
import numpy as np
import gym
from stable_baselines3 import PPO
# from stable_baselines3.common import make_vec_env
from openDSSenv34 import openDSSenv34
#import json
#import datetime as dt
import torch as th
from stable_baselines3.common.utils import set_random_seed
#from feedforwardPolicy import *
from stable_baselines3 import A2C
from numba import jit
from state_action_reward import take_action, get_state
# from CustomPolicies import ActorCriticGCAPSPolicy
from typing import Callable
from stable_baselines3.common.torch_layers import (
    BaseFeaturesExtractor,
    CombinedExtractor,
    FlattenExtractor,
    MlpExtractor,
    NatureCNN,
    create_mlp,
)


import pickle
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv

from stable_baselines3.common.env_util import make_vec_env
import time
import datetime
    
class CustomNN(BaseFeaturesExtractor):
    """1D-CNN feature extractor operating on a dictionary observation.

    ``forward`` concatenates four observation entries ("Unserved Energy",
    "NodeFeat(BusVoltage)", "EdgeFeat(branchflow)" and "Adjacency") into one
    flat state vector per sample, passes it through three 1D convolution
    layers and projects the result to ``features_dim`` with a linear layer.

    :param observation_space: (gym.Space) Observation space of the environment;
        ``forward`` indexes the observations by key.
    :param features_dim: (int) Number of features extracted.
        This corresponds to the number of units of the last layer.
    """

    def __init__(self, observation_space: gym.spaces.Dict, features_dim: int = 256):
        from torch import nn
        super().__init__(observation_space, features_dim)

        # Length of the concatenated state vector built in ``forward``.
        # Need to adjust this to fit the microgrid state space size.
        state_len = 1521

        # CNN layers for feature extraction
        self.cnn_layers = nn.Sequential(
            nn.Conv1d(1, 100, kernel_size=5, stride=3, padding=1),  # 1st 1D-CNN layer
            nn.ReLU(),
            nn.Conv1d(100, 100, kernel_size=5, stride=3, padding=1),  # 2nd 1D-CNN layer
            nn.ReLU(),
            nn.Conv1d(100, 100, kernel_size=5, stride=3, padding=1),  # 3rd 1D-CNN layer
            nn.ReLU(),
            nn.Flatten(1, -1),
        )

        # Run a dummy sample through the CNN to find the flattened output size.
        # Only the shape matters, so a zero tensor is used instead of
        # uninitialised memory.
        with th.no_grad():
            n_flatten = self.cnn_layers(th.zeros(1, 1, state_len)).shape[1]

        # Linear layer producing the expected feature dimension
        self.linear = nn.Sequential(nn.Linear(n_flatten, features_dim), nn.ReLU())

    def forward(self, observations):
        """Build the state vector from ``observations`` and extract features.

        :param observations: mapping from observation key to a batched tensor.
        :return: tensor with ``features_dim`` features per sample.
        """
        # "Unserved Energy" may arrive without a feature axis; add one so it
        # can be concatenated with the other entries along dim 1.
        unserved_energy = observations["Unserved Energy"]
        if unserved_energy.dim() == 1:
            unserved_energy = unserved_energy[:, None]

        # Concatenate with torch (not NumPy) so the tensors stay on their
        # current device; NumPy conversion only works for CPU tensors.
        statevec = th.cat(
            (
                unserved_energy,
                observations['NodeFeat(BusVoltage)'].flatten(1, 2),
                observations['EdgeFeat(branchflow)'],
                observations['Adjacency'].flatten(1, 2),
            ),
            dim=1,
        ).float()

        # Insert the single input channel expected by Conv1d:
        # (batch, length) -> (batch, 1, length).
        statevec = statevec.unsqueeze(1)

        return self.linear(self.cnn_layers(statevec))

def learning_rate_schedule(initial_value: float) -> Callable[[float], float]:
    """Create a quadratically decaying learning-rate schedule.

    :param initial_value: learning rate returned when the remaining progress is 1.
    :return: function mapping the remaining progress to
        ``progress_remaining**2 * initial_value``.
    """

    def quadratic_decay(progress_remaining: float) -> float:
        """Return the learning rate for the given remaining progress."""
        squared_progress = progress_remaining ** 2
        return squared_progress * initial_value

    return quadratic_decay
# env = openDSSenv()

def make_env(rank, seed=0):
    """
    Build an environment factory for use with a vectorized env.

    The global random seed is set to ``seed`` when this function is called;
    the environment itself is only created when the returned callable runs.

    :param rank: (int) index of the subprocess, added to ``seed`` for the env seed
    :param seed: (int) the initial seed for RNG
    :return: (Callable) zero-argument function creating a seeded openDSSenv34
    """
    set_random_seed(seed)
    env_seed = seed + rank

    def _make_seeded_env():
        seeded_env = openDSSenv34()
        seeded_env.seed(env_seed)
        return seeded_env

    return _make_seeded_env

if __name__ == '__main__':
    # Train a PPO agent with the CustomNN feature extractor on openDSSenv34
    # and save the trained model. ``model`` is intentionally left at module
    # level because the test cell further down uses it.
    import os  # local import: only needed to build the save path below

    # env = openDSSenv34()
    num_cpu = 1
    # env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    env = make_vec_env(openDSSenv34, n_envs=num_cpu, seed=0)

    # Alternative A2C configuration, kept for reference:
    # rms_prop_eps = 1e-5
    # policy_kwargs = dict(
    #     features_extractor_class=CustomNN,
    #     features_extractor_kwargs=dict(features_dim=128),
    #     activation_fn=th.nn.Tanh,
    #     net_arch=[dict(pi=[128,128],vf=[128, 128])],
    #     # optimizer_class = th.optim.RMSprop,
    #     # optimizer_kwargs = dict(alpha=0.89, eps=rms_prop_eps, weight_decay=0)
    # )
    # model = A2C('MultiInputPolicy', env,tensorboard_log="logger_L24/", policy_kwargs=policy_kwargs, verbose=1, n_steps=100,
    #         use_rms_prop=False,
    #             gamma=1.00,
    #             learning_rate= 0.000001 #learning_rate_schedule(0.07),
    #             ).learn(total_timesteps=50000, log_interval=1, tb_log_name="L24_MLP")

    policy_kwargs = dict(
        features_extractor_class=CustomNN,
        features_extractor_kwargs=dict(features_dim=256),
        activation_fn=th.nn.Tanh,
        net_arch=[128, 128],
        # optimizer_class = th.optim.RMSprop,
        # optimizer_kwargs = dict(alpha=0.89, eps=rms_prop_eps, weight_decay=0)
    )

    model = PPO(
        'MultiInputPolicy',
        env,
        tensorboard_log="logger_L24PPO_CNN/",
        policy_kwargs=policy_kwargs,
        verbose=1,
        n_steps=200,
        batch_size=100,
        gamma=1.00,
        learning_rate=0.000001,  # 0.00001; tried values from 0.003 to 5e-6
        ent_coef=0.01,  # 0.05
    )
    # ``n_eval_episodes`` is not passed: no evaluation env is configured, so it
    # had no effect, and newer stable-baselines3 releases no longer accept it.
    model.learn(total_timesteps=80000, log_interval=1, tb_log_name="R1_Microgrid_env_cnn")

    # model = A2C(ActorCriticGCAPSPolicy, env,tensorboard_log="logger/", policy_kwargs=policy_kwargs, verbose=1, n_steps=1).learn(total_timesteps=20000)
    # model = A2C('MultiInputPolicy', env,tensorboard_log="logger/", policy_kwargs=policy_kwargs, verbose=1, n_steps=1).learn(total_timesteps=20000)
    #model.learn(total_timesteps=2000)

    # Save into ``log_dir`` with a day_hour_minute suffix. The timestamp is
    # read once so all three fields come from the same instant, and the path
    # is joined properly (plain concatenation with "." produced a hidden
    # ".r1_MG_..." file).
    log_dir = "."
    now = datetime.datetime.now()
    model_name = (
        "r1_MG_bus_mlp_with_entropy_05_multi_env_L24_PPO"
        + str(now.day) + "_" + str(now.hour) + "_" + str(now.minute)
    )
    model.save(os.path.join(log_dir, model_name))
    
    

Initializing Microgrid env with sectionalizing and tie switches
Env initialized
Using cpu device
n_flat: 5600
Logging to logger_L24PPO_CNN/R1_Microgrid_env_cnn_1
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1         |
|    ep_rew_mean     | -6.63e-05 |
| time/              |           |
|    fps             | 9         |
|    iterations      | 1         |
|    time_elapsed    | 21        |
|    total_timesteps | 200       |
----------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1             |
|    ep_rew_mean          | -6.68e-05     |
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 2             |
|    time_elapsed         | 46            |
|    total_timesteps      | 400           |
| train/                  |               |
|    approx_kl            | 1.7583371e-08 |
|   

The training is done. Now, test it.

In [13]:
# Evaluate the trained ``model`` once on a freshly initialised environment.
# Reload the environment module so edits to openDSSenv34.py (e.g. L_OUT) take effect.
import openDSSenv34
import importlib
importlib.reload(openDSSenv34)
from openDSSenv34 import openDSSenv34

env = openDSSenv34()
obs, DSSCKTOBJ, G_INIT = env.new_test_func()
start = time.time()
# Add a leading batch axis to every observation entry before calling the policy.
obs = {key: th.as_tensor([_obs]) for (key, _obs) in obs.items()}
obs['loss'] = th.as_tensor([[obs['loss']]])
#obs['TopologicalConstr'] = torch.as_tensor([[obs['TopologicalConstr']]])
obs['VoltageViolation'] = th.as_tensor([[obs['VoltageViolation']]])
obs['FlowViolation'] = th.as_tensor([[obs['FlowViolation']]])


action, values, log_probs = model.policy.forward(obs)
#print(obs['loss'])
# NOTE(review): the result is bound to DCKTOBJ while get_state() below reads
# DSSCKTOBJ; this presumably relies on take_action() modifying DSSCKTOBJ in
# place -- confirm against state_action_reward.take_action.
DCKTOBJ=take_action(DSSCKTOBJ,action)
OBS=get_state(DSSCKTOBJ,G_INIT)
print("The loss is:",OBS['loss'])
print("The topology violation status is:",OBS['TopologicalConstr'])
print("The voltage violation status is: ",OBS['VoltageViolation'])
print("The branch flow violation status is: ",OBS['FlowViolation'])
print("The amount of unserved energy is:",OBS['Unserved Energy']*1000*25)
#print("The voltage violation status is:",OBS['VoltageViolation'])
print("The convergence status is:",OBS['Convergence'])
# The message previously contained an unused "{}" placeholder that was printed literally.
print("For Unity Load: The Optimal Configuration is:",action)
end = time.time()
print("Run time [s]: ",end-start)

Initializing Microgrid env with sectionalizing and tie switches
Env initialized
The loss is: 0.0024900806769659004
The topology violation status is: 400
The voltage violation status is:  0
The branch flow violation status is:  0
The amount of unserved energy is: 1.200967570601074
The convergence status is: 0
For Unity Load: The Optimal Configuration is :.{} tensor([[1., 0., 0., 0., 1., 0., 1., 0., 1.]])
Run time [s]:  0.023583412170410156


We can tune the neural network model inside class CustomNN(BaseFeaturesExtractor) by using a different number of layers and a different number of neurons per layer.

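For example, if we want the fully connected part to take the n_flatten CNN outputs (computed from the 1521-element state vector) and pass them through layers of 256 and 128 neurons, we can change it as follows:
<br>
self.linear = nn.Sequential(<br>
    nn.Linear(n_flatten, 256),  # First layer: n_flatten inputs -> 256 neurons <br>
    nn.ReLU(), <br>
    nn.Linear(256, 128),        # Second layer: 256 inputs -> 128 neurons <br>
    nn.ReLU(), <br>
) <br>
Note that the output size of the last layer must equal features_dim, so set features_extractor_kwargs=dict(features_dim=128) in this example. <br>
The third (output) layer is assigned automatically based on the action space.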