In [1]:
import ray
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.ppo import APPOTrainer
import argparse
from ray.tune.registry import register_env
from pycigar.utils.registry import make_create_env
import yaml
import time
from pycigar.utils.input_parser import input_parser

# SAVE_RATE is defined here but is not referenced in the visible cells.
SAVE_RATE = 2

# Command-line options (relevant when this code is run as a script):
#   --run:  RL algorithm, e.g. PG, PPO, IMPALA
#   --stop: stopping criterion; the experiment stops once the mean reward
#           reaches this value.
# Example:
#   > python single_relative_discrete_2_lr.py --run PPO --stop 0
# Only the parser is built here; parse_args() is not called in this cell.
parser = argparse.ArgumentParser()
parser.add_argument("--run", default="PPO", type=str)  # try PG, PPO, IMPALA
parser.add_argument("--stop", default=0, type=int)

# Load the scenario configuration that describes the experiment.
sim_params = input_parser('ieee37busdata_regulator_attack')

# Parameters handed to make_create_env():
#   exp_tag:      tag identifying this experiment
#   env_name:     name of the environment class being used
#   sim_params:   simulation params, i.e. the scenario configuration
#   simulator:    the simulator being used, e.g. opendss, gridlabd...
#   tracking_ids: ids of the devices tracked during the experiment
pycigar_params = dict(
    exp_tag="cooperative_multiagent_ppo",
    env_name="CentralControlPVInverterEnv",
    sim_params=sim_params,
    simulator="opendss",
    tracking_ids=['adversary_inverter_s701a', 'reg1'],
)

# make_create_env() returns, for both the training and the test variant,
# a factory that builds a new environment instance together with the name
# under which that environment is to be registered.
(
    create_env,
    env_name,
    create_test_env,
    test_env_name,
) = make_create_env(params=pycigar_params, version=0)

# Register both factories with Ray Tune's registry so that RLlib can look the
# environments up by their string names (e.g. SingleRelativeDiscreteCoopEnv_v0).
register_env(env_name, create_env)
register_env(test_env_name, create_test_env)

# Build one test environment and read its spaces; they are needed to construct
# the agent(s) observation input and action output.
test_env = create_test_env()
obs_space = test_env.observation_space
act_space = test_env.action_space


In [2]:
# Roll out one full episode on the test environment. No policy is queried:
# the fixed action 2 is applied at every step and the per-step rewards are
# accumulated in `reward`.
obs = test_env.reset()
done = False
reward = 0
while True:
    obs, r, done, _ = test_env.step(2)
    reward += r
    if done:
        break
# Plotting of the result (saved in ./results) is currently disabled:
#test_env.plot(pycigar_params['exp_tag'], env_name, 0, reward)

[0. 0.]
[0. 0.]
[0. 0.]
[-83.24441561 -83.24441561]
[-77.11036346 -77.11036346]
[-75.85001524 -75.85001524]
[-75.93714182 -75.93714182]
[-62.9361544 -62.9361544]
[-45.06247553 -45.06247553]
[-42.62798438 -42.62798438]
[-34.17450479 -34.17450479]
[-30.52825546 -30.52825546]
[-27.80450939 -27.80450939]
[-18.95911 -18.95911]
[-17.7130832 -17.7130832]
[-14.94631795 -14.94631795]
[-19.72581571 -19.72581571]
[-14.20651153 -14.20651153]
[-9.50609748 -9.50609748]
[-12.66974912 -12.66974912]
[-11.47217064 -11.47217064]
[-10.44631314 -10.44631314]
[-10.44631314 -10.44631314]
[-13.26383768 -13.26383768]
[-13.96845498 -13.96845498]
[-17.69231889 -17.69231889]
[-19.49459327 -19.49459327]
[-18.30710317 -18.30710317]
[-14.39420185 -14.39420185]
[-14.82329347 -14.82329347]
[-14.85134458 -14.85134458]
[-16.56967929 -16.56967929]
[-18.9719051 -18.9719051]
[-18.21545791 -18.21545791]
[-20.51148078 -20.51148078]
[-22.40054571 -22.40054571]
[-21.12170758 -21.12170758]
[-18.95448756 -18.95448756]
[-16.68141

[-81.73828127 -81.73828127]
[-75.28956883 -75.28956883]
[-73.16710772 -73.16710772]
[-83.29750678 -83.29750678]
[-73.01552552 -73.01552552]
[-80.92359751 -80.92359751]
[-79.79529929 -79.79529929]
[-86.16717631 -86.16717631]
[-81.36323963 -81.36323963]
[-72.12767322 -72.12767322]
[-81.75238009 -81.75238009]
[-86.10685125 -86.10685125]
[-86.11486737 -86.11486737]
[-86.11486737 -86.11486737]
[-82.26409921 -82.26409921]
[-79.33045553 -79.33045553]
[-75.29429694 -75.29429694]
[-78.1931193 -78.1931193]
[-74.21144661 -74.21144661]
[-78.2735369 -78.2735369]
[-84.08960302 -84.08960302]
[-81.95667521 -81.95667521]
[-84.83888272 -84.83888272]
[-81.98347675 -81.98347675]
[-69.73404894 -69.73404894]
[-80.07137592 -80.07137592]
[-70.13268482 -70.13268482]
[-85.71909337 -85.71909337]
[-81.81863445 -81.81863445]
[-74.38217243 -74.38217243]
[-81.2017332 -81.2017332]
[-79.75791164 -79.75791164]
[-81.24637957 -81.24637957]
[-78.32091863 -78.32091863]
[-78.32091863 -78.32091863]
[-85.27838873 -85.27838873

[-2.55276855 -2.55276855]
[-3.29774911 -3.29774911]
[-5.30923275 -5.30923275]
[-4.3852742 -4.3852742]
[-3.00192803 -3.00192803]
[-1.64105803 -1.64105803]
[-1.07574264 -1.07574264]
[-1.51535504 -1.51535504]
[-3.49839849 -3.49839849]
[-4.33972484 -4.33972484]
[-4.33972484 -4.33972484]
[-4.66339974 -4.66339974]
[-3.45488478 -3.45488478]
[-1.48854476 -1.48854476]
[-1.44313341 -1.44313341]
[-1.26407077 -1.26407077]
[-1.19189098 -1.19189098]
[-1.76911994 -1.76911994]
[-2.91895883 -2.91895883]
[-3.52794429 -3.52794429]
[-2.50904798 -2.50904798]
[-2.04354303 -2.04354303]
[-0.65907191 -0.65907191]
[0.16876247 0.16876247]
[0.36372091 0.36372091]
[-0.50085208 -0.50085208]
[-1.58594647 -1.58594647]
[-2.68709776 -2.68709776]
[-4.4923396 -4.4923396]
[-3.36938536 -3.36938536]
[-2.55818475 -2.55818475]
[-2.55818475 -2.55818475]
[-2.40842374 -2.40842374]
[-2.21578277 -2.21578277]
[-2.1042814 -2.1042814]
[-2.37941578 -2.37941578]
[-1.49027004 -1.49027004]
[-1.31601499 -1.31601499]
[-0.323669 -0.323669]


In [3]:
# Call plot_unbalance with the experiment tag, the environment name, 0 and the
# reward accumulated by the rollout that ran before this line.
test_env.plot_unbalance(pycigar_params['exp_tag'], env_name, 0, reward)


def run_constant_action_episode(environment, action):
    """Run one episode on ``environment``, applying the same action every step.

    This replaces the rollout loop that was previously copy-pasted for each
    environment.

    Args:
        environment: object exposing ``reset()`` and ``step(action)``, where
            ``step`` returns a 4-tuple ``(obs, reward, done, info)``.
        action: the action passed unchanged to every ``step`` call.

    Returns:
        tuple: ``(last_obs, total_reward)`` - the observation returned by the
        final step and the sum of all per-step rewards.
    """
    environment.reset()
    total_reward = 0
    episode_done = False
    while not episode_done:
        last_obs, step_reward, episode_done, _ = environment.step(action)
        total_reward += step_reward
    return last_obs, total_reward


# Episode on the test environment with the fixed action 3.
obs, reward = run_constant_action_episode(test_env, 3)
# Plotting of the result (saved in ./results) is currently disabled:
#test_env.plot(pycigar_params['exp_tag'], env_name, 0, reward)

# Build a training environment, refresh the observation/action spaces from it
# (needed to construct the agent(s) input and output), and run the same
# fixed-action episode on it.
env = create_env()
obs_space = env.observation_space
act_space = env.action_space
obs, reward = run_constant_action_episode(env, 3)
# Plotting of the result (saved in ./results) is currently disabled:
#test_env.plot(pycigar_params['exp_tag'], env_name, 0, reward)

# Call get_pycigar_output_specs() on the test environment; the return value is
# not stored.
test_env.get_pycigar_output_specs()

# NOTE(review): `dss` is not imported or defined in any visible cell, so the
# call below raises NameError on a fresh kernel unless it is defined elsewhere.
# Presumably it is meant to be the OpenDSS Python binding imported as `dss` -
# TODO confirm and add the import to the top import cell.
# NOTE(review): the path is an absolute, machine-specific location; it will not
# exist on other machines. Consider deriving it from a configurable data dir.
directory = '/Users/toanngo/Documents/GitHub/ceds-cigar/rl/data/ieee37busdata/ieee37.dss'
# Pass the command string 'Redirect <path>' to dss.run_command.
dss.run_command('Redirect '+ directory)