In [1]:
import torch as th

from stable_baselines3 import TD3,PPO,SAC,DDPG,A2C

import gym
from gym import error, spaces, utils
from gym.utils import seeding

import subprocess           # to run the TRNSYS simulation
import os
import numpy as np

from time import sleep

from filelocker import FileLock
from filelock import FileLock as FL

import logging

In [2]:
class trnSYSEnv(gym.Env):
    """Gym environment that couples an RL agent to a TRNSYS simulation via files.

    The environment launches the TRNSYS executable as a child process and
    exchanges data with it through plain-text files in the current working
    directory:

    * ``action.txt``      -- written by :meth:`step` with the agent's action.
    * ``observation.txt`` -- read by :meth:`reset` and :meth:`step`.
    * ``others.txt``      -- read by :meth:`step`; its first three values are
      used as consumed energy, control weight and energy weight of the reward.

    Reads and writes of the action/observation files are guarded by
    ``filelock`` locks named ``<file>.lock``.

    The action space is a single value in ``[-1, 1]`` and the observation space
    is three values bounded by ``[-50, 50]``, ``[-1500, 1500]`` and
    ``[-10, 10]``.
    """

    # Pause between file-system polls so that waiting loops do not spin a CPU core.
    _POLL_INTERVAL = 0.01

    def __init__(self, deck, config, render_mode=None, model="", deck_template=""):
        """Set up file paths, locks and spaces; no TRNSYS process is started yet.

        Args:
            deck: Path of the TRNSYS deck file passed to the executable.
            config: Mapping that must contain ``'trnsys_path'`` (path of the
                TRNSYS executable). A shallow copy is stored, so the caller's
                mapping is left untouched.
            render_mode: ``None`` launches TRNSYS with the additional ``"/h"``
                flag; any other value omits that flag.
            model: Unused; kept for backward compatibility.
            deck_template: Unused; kept for backward compatibility.
        """
        self.act_path = "action.txt"
        self.obs_path = "observation.txt"
        self.params_path = "others.txt"
        self.render_mode = render_mode
        self.flag = 'EndOfSimFlag'
        self.is_busy = 'busy'
        self.lock_path = ".lock"

        self.uid = np.random.randint(0, 9999)
        # NOTE(review): `filelocker.FileLock` semantics are not visible here;
        # presumably these two locks hand control back and forth with TRNSYS.
        self.state = FileLock('iteration_ready')
        self.lock = FileLock(self.lock_path)
        self.act_lock = FL(self.act_path + self.lock_path)
        self.obs_lock = FL(self.obs_path + self.lock_path)

        # Copy before overriding 'obs' so the caller's dict is not mutated.
        self.config = dict(config)
        self.config['obs'] = self.obs_path
        self.prev_action = [0]
        self.proc = None
        self.deck_file = deck

        self.action_space = spaces.Box(np.array([-1,]), np.array([1,]))
        self.observation_space = spaces.Box(np.array([-50, -1500, -10]), np.array([50, 1500, 10]))

    def step(self, act):
        """Write ``act`` for TRNSYS, read the resulting observation and score it.

        Args:
            act: Indexable action (list or array); element 0 is the control
                signal used for the control-effort penalty of the *next* step.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``obs`` is the list read
            from the observation file, ``reward`` is the negative sum of the
            squared first observation, the weighted squared previous action and
            the weighted squared consumed energy, ``done`` tells whether the
            end-of-simulation flag file exists or the TRNSYS process has
            exited, and ``info`` is an empty dict.
        """
        with self.act_lock:
            np.savetxt(self.act_path, np.array(act))  # assuming near-zero writing to file

        # presumably signals TRNSYS that a new action is available -- confirm
        self.state.release()
        with self.obs_lock:
            obs = np.loadtxt(self.config['obs']).tolist()
            # other parameters
            params = np.loadtxt(self.params_path).tolist()
            energy_consumed = params[0]
            weight_ctrl = params[1]
            weight_energy = params[2]

        # Reward compute
        reward_error = obs[0] ** 2
        reward_ctrl = weight_ctrl * self.prev_action[0] ** 2
        reward_energy = weight_energy * energy_consumed ** 2
        reward = -(reward_error + reward_ctrl + reward_energy)

        self.prev_action = act

        # The episode ends when the flag file exists or TRNSYS has exited.
        # `self.proc` is None if step() is called before reset(); guard it so
        # that case does not crash with AttributeError.
        process_exited = self.proc is not None and self.proc.poll() is not None
        done = bool(os.path.isfile(self.flag) or process_exited)

        # Wait until the busy marker disappears, yielding the CPU between polls.
        while os.path.isfile(self.is_busy + self.lock_path):
            sleep(self._POLL_INTERVAL)
        sleep(0.05)
        return obs, reward, done, {}

    def reset(self):
        """Restart the TRNSYS simulation and return its first observation.

        Returns:
            The first observation as a list read from the observation file.

        Raises:
            RuntimeError: If the TRNSYS process exits before it has written an
                observation file (instead of waiting forever).
        """
        # Stop the previous run *before* deleting its output, otherwise the old
        # process may recreate or still hold the observation file.
        self._terminate_process()

        # REMOVE EXISTING OBSERVATION from previous run
        if os.path.isfile(self.obs_path):
            os.remove(self.obs_path)

        # A new episode must not be penalised for the last action of the old one.
        self.prev_action = [0]

        # start another trnSYS process
        command = [self.config['trnsys_path'], os.path.abspath(self.deck_file)]
        if self.render_mode is None:
            command.append("/h")  # presumably "hidden" mode -- confirm against TRNSYS docs
        command.append("/n")
        self.proc = subprocess.Popen(command)

        self.lock.acquire()

        # waiting for observation from trnSYS
        while not os.path.isfile(self.obs_path):
            if self.proc.poll() is not None and not os.path.isfile(self.obs_path):
                raise RuntimeError(
                    "TRNSYS exited with code %s before writing %s"
                    % (self.proc.returncode, self.obs_path)
                )
            sleep(self._POLL_INTERVAL)
        with self.obs_lock:
            obs = np.loadtxt(self.config['obs']).tolist()
        return obs

    def render(self):
        """Rendering is not implemented; TRNSYS shows its own window if any."""
        pass

    def close(self):
        """Stop TRNSYS (if running) and delete the action/observation files."""
        self._terminate_process()
        for path in (self.obs_path, self.act_path):
            if os.path.isfile(path):
                os.remove(path)

    def _terminate_process(self):
        """Kill the TRNSYS child process if one is still running and reap it."""
        if self.proc is not None and self.proc.poll() is None:
            self.proc.kill()
            # Reap the child so its handles are released before files are
            # removed or a new simulation is started.
            self.proc.wait()
    


In [3]:
# Configuration handed to trnSYSEnv.
act_obs_config = {
    # Raw string: the Windows path contains backslashes that must not be read
    # as escape sequences (`\T` and `\E` are invalid escapes and trigger warnings).
    'trnsys_path': r'C:\TRNSYS18Demo\Exe\TrnEXE64.exe',
    # Not read by trnSYSEnv at the moment: its action space is fixed to [-1, 1].
    'act': {
        # variable: [min, max]
        'pyt_t_on': [0.0, 10.0],
        'pyt_t_off': [20.0, 25.0],
        'pyt_s_high': [0.0, 1.0],
        'pyt_s_low': [0.0, 1.0],
    },
    # trnSYSEnv ignores this value and reads 'observation.txt' instead.
    'obs': 'example_trnsysmodel\\trnOut_PumpData.txt',
    # Not read by trnSYSEnv; the reward is computed inside trnSYSEnv.step().
    'reward_funct': 'np.exp(-np.sum(obs))',
}

In [4]:

# TRNSYS deck file to simulate (note the leading space inside the literal).
# Alternative deck: 'C:\TRNSYSproject\YeojuRL\Yeoju.dck'
deck = ' D E M O V E R S I O N - CoolingHouseSwitchAbleCtrl.dck'

# Any non-None render_mode (e.g. render_mode='human') makes reset() launch
# TRNSYS without the "/h" flag.
env = trnSYSEnv(deck=deck, config=act_obs_config)



In [7]:
# Launch a fresh TRNSYS run and display the first observation it produces.
obs = env.reset()
obs

[20.0, 0.0, 20.0]

In [10]:
# Smoke-test a single step with the lower bound of the action space (-1).
# To try a random action instead: act = env.action_space.sample()
act = np.array([-1])
# step() returns the tuple (obs, reward, done, info).
out = env.step(act.tolist())
[act, out]

[array([-1]),
 ([47.1804230594292, 5912.465313828656, 5.2648416398569395],
  -2225.992320066719,
  True,
  {})]

In [6]:
# Route log records of level DEBUG and above to the file obs.log.
logging.basicConfig(filename='obs.log', level=logging.DEBUG)

In [8]:
# Drive the environment with 80 random actions, logging observation, reward
# and action of every step; restart the simulation whenever it reports done.
for i in range(80):
    act = env.action_space.sample()
    obs, reward, done, info = env.step(act.tolist())

    logging.debug(obs)
    logging.debug(reward)
    logging.debug(act)

    if done:
        env.reset()

In [9]:
# Kill the TRNSYS process if it is still running and delete the action and
# observation files.
env.close()

In [8]:

# Custom actor (pi), value-function (vf) and Q-function (qf) networks, each
# with a single hidden layer of `no` units and ReLU activation.
# Note: an extra linear layer will be added on top of the pi and the vf nets, respectively
# NOTE(review): for off-policy algorithms such as TD3 the `vf` entry is
# presumably ignored (only `pi` and `qf` are used) -- confirm against the SB3 docs.
no = 1
policy_kwargs = dict(activation_fn=th.nn.ReLU,
                     net_arch=dict(pi=[no], vf=[no], qf=[no]))

In [5]:
# Build a TD3 agent on the TRNSYS environment; TensorBoard logs are written
# below <log_dir>/results/.
# NOTE(review): policy_kwargs is not passed here, so the default network
# architecture is used. Optional extras that were tried:
#   policy_kwargs=policy_kwargs, use_expln=True, features_extractor_kwargs=None
log_dir = 'tmp'
model = TD3(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log=log_dir + '/results/',
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [6]:
# Train the agent for a budget of 100,000 environment steps.
model.learn(total_timesteps=100_000)#, reset_num_timesteps=False)

Logging to tmp/results/TD3_1
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 81        |
|    ep_rew_mean     | -5.53e+05 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 4         |
|    time_elapsed    | 71        |
|    total_timesteps | 324       |
| train/             |           |
|    actor_loss      | 7.65e+03  |
|    critic_loss     | 1.23e+07  |
|    learning_rate   | 0.001     |
|    n_updates       | 162       |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 81        |
|    ep_rew_mean     | -6.86e+05 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 4         |
|    time_elapsed    | 141       |
|    total_timesteps | 648       |
| train/             |           |
|    actor_loss      | 1.36e+04  |
|    critic_loss     | 4.79e+06  |
|    learning_rate   | 0.0

KeyboardInterrupt: 

In [14]:
# Weights and biases of the actor's `mu` network, keyed by "<layer index>.<weight|bias>".
parameters = model.actor.mu.state_dict()
parameters

OrderedDict([('0.weight',
              tensor([[ 0.0208,  0.4319, -0.4338]], device='cuda:0')),
             ('0.bias', tensor([0.1341], device='cuda:0')),
             ('2.weight', tensor([[-0.9426]], device='cuda:0')),
             ('2.bias', tensor([0.7827], device='cuda:0'))])

In [None]:
# The first actor layer has one weight per observation component, so its first
# row holds three values; unpacking it into only two names raises ValueError.
# presumably the observation is (error, integral, derivative), which makes the
# three weights the PID gains -- TODO confirm against the TRNSYS deck.
Kp, Ki, Kd = parameters['0.weight'].tolist()[0]
# Offset of the first layer, kept separately from the gains.
bias = parameters['0.bias'].tolist()[0]
[abs(Kp), abs(Ki), abs(Kd)]