# **Environment: Tank Water Level Control**

In [None]:
# Building the custom environment for OpenAi Gym

from gym import Env 
# Env is a placeholder class that allows us to build our environment

from gym.spaces import Box
# Superclass that is used to define observation and action spaces
#`Box` is for continuing-space, `Discrete` for discrete-space and `Dict` for multiple input
# https://github.com/openai/gym/blob/master/gym/spaces/box.py
"""Implementation of a space that represents closed boxes in euclidean space."""
"""
class Box(Space[np.ndarray]):
    A (possibly unbounded) box in :math:`\mathbb{R}^n`.
    Specifically, a Box represents the Cartesian product of n closed intervals.
    Each interval has the form of one of :math:`[a, b]`, :math:`(-\infty, b]`,
    :math:`[a, \infty)`, or :math:`(-\infty, \infty)`.
    There are two common use cases:
    * Identical bound for each dimension::
        >>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
        Box(3, 4)
    * Independent bound for each dimension::
        >>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
        Box(2,)
    
"""
import numpy as np
import random

# (This removes np.float32 warnings, but can be solved with np.float64 at "box" definition):
# gym.logger.set_level(40) 

class Model(Env):
# By passing Env to the class Model we defined, we inherit the methods and properties of OpenAI Gym

    def __init__(self):

        # Actions we can take, we only have the valve opening at each timestep (we consider a lower/upper bound of |1.0e-3|)
        self.action_space = Box(low=np.array([-1.0e-3]), high=np.array([1.0e-3]), dtype=np.float64)
        # The function "Box" can support a tensor

        # # Valve opening observation array (maximum physically possible opening of 1.0)
        # self.valve_opening  = Box(low=np.array([0.00]), high=np.array([1.00]), dtype=np.float64)

        # # Water Height observation array (assuming Maximum Height of Tank of 1.0 m)
        # self.water_height = Box(low=np.array([0.00]), high=np.array([1.00]), dtype=np.float64)

        # Inflow observation array (assuming Maximum Possible Flow of 150 m³/h) / remember: Inflow is not a state!
        # self.inflow = Box(low=np.array([0.00]), high=np.array([150.00]), dtype=np.float64)

        # Observation space array (only Valve opening and Water Height; both with lower bound 0.0 and upper bound 1.0)
        self.observation_space = Box(low=0.0, high=1.0, shape=(1, 2), dtype=np.float64)

        # Set observation space (reminder: flow isn't measured/ isn't a state)
        # self.observation_space = (self.inflow , self.valve_opening, self.water_height)

        ## Area of 3 m²
        ## Diameter of 1.95441 m
        self.tank_diameter = 1.95441

        # Set initial states (we can instantiate them a bit randomly - 10% var):
        # self.current_inflow = 100 + random.uniform(-10.00, 10.00)
        # self.current_valve_opening = 0.5 + random.uniform(-0.05, 0.05)
        # self.current_water_height = 0.5 + random.uniform(-0.05, 0.05)

        self.current_inflow = 100
        self.current_valve_opening = 0.5
        self.current_water_height = 0.5

        ##  Calculate Volume of Water inside Tank
        ##  Maximum Possible Volume of 3.0 m³
        self.current_water_volume = (((self.tank_diameter / 2) ** 2) * np.pi) * self.current_water_height

        ## Valve coefficient (Cv) is given
        self.valve_coefficient = 282.84

        ## Setpoint in terms of the Usual Operation Height of 0.5 m 
        self.setpoint =  0.5

        # Tolerance acceptable for setpoint error (in reward definition):
        self.tolerance = 1e-4 # test 1e-6
       
        self.state = (self.current_valve_opening, self.current_water_height)

        # Set episode length
        ## Timestep = 0.01h
        ## Episode total time = 2h (200 timesteps)
        ## Total number of episodes = 2000
        self.time_per_episode = 200

        # Initialize time counter
        self.time_step = 0

    ## Provides current water height from the most up to date water volume
    def update_water_height(self):
        return self.current_water_volume / (((self.tank_diameter / 2) ** 2) * np.pi)   
   
    ## Calculates the non-linear outflow rate of water from the tank
    def outflow(self):
        return self.valve_coefficient * self.current_valve_opening * np.sqrt(abs(self.update_water_height())) 

    ## Error of water height from current set point
    def error(self):
        error=(self.update_water_height() - self.setpoint)
        return error
        
    def step(self, action):

        # Flow rate of water + disturbances
        self.current_inflow = self.current_inflow + self.disturbance(self.time_step)

        ## Current water volume in the tank
        self.current_water_volume = self.current_water_volume + self.current_inflow - self.outflow()

        ## Update current water height
        self.current_water_height = self.update_water_height()

        # Apply action (valve opening)
        # Continuous: [-0.01, 0.01] at each timestep
        self.current_valve_opening =  self.current_valve_opening + action

        # Add 1 Timestep = 0.01h
        self.time_step += 1
        
        # Calculate reward
        ## Reward: minus the square of height error -(m)^2
        ## Our objective is to minimize this error (or negative reward)
        # reward = -((self.error())**2)

        if (self.error())**2 <= self.tolerance : 
            reward = 1 
        else: 
            reward = -1 

        ## Determine whether it is a terminal state
        terminal = self.is_terminal(self.current_water_height)

        # Set placeholder for info
        info = {}

        # Return step information
        return self.retrieve_observation(), reward, terminal, info

    ## The terminal state is reached if time step reaches more than 200 or if water level is at 2 extremes
    def is_terminal(self, water_h):
        if self.time_step >= self.time_per_episode-1 or self.current_water_height <= 0 or self.current_water_height >= 1:
            return True
        else:
            return False

    ## Disturbances on flow rate:
    def disturbance(self, time):
        if self.time_step == 10: #0.1 h
            return 20
        elif self.time_step == 100: #1.1 h
            return -20
        else:
            return 0

    ## Retrieve current state
    def retrieve_observation(self):

        self.state = (
            self.current_valve_opening, self.current_water_height
        )
        return self.state

    def render(self):
        # Implement visualization for a game environment for example
        pass
    
    ## Reset the current state of the water tank. This involves time_step, water volume, input flow rate of water and error
    def reset(self):

        ## Set point remains fixed:
        self.setpoint = 0.5

        ## Reset time counter and other variables (we can instantiate them as before, a bit randomly)
        self.time_step = 0
        # self.current_inflow = 100 + random.uniform(-10.00, 10.00)
        # self.current_valve_opening = 0.5 + random.uniform(-0.05, 0.05)
        # self.current_water_height = 0.5 + random.uniform(-0.05, 0.05)

        self.current_inflow = 100
        self.current_valve_opening = 0.5
        self.current_water_height = 0.5

        return self.retrieve_observation()

In [None]:
env = Model()

In [None]:
# Just to see if the bounds of observation space (H and Xv) are defined correctly

a = env.observation_space.low
print(a)

b = env.observation_space.high
print(b)

[[0. 0.]]
[[1. 1.]]


In [None]:
# getattr() – This function is used to access the attribute of object, like a class.

print (getattr(env,'time_step')) 
print (getattr(env,'current_inflow')) 
print (getattr(env,'current_valve_opening')) 
print (getattr(env,'current_water_height')) 
print (getattr(env,'setpoint')) 

0
100
0.5
0.5
0.5


In [None]:
#Python calling method (error) in class (model/'env')

env.error()

0.0

In [None]:
env.update_water_height()

0.5

In [None]:
env.outflow()

99.99904099540154

In [None]:
# Python calling method (step) in class (model/'env')
# Just to see 1 step, with an action of +0.01 on Xv (valve_opening)

env.step(0.0001)

# Remember of the output we defined from retrieve.observation():
# self.current_water_volume, self.current_inflow , self.current_valve_opening, self.current_water_height, self.setpoint, self.error()

# And after we have also from the return of step(): 
# reward, terminal, info.

((0.5001, 0.5003196682150601), 1, False, {})

In [None]:
env.outflow()

100.05100860311804

In [None]:
# Python calling method (step) in class (model/'env')
# Just to see 1 step, with an action of +0.01 on Xv (valve_opening)

env.step(0.0001)

# Remember of the output we defined from retrieve.observation():
# self.current_water_volume, self.current_inflow , self.current_valve_opening, self.current_water_height, self.setpoint, self.error()

# And after we have also from the return of step(): 
# reward, terminal, info.

((0.5002, 0.4833167996806295), -1, False, {})

In [None]:
env.outflow()

98.35591014446949

In [None]:
# Python calling method (step) in class (model/'env')
# Just to see 1 step, with an action of +0.01 on Xv (valve_opening)

env.step(0.0001)

# Remember of the output we defined from retrieve.observation():
# self.current_water_volume, self.current_inflow , self.current_valve_opening, self.current_water_height, self.setpoint, self.error()

# And after we have also from the return of step(): 
# reward, terminal, info.

((0.5003, 1.0313467782254153), -1, True, {})

In [None]:
env.outflow()

143.70559910235374

In [None]:
# This allows us to see numpy arrays with more precision
# (print with a higher number of digits of precision for floating point output)

np.set_printoptions(precision=4) 

In [None]:
# Just to confirm that the observation space (of valve_opening) is a continuum between [0, 1]

env.observation_space.sample()

array([[0.8633, 0.5738]])

In [None]:
# Just to confirm that the action space (of valve_opening) is a continuum between [-0.01, 0.01]

env.action_space.sample()

array([-0.0008])

In [None]:
# Just to test the environment with random control actions
# The score is similar to return (sum of rewards)

episodes = 10
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        #env.render()
        action = env.action_space.sample() #take a random action from the action space
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))

Episode:1 Score:-1
Episode:2 Score:-1
Episode:3 Score:-1
Episode:4 Score:-1
Episode:5 Score:-1
Episode:6 Score:-1
Episode:7 Score:-1
Episode:8 Score:-1
Episode:9 Score:-1
Episode:10 Score:-1


In [None]:
n_state

(array([0.4993]), -28355.151833018957)