<a href="https://colab.research.google.com/github/shadiakiki1986/ml-competitions/blob/master/other/201902-WtpRl/WtpDesignerEnv_v0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Water Treatment Plant designer

This notebook demonstrates a simulation environment for the design of a water treatment plant as well as training an agent to design the WTP to improve the feed water quality.

The simulation is then used to train a feed-forward neural network to propose a water treatment plant design based on water quality parameters.

Each water sample is described by three parameters, each of which is either low or high: turbidity, hardness, and bacteria.

The allowed elements in the system are: pipe (no treatment), sand filter (treats turbidity), softener (treats hardness), and UV (treats bacteria).

# install pre-reqs

In [1]:
# install openai gym
# (the output is piped through `tail` so that only the last lines of pip's log are shown)
!pip install gym | tail



In [2]:
# install rlworkgroup/garage
# Copied from colab/2019-01-21/t3.ipynb
#------------------------------------
# This cell installs the system and Python dependencies and clones the garage
# repository; garage itself is installed by the next cell.
# The apt/pip output is piped through `tail` so that only its last lines are shown.

# Install dependencies (copied from garage/environment.yml)
!apt-get install libglfw3 libglfw3-dev | tail

# >>>>>>>>   requires restart of runtime in colab.research.google.com due to joblib and rsa <<<<<<
!pip install awscli  boto3  cached_property  cloudpickle  cma==1.1.06 flask  gym  "box2d-py>=2.3.4"  hyperopt  ipdb  ipywidgets  jsonmerge  "joblib<0.13,>=0.12"  jupyter  mako  matplotlib  memory_profiler  pandas  path.py    polling  pre_commit  protobuf  psutil  pygame  pyglet  PyOpenGL  pyprind  python-dateutil  pyzmq  scikit-image  scipy  tensorboard  | tail
# Entries of environment.yml that are deliberately left out of the install above:
#"tensorflow<1.10,>=1.9.0"  Theano==1.0.2    "mujoco-py<1.50.2,>=1.50.1" gym[all]==0.10.8
#!pip install jsonmerge glfw mako pygame
!pip install pyprind cma glfw | tail

# Install garage (continued in next cell)
!git clone https://github.com/rlworkgroup/garage
# !cd garage && pip install -e . # >>>>>>>>   requires restart of runtime in colab.research.google.com due to joblib and rsa <<<<<<
#!pip show rlgarage garage

Preparing to unpack .../5-libxrandr-dev_2%3a1.5.1-1_amd64.deb ...
Unpacking libxrandr-dev:amd64 (2:1.5.1-1) ...
Setting up libvulkan1:amd64 (1.1.82.0-0ubuntu0.18.04.1~gpu1) ...
Setting up libvulkan-dev:amd64 (1.1.82.0-0ubuntu0.18.04.1~gpu1) ...
Setting up libglfw3:amd64 (3.2.1-1) ...
Processing triggers for libc-bin (2.27-3ubuntu1) ...
Setting up x11proto-randr-dev (2018.4-4) ...
Processing triggers for man-db (2.8.3-2ubuntu0.1) ...
Setting up libglfw3-dev:amd64 (3.2.1-1) ...
Setting up libxrandr-dev:amd64 (2:1.5.1-1) ...
[31mfeaturetools 0.4.1 has requirement pandas>=0.23.0, but you'll have pandas 0.22.0 which is incompatible.[0m
  Found existing installation: rsa 4.0
    Uninstalling rsa-4.0:
      Successfully uninstalled rsa-4.0
  Found existing installation: botocore 1.12.94
    Uninstalling botocore-1.12.94:
      Successfully uninstalled botocore-1.12.94
  Found existing installation: joblib 0.13.2
    Uninstalling joblib-0.13.2:
      Successfully uninstalled joblib-0.13.2
Su

Collecting glfw
  Downloading https://files.pythonhosted.org/packages/5d/65/c6275744a01425195f1f446e022e5dfa6497aa68479a3952e434e04b2fa0/glfw-1.7.1.tar.gz
Building wheels for collected packages: glfw
  Building wheel for glfw (setup.py): started
  Building wheel for glfw (setup.py): finished with status 'done'
  Stored in directory: /root/.cache/pip/wheels/c5/53/f9/fd31798dce7e10aa49f8354e4111b9c9cad10c894184658663
Successfully built glfw
Installing collected packages: glfw
Successfully installed glfw-1.7.1
Cloning into 'garage'...
remote: Enumerating objects: 116, done.[K
remote: Counting objects: 100% (116/116), done.[K
remote: Compressing objects: 100% (81/81), done.[K
remote: Total 10472 (delta 54), reused 60 (delta 35), pack-reused 10356[K
Receiving objects: 100% (10472/10472), 9.13 MiB | 17.31 MiB/s, done.
Resolving deltas: 100% (7322/7322), done.


In [1]:
# Install garage, commit e7324a68dedd94b4ea15a9c761bab2af032e2480 before the upcoming commits related to bumping gym/dm-control/mujoco versions
# The checkout pins the clone made in the previous cell to that exact commit,
# and `pip install -e .` then installs the package from that working copy.
!cd garage && git checkout e7324a68dedd94b4ea15a9c761bab2af032e2480
!cd garage && pip install -e . # >>>>>>>>   requires restart of runtime in colab.research.google.com due to joblib and rsa <<<<<<

Note: checking out 'e7324a68dedd94b4ea15a9c761bab2af032e2480'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by performing another checkout.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -b with the checkout command again. Example:

  git checkout -b <new-branch-name>

HEAD is now at e7324a6 Move nb_utils.py to garage.experiment
Obtaining file:///content/garage
Installing collected packages: rlgarage
  Running setup.py develop for rlgarage
Successfully installed rlgarage




---



---



---



# gym environment for WTP design

In [1]:
# Create a gym env that simulates the current water treatment plant
# Based on https://github.com/openai/gym/blob/master/gym/envs/toy_text/nchain.py

import gym
from gym import spaces
#from gym.utils import seeding
import numpy as np
import random



def act_on_water(water_in, water_parameter):
  """
  Apply one treatment step to a water sample and score that choice.

  Parameters
  - water_in: dict mapping each water parameter name to a truthy value
    (high, i.e. needs treatment) or a falsy value (low)
  - water_parameter: name of the parameter that the chosen element treats,
    or None when the element is a plain pipe (no treatment)

  Returns a tuple (water_out, reward)
  - water_out: the sample after the step. It is a modified copy of water_in
    when the targeted parameter was lowered, otherwise water_in itself.
  - reward:
    - +2: water was improvable and a parameter that was high got treated
    - +1: water was not improvable and a pipe was chosen
    -  0: water was improvable but the treated parameter was already low
    - -1: water was improvable and a pipe was chosen
    - -2: water was not improvable and a treatment was attempted anyway

  Raises
  - ValueError: water_parameter is not None and is not a key of water_in
  """
  # the sample can be improved as soon as one parameter is still high
  needs_treatment = any(water_in.values())

  # pipe: nothing is changed, the reward only depends on whether
  # there was something left to treat
  if water_parameter is None:
    reward = -1 if needs_treatment else +1
    return water_in, reward

  # sanity check
  if water_parameter not in water_in:
    raise ValueError("water parameter = %s not a property of the water"%water_parameter)

  # a treatment element on water that is already clean
  if not needs_treatment:
    return water_in, -2

  # relevant treatment: lower the targeted parameter on a copy,
  # so that the caller's sample is left untouched
  if water_in[water_parameter]:
    water_out = water_in.copy()
    water_out[water_parameter] = False
    return water_out, +2

  # Irrelevant treatment (targeted parameter already low, another one high).
  # The reward is 0 and not +1 on purpose: with +1 the best policy first
  # stacked a few useless elements to collect points and only then installed
  # the relevant ones, like a salesman selling useless WTP elements before
  # selling the right system.
  return water_in, 0


# test: print the (water_out, reward) pair for each reward branch of
# act_on_water, using a sample that has a single "turbidity" parameter
print("turbidity: High, action: turbidity", act_on_water({"turbidity": True }, "turbidity"))
print("turbidity: Low , action: turbidity", act_on_water({"turbidity": False}, "turbidity"))
print("turbidity: High, action: pipe", act_on_water({"turbidity": True }, None))
print("turbidity: Low , action: pipe", act_on_water({"turbidity": False}, None))
try:
  print("turbidity: High, action: hardness", act_on_water({"turbidity": True }, "hardness"))
except ValueError:
  # "hardness" is not a key of the sample, which act_on_water rejects with a
  # ValueError; any other exception is a real failure and must not be hidden
  print("turbidity: High, action: hardness", "error: parameter not in water")
  
#print("state = 1, action: pipe",      act_on_water(1, None))

turbidity: High, action: turbidity ({'turbidity': False}, 2)
turbidity: Low , action: turbidity ({'turbidity': False}, -2)
turbidity: High, action: pipe ({'turbidity': True}, -1)
turbidity: Low , action: pipe ({'turbidity': False}, 1)
turbidity: High, action: hardness error: parameter not in water


In [0]:

# Gym env
class WtpDesignerEnv_v0(gym.Env):
    """Water Treatment Plant (WTP) designer environment.

    This is a simulation of the design of a water treatment plant: an episode
    starts from a water sample and, at every step, the agent chooses the
    element to install at the next stage of the plant.

    Observation:
      Parameters in water
      Type: Dict of 3 keys, each of which is Discrete(2)
      turbidity  True/False (= High/Low)
      hardness   True/False (= High/Low)
      bacteria   True/False (= High/Low)

    Actions:
      WTP element to implement at the i-th stage (index into `wtp_elements`)
      Type: Discrete(4)
      0 pipe (no treatment)
      1 turbidity treatment (sediment / sand filter)
      2 hardness treatment (softener)
      3 bacteria treatment (uv)

    Reward: check function "act_on_water"

    Episode termination:
      All elements of WTP are chosen, i.e. after `n_elements` steps
    """
    def __init__(self, attempts_max = 1000):
        # NOTE: attempts_max is not used anywhere in this class. It is kept in
        # the signature only so that existing callers keep working.

        # What each action index treats: None is the pipe (no treatment), the
        # other entries name the water parameter passed to act_on_water.
        self.wtp_elements = [None, "turbidity", "hardness", "bacteria"]

        # number of elements in the WTP system generated (= steps per episode)
        self.n_elements = 5

        # choose the element for the current step
        self.action_space = spaces.Discrete(len(self.wtp_elements))

        # https://github.com/openai/gym/blob/master/gym/spaces/dict_space.py
        # the observation space is a Dict of 3 key-value pairs
        # Note that the "FlatDictWrapper" later just flattens this to a single Discrete(8) observation
        # for the sake of being able to use the CategoricalMLPPolicy
        self.observation_space = spaces.Dict({
            "turbidity": spaces.Discrete(2),
            "hardness": spaces.Discrete(2),
            "bacteria": spaces.Discrete(2),
        })

        # start with a random water sample
        # (randomness comes from the global numpy generator, np.random)
        self.reset()

    def step(self, action):
        """Install the element `action` at the next stage of the plant.

        Parameters
        - action: index into `wtp_elements`, must belong to `action_space`

        Returns the tuple (state, reward, done, info)
        - state: water parameters after the element (dict)
        - reward: reward computed by act_on_water for this element
        - done: True once `n_elements` elements have been chosen
        - info: always an empty dict

        Raises
        - AssertionError: invalid action, or step() called after the episode
          is over without calling reset()
        """
        assert self.action_space.contains(action), "action not in action space!"
        assert self.element_i < self.n_elements, "episode is over, call reset() before stepping again"

        # one more stage of the plant is now filled
        self.element_i += 1

        # apply the chosen element to the water and get the reward of this stage
        wtp_i = self.wtp_elements[action]
        self.state, reward = act_on_water(self.state, wtp_i)

        # the episode ends when every stage of the plant has an element
        done = self.element_i >= self.n_elements

        return self.state, reward, done, {}

    def reset(self, s0=None):
        """Start a new episode.

        Parameters
        - s0: desired initial water sample (dict); it is copied, so the
          caller's dict is never modified. When None, each parameter is
          independently drawn as high with probability 0.5.

        Returns
        - the initial state (the dict stored in `self.state`)
        """
        if s0 is None:
            # bool() turns numpy's np.bool_ into a plain Python bool, the same
            # type as the False that act_on_water writes into the state
            s0 = {
                "turbidity": bool(np.random.rand() < 0.5),
                "hardness": bool(np.random.rand() < 0.5),
                "bacteria": bool(np.random.rand() < 0.5),
            }
        self.state = s0.copy()
        self.element_i = 0
        return self.state

In [3]:

# some smoke testing
print("smoke test")

# example: one environment instance and a snapshot of its initial water sample
env_test = WtpDesignerEnv_v0()
state_initial = env_test.reset().copy()
print("water in:")
print("reset()", state_initial)
print("env.state", env_test.state)

print("grid search .. start")
# best design found so far; it is filled in by the search loop of the next cell
solution = {"act": None, "rew": -99999, "water_out": None}
done = False

# enumerate every possible sequence of n_elements action indices
import itertools
wtp_all1 = list(
  itertools.product(
    range(len(env_test.wtp_elements)),
    repeat=env_test.n_elements))

# pad every candidate with one trailing pipe
# (0 is the index of None in env_test.wtp_elements)
wtp_all2 = []
for wtp_i in map(list, wtp_all1):
  wtp_i.append(0)
  wtp_all2.append(wtp_i)

# only the list of candidates is built at this point,
# the candidates are evaluated in the next cell
print("grid search .. end")
#print("all wtp")
#print(wtp_all2)

smoke test
water in:
reset() {'turbidity': False, 'hardness': False, 'bacteria': False}
env.state {'turbidity': False, 'hardness': False, 'bacteria': False}
grid search .. start
grid search .. end


In [4]:

# iterate: replay every candidate design from the same initial water sample
for wtp_i in wtp_all2:
  env_test.reset(s0=state_initial)
  reward_sum = 0
  for action in wtp_i:
    water_out, reward_i, done, _ = env_test.step(action)
    reward_sum += reward_i
    # the env signals the end of the episode, remaining actions are ignored
    if done:
      break

  # strict comparison: among equally good designs the first one found is kept
  if reward_sum > solution['rew']:
    solution.update(
      act=wtp_i,
      rew=reward_sum,
      water_out=water_out, # last result
    )


print("grid search .. end")

# show result of grid search
print("*"*30)
print("water in:", state_initial)
print("wtp chosen", [env_test.wtp_elements[idx] for idx in solution['act']])
print("total reward", solution['rew'])
print("water out", solution['water_out'])

grid search .. end
******************************
water in: {'turbidity': False, 'hardness': False, 'bacteria': False}
wtp chosen [None, None, None, None, None, None]
total reward 5
water out {'turbidity': False, 'hardness': False, 'bacteria': False}


# Train agent

In [0]:
# Register the env with gym so that it can be created by id via gym.make()
# https://github.com/openai/gym/tree/master/gym/envs#how-to-create-new-environments-for-gym
from gym.envs.registration import register

# The entry point is the class object defined in this notebook, not a
# "module:Class" string such as 'gym_foo.envs:FooEnv'.
register(id='WtpDesignerEnv-v0', entry_point=WtpDesignerEnv_v0)

# test registration was successful
env = gym.make("WtpDesignerEnv-v0")

In [0]:
# The contents of this cell are mostly copied from garage/examples/...
# NEED TO run this twice for the first run in the runtime in colab, 1st for creating the personal config

from garage.baselines import LinearFeatureBaseline # <<<<<< requires restarting the runtime in colab after the 1st dependency installation above
from garage.envs import normalize
#from garage.envs.box2d import CartpoleEnv # no need since will use WtpDesignerEnv_v0 defined above
from garage.experiment import run_experiment
from garage.tf.algos import TRPO
from garage.tf.envs import TfEnv
#from garage.tf.policies import GaussianMLPPolicy
from garage.tf.policies import CategoricalMLPPolicy

import gym # already imported before

In [0]:
# Start a tensorflow session here so that it stays open after training and
# the trained network can still be used afterwards to see it performing.
# This session is the one passed to algo.train(sess=sess) further down.
import tensorflow as tf
sess = tf.InteractiveSession()

# no need to initialize
#sess.run(tf.global_variables_initializer())


In [0]:
# based on https://github.com/openai/gym/blob/5404b39d06f72012f562ec41f60734bd4b5ceb4b/gym/wrappers/dict.py
class FlattenDictWrapper2(gym.ObservationWrapper):
    """Flattens selected keys of a Dict observation space into a single
    Discrete observation.

    Every selected key must be a Discrete space. The wrapped observation is
    one integer that identifies the combination of the selected values.

    Parameters
    - env: environment whose observation_space is a Dict of Discrete spaces
    - dict_keys: ordered list of the keys of the Dict to flatten
    """
    def __init__(self, env, dict_keys):
        super().__init__(env)
        self.dict_keys = dict_keys

        # The flattened space has one value per combination of the selected keys.
        # FIXME (original author's note) this is never used .. ref https://github.com/openai/gym/blob/6497c9f1c6e43066c8945f02ed3ed4d234f45dc1/gym/core.py
        size = 1
        for key in dict_keys:
            size *= self.env.observation_space.spaces[key].n
        self.observation_space = gym.spaces.Discrete(size)

    def observation(self, observation):
        """
        Flatten the dict observation into one integer (mixed-radix encoding).

        The value of the i-th key is multiplied by the product of the sizes of
        the keys before it, so the first key is the least significant digit.
        For three keys of sizes 4, 3 and 2 the weights are 1, 4 and 4*3:

        >>> import itertools
        >>> x = np.array(list(itertools.product(range(4), range(3), range(2))))
        >>> y = x[:,0] + x[:,1]*4 + x[:,2]*4*3
        >>> sorted(y) == list(range(24))
        True

        Notice how "y" has no duplicates, and hence is a one-to-one mapping from the original matrix "x"

        Now convert y back to x

        >>> z1 = y % 4
        >>> z2 = (y // 4) % 3
        >>> z3 = y // (4*3)
        >>> z = np.array([z1, z2, z3]).T

        Notice that z == x

        When all keys have the same size (e.g. 2, 2, 2 -> weights 1, 2, 4) this
        gives the same weights as the previous `cumprod / cumprod[0]` formula.
        That formula produced colliding codes for keys of different sizes
        (sizes 4, 3, 2 -> weights 1, 3, 6).

        Parameters
        - observation: dict holding at least the keys in `dict_keys`

        Returns
        - numpy integer in [0, observation_space.n)
        """
        assert isinstance(observation, dict)

        # digits: the selected values, as integers (True/False become 1/0)
        digits = np.array([observation[key] for key in self.dict_keys]).astype('int')
        # sizes: number of possible values of each selected key
        sizes = [self.env.observation_space.spaces[key].n for key in self.dict_keys]

        # weight of digit i = product of the sizes of the digits before it
        weights = np.concatenate(([1], np.cumprod(sizes)[:-1])).astype('int')

        return (digits * weights).sum()


In [9]:
# Train the policy (neural network) on the environment
#----------------------------------
from gym import wrappers

# Water quality parameters that are flattened into one Discrete observation.
# (gym's own wrappers.FlattenDictWrapper is not used, FlattenDictWrapper2 replaces it)
wq_elements = ["turbidity", "hardness", "bacteria"]

# registered env -> flattened observation -> normalize -> TfEnv
# (same chain as the CartPole example: TfEnv(normalize(gym.make("CartPole-v0"))))
env = TfEnv(
    normalize(
        FlattenDictWrapper2(gym.make("WtpDesignerEnv-v0"), wq_elements)))

# categorical policy, since the action space of the env is Discrete
policy = CategoricalMLPPolicy(
    name="policy",
    env_spec=env.spec,
    hidden_sizes=(32, 32),
)

baseline = LinearFeatureBaseline(env_spec=env.spec)


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.random.categorical instead.


In [10]:

# Configure TRPO on the wrapped env with the policy and baseline built above.
# NOTE(review): the 5 in max_path_length mirrors WtpDesignerEnv_v0.n_elements
# (see the inline comment); keep the two in sync if n_elements changes.
algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=5+2, #env.n_elements+2, # add 2 since this is just a safety measure
    n_itr=50, # 50 is enough to reach steady state: average return 6.5, max return 8, min return 5
    discount=0.99,
    max_kl_step=0.01,
    plot=False)


Instructions for updating:
Use tf.cast instead.


In [11]:

# Train inside the session opened earlier, so that the session (and with it
# the trained network) remains usable after training.
algo.train(sess=sess)

2019-02-16 20:14:04 | itr #0 | Obtaining samples...
2019-02-16 20:14:04 | itr #0 | Obtaining samples for iteration 0...


0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00

2019-02-16 20:14:04 | itr #0 | Processing samples...





2019-02-16 20:14:04 | itr #0 | Logging diagnostics...
2019-02-16 20:14:04 | itr #0 | Optimizing policy...
2019-02-16 20:14:04 | itr #0 | Computing loss before
2019-02-16 20:14:04 | itr #0 | Computing KL before
2019-02-16 20:14:04 | itr #0 | Optimizing
2019-02-16 20:14:04 | itr #0 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:04 | itr #0 | computing loss before
2019-02-16 20:14:04 | itr #0 | performing update
2019-02-16 20:14:04 | itr #0 | computing gradient
2019-02-16 20:14:04 | itr #0 | gradient computed
2019-02-16 20:14:04 | itr #0 | computing descent direction
2019-02-16 20:14:05 | itr #0 | descent direction computed
2019-02-16 20:14:05 | itr #0 | backtrack iters: 1
2019-02-16 20:14:05 | itr #0 | computing loss after
2019-02-16 20:14:05 | itr #0 | optimization finished
2019-02-16 20:14:05 | itr #0 | Computing KL after
2019-02-16 20:14:05 | itr #0 | Computing loss after
2019-02-16 20:14:05 | itr #0 | Fitting baseline...
2019-02-16 2

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:06 | itr #1 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:06 | itr #1 | Logging diagnostics...
2019-02-16 20:14:06 | itr #1 | Optimizing policy...
2019-02-16 20:14:06 | itr #1 | Computing loss before
2019-02-16 20:14:06 | itr #1 | Computing KL before
2019-02-16 20:14:06 | itr #1 | Optimizing
2019-02-16 20:14:06 | itr #1 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:06 | itr #1 | computing loss before
2019-02-16 20:14:06 | itr #1 | performing update
2019-02-16 20:14:06 | itr #1 | computing gradient
2019-02-16 20:14:06 | itr #1 | gradient computed
2019-02-16 20:14:06 | itr #1 | computing descent direction
2019-02-16 20:14:06 | itr #1 | descent direction computed
2019-02-16 20:14:06 | itr #1 | backtrack iters: 1
2019-02-16 20:14:06 | itr #1 | computing loss after
2019-02-16 20:14:06 | itr #1 | optimization finished
2019-02-16 20:14:06 | itr #1 | Computing KL after
2019-02-16 20:14:06 | itr #1 | Computing loss after
2019-02-16 20:14:06 | itr #1 | Fitting baseline...
2019-02-16 2

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:06 | itr #2 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:06 | itr #2 | Logging diagnostics...
2019-02-16 20:14:06 | itr #2 | Optimizing policy...
2019-02-16 20:14:06 | itr #2 | Computing loss before
2019-02-16 20:14:06 | itr #2 | Computing KL before
2019-02-16 20:14:06 | itr #2 | Optimizing
2019-02-16 20:14:06 | itr #2 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:06 | itr #2 | computing loss before
2019-02-16 20:14:06 | itr #2 | performing update
2019-02-16 20:14:06 | itr #2 | computing gradient
2019-02-16 20:14:07 | itr #2 | gradient computed
2019-02-16 20:14:07 | itr #2 | computing descent direction
2019-02-16 20:14:07 | itr #2 | descent direction computed
2019-02-16 20:14:07 | itr #2 | backtrack iters: 1
2019-02-16 20:14:07 | itr #2 | computing loss after
2019-02-16 20:14:07 | itr #2 | optimization finished
2019-02-16 20:14:07 | itr #2 | Computing KL after
2019-02-16 20:14:07 | itr #2 | Computing loss after
2019-02-16 20:14:07 | itr #2 | Fitting baseline...
2019-02-16 2

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:07 | itr #3 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:07 | itr #3 | Logging diagnostics...
2019-02-16 20:14:07 | itr #3 | Optimizing policy...
2019-02-16 20:14:07 | itr #3 | Computing loss before
2019-02-16 20:14:07 | itr #3 | Computing KL before
2019-02-16 20:14:07 | itr #3 | Optimizing
2019-02-16 20:14:07 | itr #3 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:07 | itr #3 | computing loss before
2019-02-16 20:14:07 | itr #3 | performing update
2019-02-16 20:14:07 | itr #3 | computing gradient
2019-02-16 20:14:07 | itr #3 | gradient computed
2019-02-16 20:14:07 | itr #3 | computing descent direction
2019-02-16 20:14:08 | itr #3 | descent direction computed
2019-02-16 20:14:08 | itr #3 | backtrack iters: 1
2019-02-16 20:14:08 | itr #3 | computing loss after
2019-02-16 20:14:08 | itr #3 | optimization finished
2019-02-16 20:14:08 | itr #3 | Computing KL after
2019-02-16 20:14:08 | itr #3 | Computing loss after
2019-02-16 20:14:08 | itr #3 | Fitting baseline...
2019-02-16 2

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00

2019-02-16 20:14:08 | itr #4 | Processing samples...





2019-02-16 20:14:08 | itr #4 | Logging diagnostics...
2019-02-16 20:14:08 | itr #4 | Optimizing policy...
2019-02-16 20:14:08 | itr #4 | Computing loss before
2019-02-16 20:14:08 | itr #4 | Computing KL before
2019-02-16 20:14:08 | itr #4 | Optimizing
2019-02-16 20:14:08 | itr #4 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:08 | itr #4 | computing loss before
2019-02-16 20:14:08 | itr #4 | performing update
2019-02-16 20:14:08 | itr #4 | computing gradient
2019-02-16 20:14:08 | itr #4 | gradient computed
2019-02-16 20:14:08 | itr #4 | computing descent direction
2019-02-16 20:14:08 | itr #4 | descent direction computed
2019-02-16 20:14:08 | itr #4 | backtrack iters: 1
2019-02-16 20:14:08 | itr #4 | computing loss after
2019-02-16 20:14:08 | itr #4 | optimization finished
2019-02-16 20:14:08 | itr #4 | Computing KL after
2019-02-16 20:14:08 | itr #4 | Computing loss after
2019-02-16 20:14:09 | itr #4 | Fitting baseline...
2019-02-16 2

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:09 | itr #5 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:09 | itr #5 | Logging diagnostics...
2019-02-16 20:14:09 | itr #5 | Optimizing policy...
2019-02-16 20:14:09 | itr #5 | Computing loss before
2019-02-16 20:14:09 | itr #5 | Computing KL before
2019-02-16 20:14:09 | itr #5 | Optimizing
2019-02-16 20:14:09 | itr #5 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:09 | itr #5 | computing loss before
2019-02-16 20:14:09 | itr #5 | performing update
2019-02-16 20:14:09 | itr #5 | computing gradient
2019-02-16 20:14:09 | itr #5 | gradient computed
2019-02-16 20:14:09 | itr #5 | computing descent direction
2019-02-16 20:14:09 | itr #5 | descent direction computed
2019-02-16 20:14:09 | itr #5 | backtrack iters: 1
2019-02-16 20:14:09 | itr #5 | computing loss after
2019-02-16 20:14:09 | itr #5 | optimization finished
2019-02-16 20:14:09 | itr #5 | Computing KL after
2019-02-16 20:14:09 | itr #5 | Computing loss after
2019-02-16 20:14:09 | itr #5 | Fitting baseline...
2019-02-16 2

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:10 | itr #6 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:10 | itr #6 | Logging diagnostics...
2019-02-16 20:14:10 | itr #6 | Optimizing policy...
2019-02-16 20:14:10 | itr #6 | Computing loss before
2019-02-16 20:14:10 | itr #6 | Computing KL before
2019-02-16 20:14:10 | itr #6 | Optimizing
2019-02-16 20:14:10 | itr #6 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:10 | itr #6 | computing loss before
2019-02-16 20:14:10 | itr #6 | performing update
2019-02-16 20:14:10 | itr #6 | computing gradient
2019-02-16 20:14:10 | itr #6 | gradient computed
2019-02-16 20:14:10 | itr #6 | computing descent direction
2019-02-16 20:14:10 | itr #6 | descent direction computed
2019-02-16 20:14:10 | itr #6 | backtrack iters: 1
2019-02-16 20:14:10 | itr #6 | computing loss after
2019-02-16 20:14:10 | itr #6 | optimization finished
2019-02-16 20:14:10 | itr #6 | Computing KL after
2019-02-16 20:14:10 | itr #6 | Computing loss after
2019-02-16 20:14:10 | itr #6 | Fitting baseline...
2019-02-16 2

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00

2019-02-16 20:14:11 | itr #7 | Processing samples...





2019-02-16 20:14:11 | itr #7 | Logging diagnostics...
2019-02-16 20:14:11 | itr #7 | Optimizing policy...
2019-02-16 20:14:11 | itr #7 | Computing loss before
2019-02-16 20:14:11 | itr #7 | Computing KL before
2019-02-16 20:14:11 | itr #7 | Optimizing
2019-02-16 20:14:11 | itr #7 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:11 | itr #7 | computing loss before
2019-02-16 20:14:11 | itr #7 | performing update
2019-02-16 20:14:11 | itr #7 | computing gradient
2019-02-16 20:14:11 | itr #7 | gradient computed
2019-02-16 20:14:11 | itr #7 | computing descent direction
2019-02-16 20:14:11 | itr #7 | descent direction computed
2019-02-16 20:14:11 | itr #7 | backtrack iters: 1
2019-02-16 20:14:11 | itr #7 | computing loss after
2019-02-16 20:14:11 | itr #7 | optimization finished
2019-02-16 20:14:11 | itr #7 | Computing KL after
2019-02-16 20:14:11 | itr #7 | Computing loss after
2019-02-16 20:14:11 | itr #7 | Fitting baseline...
2019-02-16 2

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:12 | itr #8 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:12 | itr #8 | Logging diagnostics...
2019-02-16 20:14:12 | itr #8 | Optimizing policy...
2019-02-16 20:14:12 | itr #8 | Computing loss before
2019-02-16 20:14:12 | itr #8 | Computing KL before
2019-02-16 20:14:12 | itr #8 | Optimizing
2019-02-16 20:14:12 | itr #8 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:12 | itr #8 | computing loss before
2019-02-16 20:14:12 | itr #8 | performing update
2019-02-16 20:14:12 | itr #8 | computing gradient
2019-02-16 20:14:12 | itr #8 | gradient computed
2019-02-16 20:14:12 | itr #8 | computing descent direction
2019-02-16 20:14:12 | itr #8 | descent direction computed
2019-02-16 20:14:12 | itr #8 | backtrack iters: 0
2019-02-16 20:14:12 | itr #8 | computing loss after
2019-02-16 20:14:12 | itr #8 | optimization finished
2019-02-16 20:14:12 | itr #8 | Computing KL after
2019-02-16 20:14:12 | itr #8 | Computing loss after
2019-02-16 20:14:12 | itr #8 | Fitting baseline...
2019-02-16 2

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:12 | itr #9 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:13 | itr #9 | Logging diagnostics...
2019-02-16 20:14:13 | itr #9 | Optimizing policy...
2019-02-16 20:14:13 | itr #9 | Computing loss before
2019-02-16 20:14:13 | itr #9 | Computing KL before
2019-02-16 20:14:13 | itr #9 | Optimizing
2019-02-16 20:14:13 | itr #9 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:13 | itr #9 | computing loss before
2019-02-16 20:14:13 | itr #9 | performing update
2019-02-16 20:14:13 | itr #9 | computing gradient
2019-02-16 20:14:13 | itr #9 | gradient computed
2019-02-16 20:14:13 | itr #9 | computing descent direction
2019-02-16 20:14:13 | itr #9 | descent direction computed
2019-02-16 20:14:13 | itr #9 | backtrack iters: 0
2019-02-16 20:14:13 | itr #9 | computing loss after
2019-02-16 20:14:13 | itr #9 | optimization finished
2019-02-16 20:14:13 | itr #9 | Computing KL after
2019-02-16 20:14:13 | itr #9 | Computing loss after
2019-02-16 20:14:13 | itr #9 | Fitting baseline...
2019-02-16 2

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:13 | itr #10 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:13 | itr #10 | Logging diagnostics...
2019-02-16 20:14:13 | itr #10 | Optimizing policy...
2019-02-16 20:14:13 | itr #10 | Computing loss before
2019-02-16 20:14:13 | itr #10 | Computing KL before
2019-02-16 20:14:13 | itr #10 | Optimizing
2019-02-16 20:14:13 | itr #10 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:13 | itr #10 | computing loss before
2019-02-16 20:14:13 | itr #10 | performing update
2019-02-16 20:14:13 | itr #10 | computing gradient
2019-02-16 20:14:13 | itr #10 | gradient computed
2019-02-16 20:14:13 | itr #10 | computing descent direction
2019-02-16 20:14:14 | itr #10 | descent direction computed
2019-02-16 20:14:14 | itr #10 | backtrack iters: 0
2019-02-16 20:14:14 | itr #10 | computing loss after
2019-02-16 20:14:14 | itr #10 | optimization finished
2019-02-16 20:14:14 | itr #10 | Computing KL after
2019-02-16 20:14:14 | itr #10 | Computing loss after
2019-02-16 20:14:14 | itr #10 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:14 | itr #11 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:14 | itr #11 | Logging diagnostics...
2019-02-16 20:14:14 | itr #11 | Optimizing policy...
2019-02-16 20:14:14 | itr #11 | Computing loss before
2019-02-16 20:14:14 | itr #11 | Computing KL before
2019-02-16 20:14:14 | itr #11 | Optimizing
2019-02-16 20:14:14 | itr #11 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:14 | itr #11 | computing loss before
2019-02-16 20:14:14 | itr #11 | performing update
2019-02-16 20:14:14 | itr #11 | computing gradient
2019-02-16 20:14:14 | itr #11 | gradient computed
2019-02-16 20:14:14 | itr #11 | computing descent direction
2019-02-16 20:14:15 | itr #11 | descent direction computed
2019-02-16 20:14:15 | itr #11 | backtrack iters: 0
2019-02-16 20:14:15 | itr #11 | computing loss after
2019-02-16 20:14:15 | itr #11 | optimization finished
2019-02-16 20:14:15 | itr #11 | Computing KL after
2019-02-16 20:14:15 | itr #11 | Computing loss after
2019-02-16 20:14:15 | itr #11 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:15 | itr #12 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:15 | itr #12 | Logging diagnostics...
2019-02-16 20:14:15 | itr #12 | Optimizing policy...
2019-02-16 20:14:15 | itr #12 | Computing loss before
2019-02-16 20:14:15 | itr #12 | Computing KL before
2019-02-16 20:14:15 | itr #12 | Optimizing
2019-02-16 20:14:15 | itr #12 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:15 | itr #12 | computing loss before
2019-02-16 20:14:15 | itr #12 | performing update
2019-02-16 20:14:15 | itr #12 | computing gradient
2019-02-16 20:14:15 | itr #12 | gradient computed
2019-02-16 20:14:15 | itr #12 | computing descent direction
2019-02-16 20:14:15 | itr #12 | descent direction computed
2019-02-16 20:14:15 | itr #12 | backtrack iters: 0
2019-02-16 20:14:15 | itr #12 | computing loss after
2019-02-16 20:14:15 | itr #12 | optimization finished
2019-02-16 20:14:15 | itr #12 | Computing KL after
2019-02-16 20:14:15 | itr #12 | Computing loss after
2019-02-16 20:14:15 | itr #12 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:16 | itr #13 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:16 | itr #13 | Logging diagnostics...
2019-02-16 20:14:16 | itr #13 | Optimizing policy...
2019-02-16 20:14:16 | itr #13 | Computing loss before
2019-02-16 20:14:16 | itr #13 | Computing KL before
2019-02-16 20:14:16 | itr #13 | Optimizing
2019-02-16 20:14:16 | itr #13 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:16 | itr #13 | computing loss before
2019-02-16 20:14:16 | itr #13 | performing update
2019-02-16 20:14:16 | itr #13 | computing gradient
2019-02-16 20:14:16 | itr #13 | gradient computed
2019-02-16 20:14:16 | itr #13 | computing descent direction
2019-02-16 20:14:16 | itr #13 | descent direction computed
2019-02-16 20:14:16 | itr #13 | backtrack iters: 0
2019-02-16 20:14:16 | itr #13 | computing loss after
2019-02-16 20:14:16 | itr #13 | optimization finished
2019-02-16 20:14:16 | itr #13 | Computing KL after
2019-02-16 20:14:16 | itr #13 | Computing loss after
2019-02-16 20:14:16 | itr #13 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:17 | itr #14 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:17 | itr #14 | Logging diagnostics...
2019-02-16 20:14:17 | itr #14 | Optimizing policy...
2019-02-16 20:14:17 | itr #14 | Computing loss before
2019-02-16 20:14:17 | itr #14 | Computing KL before
2019-02-16 20:14:17 | itr #14 | Optimizing
2019-02-16 20:14:17 | itr #14 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:17 | itr #14 | computing loss before
2019-02-16 20:14:17 | itr #14 | performing update
2019-02-16 20:14:17 | itr #14 | computing gradient
2019-02-16 20:14:17 | itr #14 | gradient computed
2019-02-16 20:14:17 | itr #14 | computing descent direction
2019-02-16 20:14:17 | itr #14 | descent direction computed
2019-02-16 20:14:17 | itr #14 | backtrack iters: 0
2019-02-16 20:14:17 | itr #14 | computing loss after
2019-02-16 20:14:17 | itr #14 | optimization finished
2019-02-16 20:14:17 | itr #14 | Computing KL after
2019-02-16 20:14:17 | itr #14 | Computing loss after
2019-02-16 20:14:17 | itr #14 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:18 | itr #15 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:18 | itr #15 | Logging diagnostics...
2019-02-16 20:14:18 | itr #15 | Optimizing policy...
2019-02-16 20:14:18 | itr #15 | Computing loss before
2019-02-16 20:14:18 | itr #15 | Computing KL before
2019-02-16 20:14:18 | itr #15 | Optimizing
2019-02-16 20:14:18 | itr #15 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:18 | itr #15 | computing loss before
2019-02-16 20:14:18 | itr #15 | performing update
2019-02-16 20:14:18 | itr #15 | computing gradient
2019-02-16 20:14:18 | itr #15 | gradient computed
2019-02-16 20:14:18 | itr #15 | computing descent direction
2019-02-16 20:14:18 | itr #15 | descent direction computed
2019-02-16 20:14:18 | itr #15 | backtrack iters: 0
2019-02-16 20:14:18 | itr #15 | computing loss after
2019-02-16 20:14:18 | itr #15 | optimization finished
2019-02-16 20:14:18 | itr #15 | Computing KL after
2019-02-16 20:14:18 | itr #15 | Computing loss after
2019-02-16 20:14:18 | itr #15 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00


2019-02-16 20:14:18 | itr #16 | Processing samples...
2019-02-16 20:14:18 | itr #16 | Logging diagnostics...
2019-02-16 20:14:18 | itr #16 | Optimizing policy...
2019-02-16 20:14:18 | itr #16 | Computing loss before
2019-02-16 20:14:18 | itr #16 | Computing KL before
2019-02-16 20:14:18 | itr #16 | Optimizing
2019-02-16 20:14:18 | itr #16 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:18 | itr #16 | computing loss before
2019-02-16 20:14:18 | itr #16 | performing update
2019-02-16 20:14:18 | itr #16 | computing gradient
2019-02-16 20:14:18 | itr #16 | gradient computed
2019-02-16 20:14:18 | itr #16 | computing descent direction
2019-02-16 20:14:19 | itr #16 | descent direction computed
2019-02-16 20:14:19 | itr #16 | backtrack iters: 1
2019-02-16 20:14:19 | itr #16 | computing loss after
2019-02-16 20:14:19 | itr #16 | optimization finished
2019-02-16 20:14:19 | itr #16 | Computing KL after
2019-02-16 20:14:19 | itr #16 | Computing los

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:19 | itr #17 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:19 | itr #17 | Logging diagnostics...
2019-02-16 20:14:19 | itr #17 | Optimizing policy...
2019-02-16 20:14:19 | itr #17 | Computing loss before
2019-02-16 20:14:19 | itr #17 | Computing KL before
2019-02-16 20:14:19 | itr #17 | Optimizing
2019-02-16 20:14:19 | itr #17 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:19 | itr #17 | computing loss before
2019-02-16 20:14:19 | itr #17 | performing update
2019-02-16 20:14:19 | itr #17 | computing gradient
2019-02-16 20:14:19 | itr #17 | gradient computed
2019-02-16 20:14:19 | itr #17 | computing descent direction
2019-02-16 20:14:19 | itr #17 | descent direction computed
2019-02-16 20:14:19 | itr #17 | backtrack iters: 0
2019-02-16 20:14:19 | itr #17 | computing loss after
2019-02-16 20:14:19 | itr #17 | optimization finished
2019-02-16 20:14:19 | itr #17 | Computing KL after
2019-02-16 20:14:19 | itr #17 | Computing loss after
2019-02-16 20:14:20 | itr #17 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:20 | itr #18 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:20 | itr #18 | Logging diagnostics...
2019-02-16 20:14:20 | itr #18 | Optimizing policy...
2019-02-16 20:14:20 | itr #18 | Computing loss before
2019-02-16 20:14:20 | itr #18 | Computing KL before
2019-02-16 20:14:20 | itr #18 | Optimizing
2019-02-16 20:14:20 | itr #18 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:20 | itr #18 | computing loss before
2019-02-16 20:14:20 | itr #18 | performing update
2019-02-16 20:14:20 | itr #18 | computing gradient
2019-02-16 20:14:20 | itr #18 | gradient computed
2019-02-16 20:14:20 | itr #18 | computing descent direction
2019-02-16 20:14:20 | itr #18 | descent direction computed
2019-02-16 20:14:20 | itr #18 | backtrack iters: 4
2019-02-16 20:14:20 | itr #18 | computing loss after
2019-02-16 20:14:20 | itr #18 | optimization finished
2019-02-16 20:14:20 | itr #18 | Computing KL after
2019-02-16 20:14:20 | itr #18 | Computing loss after
2019-02-16 20:14:20 | itr #18 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:21 | itr #19 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:21 | itr #19 | Logging diagnostics...
2019-02-16 20:14:21 | itr #19 | Optimizing policy...
2019-02-16 20:14:21 | itr #19 | Computing loss before
2019-02-16 20:14:21 | itr #19 | Computing KL before
2019-02-16 20:14:21 | itr #19 | Optimizing
2019-02-16 20:14:21 | itr #19 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:21 | itr #19 | computing loss before
2019-02-16 20:14:21 | itr #19 | performing update
2019-02-16 20:14:21 | itr #19 | computing gradient
2019-02-16 20:14:21 | itr #19 | gradient computed
2019-02-16 20:14:21 | itr #19 | computing descent direction
2019-02-16 20:14:21 | itr #19 | descent direction computed
2019-02-16 20:14:21 | itr #19 | backtrack iters: 0
2019-02-16 20:14:21 | itr #19 | computing loss after
2019-02-16 20:14:21 | itr #19 | optimization finished
2019-02-16 20:14:21 | itr #19 | Computing KL after
2019-02-16 20:14:21 | itr #19 | Computing loss after
2019-02-16 20:14:21 | itr #19 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:22 | itr #20 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:22 | itr #20 | Logging diagnostics...
2019-02-16 20:14:22 | itr #20 | Optimizing policy...
2019-02-16 20:14:22 | itr #20 | Computing loss before
2019-02-16 20:14:22 | itr #20 | Computing KL before
2019-02-16 20:14:22 | itr #20 | Optimizing
2019-02-16 20:14:22 | itr #20 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:22 | itr #20 | computing loss before
2019-02-16 20:14:22 | itr #20 | performing update
2019-02-16 20:14:22 | itr #20 | computing gradient
2019-02-16 20:14:22 | itr #20 | gradient computed
2019-02-16 20:14:22 | itr #20 | computing descent direction
2019-02-16 20:14:22 | itr #20 | descent direction computed
2019-02-16 20:14:22 | itr #20 | backtrack iters: 0
2019-02-16 20:14:22 | itr #20 | computing loss after
2019-02-16 20:14:22 | itr #20 | optimization finished
2019-02-16 20:14:22 | itr #20 | Computing KL after
2019-02-16 20:14:22 | itr #20 | Computing loss after
2019-02-16 20:14:22 | itr #20 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:22 | itr #21 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:22 | itr #21 | Logging diagnostics...
2019-02-16 20:14:22 | itr #21 | Optimizing policy...
2019-02-16 20:14:22 | itr #21 | Computing loss before
2019-02-16 20:14:22 | itr #21 | Computing KL before
2019-02-16 20:14:22 | itr #21 | Optimizing
2019-02-16 20:14:22 | itr #21 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:22 | itr #21 | computing loss before
2019-02-16 20:14:22 | itr #21 | performing update
2019-02-16 20:14:22 | itr #21 | computing gradient
2019-02-16 20:14:23 | itr #21 | gradient computed
2019-02-16 20:14:23 | itr #21 | computing descent direction
2019-02-16 20:14:23 | itr #21 | descent direction computed
2019-02-16 20:14:23 | itr #21 | backtrack iters: 1
2019-02-16 20:14:23 | itr #21 | computing loss after
2019-02-16 20:14:23 | itr #21 | optimization finished
2019-02-16 20:14:23 | itr #21 | Computing KL after
2019-02-16 20:14:23 | itr #21 | Computing loss after
2019-02-16 20:14:23 | itr #21 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:23 | itr #22 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:23 | itr #22 | Logging diagnostics...
2019-02-16 20:14:23 | itr #22 | Optimizing policy...
2019-02-16 20:14:23 | itr #22 | Computing loss before
2019-02-16 20:14:23 | itr #22 | Computing KL before
2019-02-16 20:14:23 | itr #22 | Optimizing
2019-02-16 20:14:23 | itr #22 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:23 | itr #22 | computing loss before
2019-02-16 20:14:23 | itr #22 | performing update
2019-02-16 20:14:23 | itr #22 | computing gradient
2019-02-16 20:14:23 | itr #22 | gradient computed
2019-02-16 20:14:23 | itr #22 | computing descent direction
2019-02-16 20:14:23 | itr #22 | descent direction computed
2019-02-16 20:14:23 | itr #22 | backtrack iters: 0
2019-02-16 20:14:23 | itr #22 | computing loss after
2019-02-16 20:14:23 | itr #22 | optimization finished
2019-02-16 20:14:23 | itr #22 | Computing KL after
2019-02-16 20:14:23 | itr #22 | Computing loss after
2019-02-16 20:14:24 | itr #22 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:24 | itr #23 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:24 | itr #23 | Logging diagnostics...
2019-02-16 20:14:24 | itr #23 | Optimizing policy...
2019-02-16 20:14:24 | itr #23 | Computing loss before
2019-02-16 20:14:24 | itr #23 | Computing KL before
2019-02-16 20:14:24 | itr #23 | Optimizing
2019-02-16 20:14:24 | itr #23 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:24 | itr #23 | computing loss before
2019-02-16 20:14:24 | itr #23 | performing update
2019-02-16 20:14:24 | itr #23 | computing gradient
2019-02-16 20:14:24 | itr #23 | gradient computed
2019-02-16 20:14:24 | itr #23 | computing descent direction
2019-02-16 20:14:24 | itr #23 | descent direction computed
2019-02-16 20:14:24 | itr #23 | backtrack iters: 4
2019-02-16 20:14:24 | itr #23 | computing loss after
2019-02-16 20:14:24 | itr #23 | optimization finished
2019-02-16 20:14:24 | itr #23 | Computing KL after
2019-02-16 20:14:24 | itr #23 | Computing loss after
2019-02-16 20:14:24 | itr #23 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:25 | itr #24 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:25 | itr #24 | Logging diagnostics...
2019-02-16 20:14:25 | itr #24 | Optimizing policy...
2019-02-16 20:14:25 | itr #24 | Computing loss before
2019-02-16 20:14:25 | itr #24 | Computing KL before
2019-02-16 20:14:25 | itr #24 | Optimizing
2019-02-16 20:14:25 | itr #24 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:25 | itr #24 | computing loss before
2019-02-16 20:14:25 | itr #24 | performing update
2019-02-16 20:14:25 | itr #24 | computing gradient
2019-02-16 20:14:25 | itr #24 | gradient computed
2019-02-16 20:14:25 | itr #24 | computing descent direction
2019-02-16 20:14:25 | itr #24 | descent direction computed
2019-02-16 20:14:25 | itr #24 | backtrack iters: 0
2019-02-16 20:14:25 | itr #24 | computing loss after
2019-02-16 20:14:25 | itr #24 | optimization finished
2019-02-16 20:14:25 | itr #24 | Computing KL after
2019-02-16 20:14:25 | itr #24 | Computing loss after
2019-02-16 20:14:25 | itr #24 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:26 | itr #25 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:26 | itr #25 | Logging diagnostics...
2019-02-16 20:14:26 | itr #25 | Optimizing policy...
2019-02-16 20:14:26 | itr #25 | Computing loss before
2019-02-16 20:14:26 | itr #25 | Computing KL before
2019-02-16 20:14:26 | itr #25 | Optimizing
2019-02-16 20:14:26 | itr #25 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:26 | itr #25 | computing loss before
2019-02-16 20:14:26 | itr #25 | performing update
2019-02-16 20:14:26 | itr #25 | computing gradient
2019-02-16 20:14:26 | itr #25 | gradient computed
2019-02-16 20:14:26 | itr #25 | computing descent direction
2019-02-16 20:14:26 | itr #25 | descent direction computed
2019-02-16 20:14:26 | itr #25 | backtrack iters: 0
2019-02-16 20:14:26 | itr #25 | computing loss after
2019-02-16 20:14:26 | itr #25 | optimization finished
2019-02-16 20:14:26 | itr #25 | Computing KL after
2019-02-16 20:14:26 | itr #25 | Computing loss after
2019-02-16 20:14:26 | itr #25 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:26 | itr #26 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:27 | itr #26 | Logging diagnostics...
2019-02-16 20:14:27 | itr #26 | Optimizing policy...
2019-02-16 20:14:27 | itr #26 | Computing loss before
2019-02-16 20:14:27 | itr #26 | Computing KL before
2019-02-16 20:14:27 | itr #26 | Optimizing
2019-02-16 20:14:27 | itr #26 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:27 | itr #26 | computing loss before
2019-02-16 20:14:27 | itr #26 | performing update
2019-02-16 20:14:27 | itr #26 | computing gradient
2019-02-16 20:14:27 | itr #26 | gradient computed
2019-02-16 20:14:27 | itr #26 | computing descent direction
2019-02-16 20:14:27 | itr #26 | descent direction computed
2019-02-16 20:14:27 | itr #26 | backtrack iters: 2
2019-02-16 20:14:27 | itr #26 | computing loss after
2019-02-16 20:14:27 | itr #26 | optimization finished
2019-02-16 20:14:27 | itr #26 | Computing KL after
2019-02-16 20:14:27 | itr #26 | Computing loss after
2019-02-16 20:14:27 | itr #26 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:27 | itr #27 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:27 | itr #27 | Logging diagnostics...
2019-02-16 20:14:27 | itr #27 | Optimizing policy...
2019-02-16 20:14:27 | itr #27 | Computing loss before
2019-02-16 20:14:27 | itr #27 | Computing KL before
2019-02-16 20:14:27 | itr #27 | Optimizing
2019-02-16 20:14:27 | itr #27 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:27 | itr #27 | computing loss before
2019-02-16 20:14:27 | itr #27 | performing update
2019-02-16 20:14:27 | itr #27 | computing gradient
2019-02-16 20:14:27 | itr #27 | gradient computed
2019-02-16 20:14:27 | itr #27 | computing descent direction
2019-02-16 20:14:28 | itr #27 | descent direction computed
2019-02-16 20:14:28 | itr #27 | backtrack iters: 1
2019-02-16 20:14:28 | itr #27 | computing loss after
2019-02-16 20:14:28 | itr #27 | optimization finished
2019-02-16 20:14:28 | itr #27 | Computing KL after
2019-02-16 20:14:28 | itr #27 | Computing loss after
2019-02-16 20:14:28 | itr #27 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:28 | itr #28 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:28 | itr #28 | Logging diagnostics...
2019-02-16 20:14:28 | itr #28 | Optimizing policy...
2019-02-16 20:14:28 | itr #28 | Computing loss before
2019-02-16 20:14:28 | itr #28 | Computing KL before
2019-02-16 20:14:28 | itr #28 | Optimizing
2019-02-16 20:14:28 | itr #28 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:28 | itr #28 | computing loss before
2019-02-16 20:14:28 | itr #28 | performing update
2019-02-16 20:14:28 | itr #28 | computing gradient
2019-02-16 20:14:28 | itr #28 | gradient computed
2019-02-16 20:14:28 | itr #28 | computing descent direction
2019-02-16 20:14:28 | itr #28 | descent direction computed
2019-02-16 20:14:28 | itr #28 | backtrack iters: 0
2019-02-16 20:14:28 | itr #28 | computing loss after
2019-02-16 20:14:28 | itr #28 | optimization finished
2019-02-16 20:14:28 | itr #28 | Computing KL after
2019-02-16 20:14:28 | itr #28 | Computing loss after
2019-02-16 20:14:28 | itr #28 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:29 | itr #29 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:29 | itr #29 | Logging diagnostics...
2019-02-16 20:14:29 | itr #29 | Optimizing policy...
2019-02-16 20:14:29 | itr #29 | Computing loss before
2019-02-16 20:14:29 | itr #29 | Computing KL before
2019-02-16 20:14:29 | itr #29 | Optimizing
2019-02-16 20:14:29 | itr #29 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:29 | itr #29 | computing loss before
2019-02-16 20:14:29 | itr #29 | performing update
2019-02-16 20:14:29 | itr #29 | computing gradient
2019-02-16 20:14:29 | itr #29 | gradient computed
2019-02-16 20:14:29 | itr #29 | computing descent direction
2019-02-16 20:14:29 | itr #29 | descent direction computed
2019-02-16 20:14:29 | itr #29 | backtrack iters: 0
2019-02-16 20:14:29 | itr #29 | computing loss after
2019-02-16 20:14:29 | itr #29 | optimization finished
2019-02-16 20:14:29 | itr #29 | Computing KL after
2019-02-16 20:14:29 | itr #29 | Computing loss after
2019-02-16 20:14:29 | itr #29 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:30 | itr #30 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:30 | itr #30 | Logging diagnostics...
2019-02-16 20:14:30 | itr #30 | Optimizing policy...
2019-02-16 20:14:30 | itr #30 | Computing loss before
2019-02-16 20:14:30 | itr #30 | Computing KL before
2019-02-16 20:14:30 | itr #30 | Optimizing
2019-02-16 20:14:30 | itr #30 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:30 | itr #30 | computing loss before
2019-02-16 20:14:30 | itr #30 | performing update
2019-02-16 20:14:30 | itr #30 | computing gradient
2019-02-16 20:14:30 | itr #30 | gradient computed
2019-02-16 20:14:30 | itr #30 | computing descent direction
2019-02-16 20:14:30 | itr #30 | descent direction computed
2019-02-16 20:14:30 | itr #30 | backtrack iters: 0
2019-02-16 20:14:30 | itr #30 | computing loss after
2019-02-16 20:14:30 | itr #30 | optimization finished
2019-02-16 20:14:30 | itr #30 | Computing KL after
2019-02-16 20:14:30 | itr #30 | Computing loss after
2019-02-16 20:14:30 | itr #30 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:30 | itr #31 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:31 | itr #31 | Logging diagnostics...
2019-02-16 20:14:31 | itr #31 | Optimizing policy...
2019-02-16 20:14:31 | itr #31 | Computing loss before
2019-02-16 20:14:31 | itr #31 | Computing KL before
2019-02-16 20:14:31 | itr #31 | Optimizing
2019-02-16 20:14:31 | itr #31 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:31 | itr #31 | computing loss before
2019-02-16 20:14:31 | itr #31 | performing update
2019-02-16 20:14:31 | itr #31 | computing gradient
2019-02-16 20:14:31 | itr #31 | gradient computed
2019-02-16 20:14:31 | itr #31 | computing descent direction
2019-02-16 20:14:31 | itr #31 | descent direction computed
2019-02-16 20:14:31 | itr #31 | backtrack iters: 0
2019-02-16 20:14:31 | itr #31 | computing loss after
2019-02-16 20:14:31 | itr #31 | optimization finished
2019-02-16 20:14:31 | itr #31 | Computing KL after
2019-02-16 20:14:31 | itr #31 | Computing loss after
2019-02-16 20:14:31 | itr #31 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:31 | itr #32 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:31 | itr #32 | Logging diagnostics...
2019-02-16 20:14:31 | itr #32 | Optimizing policy...
2019-02-16 20:14:31 | itr #32 | Computing loss before
2019-02-16 20:14:31 | itr #32 | Computing KL before
2019-02-16 20:14:31 | itr #32 | Optimizing
2019-02-16 20:14:31 | itr #32 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:31 | itr #32 | computing loss before
2019-02-16 20:14:31 | itr #32 | performing update
2019-02-16 20:14:31 | itr #32 | computing gradient
2019-02-16 20:14:31 | itr #32 | gradient computed
2019-02-16 20:14:31 | itr #32 | computing descent direction
2019-02-16 20:14:32 | itr #32 | descent direction computed
2019-02-16 20:14:32 | itr #32 | backtrack iters: 1
2019-02-16 20:14:32 | itr #32 | computing loss after
2019-02-16 20:14:32 | itr #32 | optimization finished
2019-02-16 20:14:32 | itr #32 | Computing KL after
2019-02-16 20:14:32 | itr #32 | Computing loss after
2019-02-16 20:14:32 | itr #32 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:32 | itr #33 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:32 | itr #33 | Logging diagnostics...
2019-02-16 20:14:32 | itr #33 | Optimizing policy...
2019-02-16 20:14:32 | itr #33 | Computing loss before
2019-02-16 20:14:32 | itr #33 | Computing KL before
2019-02-16 20:14:32 | itr #33 | Optimizing
2019-02-16 20:14:32 | itr #33 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:32 | itr #33 | computing loss before
2019-02-16 20:14:32 | itr #33 | performing update
2019-02-16 20:14:32 | itr #33 | computing gradient
2019-02-16 20:14:32 | itr #33 | gradient computed
2019-02-16 20:14:32 | itr #33 | computing descent direction
2019-02-16 20:14:32 | itr #33 | descent direction computed
2019-02-16 20:14:32 | itr #33 | backtrack iters: 0
2019-02-16 20:14:32 | itr #33 | computing loss after
2019-02-16 20:14:32 | itr #33 | optimization finished
2019-02-16 20:14:32 | itr #33 | Computing KL after
2019-02-16 20:14:32 | itr #33 | Computing loss after
2019-02-16 20:14:33 | itr #33 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:33 | itr #34 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:33 | itr #34 | Logging diagnostics...
2019-02-16 20:14:33 | itr #34 | Optimizing policy...
2019-02-16 20:14:33 | itr #34 | Computing loss before
2019-02-16 20:14:33 | itr #34 | Computing KL before
2019-02-16 20:14:33 | itr #34 | Optimizing
2019-02-16 20:14:33 | itr #34 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:33 | itr #34 | computing loss before
2019-02-16 20:14:33 | itr #34 | performing update
2019-02-16 20:14:33 | itr #34 | computing gradient
2019-02-16 20:14:33 | itr #34 | gradient computed
2019-02-16 20:14:33 | itr #34 | computing descent direction
2019-02-16 20:14:33 | itr #34 | descent direction computed
2019-02-16 20:14:33 | itr #34 | backtrack iters: 0
2019-02-16 20:14:33 | itr #34 | computing loss after
2019-02-16 20:14:33 | itr #34 | optimization finished
2019-02-16 20:14:33 | itr #34 | Computing KL after
2019-02-16 20:14:33 | itr #34 | Computing loss after
2019-02-16 20:14:33 | itr #34 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:34 | itr #35 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:34 | itr #35 | Logging diagnostics...
2019-02-16 20:14:34 | itr #35 | Optimizing policy...
2019-02-16 20:14:34 | itr #35 | Computing loss before
2019-02-16 20:14:34 | itr #35 | Computing KL before
2019-02-16 20:14:34 | itr #35 | Optimizing
2019-02-16 20:14:34 | itr #35 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:34 | itr #35 | computing loss before
2019-02-16 20:14:34 | itr #35 | performing update
2019-02-16 20:14:34 | itr #35 | computing gradient
2019-02-16 20:14:34 | itr #35 | gradient computed
2019-02-16 20:14:34 | itr #35 | computing descent direction
2019-02-16 20:14:34 | itr #35 | descent direction computed
2019-02-16 20:14:34 | itr #35 | backtrack iters: 1
2019-02-16 20:14:34 | itr #35 | computing loss after
2019-02-16 20:14:34 | itr #35 | optimization finished
2019-02-16 20:14:34 | itr #35 | Computing KL after
2019-02-16 20:14:34 | itr #35 | Computing loss after
2019-02-16 20:14:34 | itr #35 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:34 | itr #36 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:35 | itr #36 | Logging diagnostics...
2019-02-16 20:14:35 | itr #36 | Optimizing policy...
2019-02-16 20:14:35 | itr #36 | Computing loss before
2019-02-16 20:14:35 | itr #36 | Computing KL before
2019-02-16 20:14:35 | itr #36 | Optimizing
2019-02-16 20:14:35 | itr #36 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:35 | itr #36 | computing loss before
2019-02-16 20:14:35 | itr #36 | performing update
2019-02-16 20:14:35 | itr #36 | computing gradient
2019-02-16 20:14:35 | itr #36 | gradient computed
2019-02-16 20:14:35 | itr #36 | computing descent direction
2019-02-16 20:14:35 | itr #36 | descent direction computed
2019-02-16 20:14:35 | itr #36 | backtrack iters: 0
2019-02-16 20:14:35 | itr #36 | computing loss after
2019-02-16 20:14:35 | itr #36 | optimization finished
2019-02-16 20:14:35 | itr #36 | Computing KL after
2019-02-16 20:14:35 | itr #36 | Computing loss after
2019-02-16 20:14:35 | itr #36 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:35 | itr #37 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:35 | itr #37 | Logging diagnostics...
2019-02-16 20:14:35 | itr #37 | Optimizing policy...
2019-02-16 20:14:35 | itr #37 | Computing loss before
2019-02-16 20:14:35 | itr #37 | Computing KL before
2019-02-16 20:14:35 | itr #37 | Optimizing
2019-02-16 20:14:35 | itr #37 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:35 | itr #37 | computing loss before
2019-02-16 20:14:35 | itr #37 | performing update
2019-02-16 20:14:35 | itr #37 | computing gradient
2019-02-16 20:14:35 | itr #37 | gradient computed
2019-02-16 20:14:35 | itr #37 | computing descent direction
2019-02-16 20:14:36 | itr #37 | descent direction computed
2019-02-16 20:14:36 | itr #37 | backtrack iters: 2
2019-02-16 20:14:36 | itr #37 | computing loss after
2019-02-16 20:14:36 | itr #37 | optimization finished
2019-02-16 20:14:36 | itr #37 | Computing KL after
2019-02-16 20:14:36 | itr #37 | Computing loss after
2019-02-16 20:14:36 | itr #37 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:36 | itr #38 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:36 | itr #38 | Logging diagnostics...
2019-02-16 20:14:36 | itr #38 | Optimizing policy...
2019-02-16 20:14:36 | itr #38 | Computing loss before
2019-02-16 20:14:36 | itr #38 | Computing KL before
2019-02-16 20:14:36 | itr #38 | Optimizing
2019-02-16 20:14:36 | itr #38 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:36 | itr #38 | computing loss before
2019-02-16 20:14:36 | itr #38 | performing update
2019-02-16 20:14:36 | itr #38 | computing gradient
2019-02-16 20:14:36 | itr #38 | gradient computed
2019-02-16 20:14:36 | itr #38 | computing descent direction
2019-02-16 20:14:37 | itr #38 | descent direction computed
2019-02-16 20:14:37 | itr #38 | backtrack iters: 0
2019-02-16 20:14:37 | itr #38 | computing loss after
2019-02-16 20:14:37 | itr #38 | optimization finished
2019-02-16 20:14:37 | itr #38 | Computing KL after
2019-02-16 20:14:37 | itr #38 | Computing loss after
2019-02-16 20:14:37 | itr #38 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:37 | itr #39 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:37 | itr #39 | Logging diagnostics...
2019-02-16 20:14:37 | itr #39 | Optimizing policy...
2019-02-16 20:14:37 | itr #39 | Computing loss before
2019-02-16 20:14:37 | itr #39 | Computing KL before
2019-02-16 20:14:37 | itr #39 | Optimizing
2019-02-16 20:14:37 | itr #39 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:37 | itr #39 | computing loss before
2019-02-16 20:14:37 | itr #39 | performing update
2019-02-16 20:14:37 | itr #39 | computing gradient
2019-02-16 20:14:37 | itr #39 | gradient computed
2019-02-16 20:14:37 | itr #39 | computing descent direction
2019-02-16 20:14:37 | itr #39 | descent direction computed
2019-02-16 20:14:37 | itr #39 | backtrack iters: 0
2019-02-16 20:14:37 | itr #39 | computing loss after
2019-02-16 20:14:37 | itr #39 | optimization finished
2019-02-16 20:14:37 | itr #39 | Computing KL after
2019-02-16 20:14:37 | itr #39 | Computing loss after
2019-02-16 20:14:37 | itr #39 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:38 | itr #40 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:38 | itr #40 | Logging diagnostics...
2019-02-16 20:14:38 | itr #40 | Optimizing policy...
2019-02-16 20:14:38 | itr #40 | Computing loss before
2019-02-16 20:14:38 | itr #40 | Computing KL before
2019-02-16 20:14:38 | itr #40 | Optimizing
2019-02-16 20:14:38 | itr #40 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:38 | itr #40 | computing loss before
2019-02-16 20:14:38 | itr #40 | performing update
2019-02-16 20:14:38 | itr #40 | computing gradient
2019-02-16 20:14:38 | itr #40 | gradient computed
2019-02-16 20:14:38 | itr #40 | computing descent direction
2019-02-16 20:14:38 | itr #40 | descent direction computed
2019-02-16 20:14:38 | itr #40 | backtrack iters: 0
2019-02-16 20:14:38 | itr #40 | computing loss after
2019-02-16 20:14:38 | itr #40 | optimization finished
2019-02-16 20:14:38 | itr #40 | Computing KL after
2019-02-16 20:14:38 | itr #40 | Computing loss after
2019-02-16 20:14:38 | itr #40 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:39 | itr #41 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:39 | itr #41 | Logging diagnostics...
2019-02-16 20:14:39 | itr #41 | Optimizing policy...
2019-02-16 20:14:39 | itr #41 | Computing loss before
2019-02-16 20:14:39 | itr #41 | Computing KL before
2019-02-16 20:14:39 | itr #41 | Optimizing
2019-02-16 20:14:39 | itr #41 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:39 | itr #41 | computing loss before
2019-02-16 20:14:39 | itr #41 | performing update
2019-02-16 20:14:39 | itr #41 | computing gradient
2019-02-16 20:14:39 | itr #41 | gradient computed
2019-02-16 20:14:39 | itr #41 | computing descent direction
2019-02-16 20:14:39 | itr #41 | descent direction computed
2019-02-16 20:14:39 | itr #41 | backtrack iters: 1
2019-02-16 20:14:39 | itr #41 | computing loss after
2019-02-16 20:14:39 | itr #41 | optimization finished
2019-02-16 20:14:39 | itr #41 | Computing KL after
2019-02-16 20:14:39 | itr #41 | Computing loss after
2019-02-16 20:14:39 | itr #41 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:39 | itr #42 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:39 | itr #42 | Logging diagnostics...
2019-02-16 20:14:39 | itr #42 | Optimizing policy...
2019-02-16 20:14:39 | itr #42 | Computing loss before
2019-02-16 20:14:39 | itr #42 | Computing KL before
2019-02-16 20:14:39 | itr #42 | Optimizing
2019-02-16 20:14:39 | itr #42 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:40 | itr #42 | computing loss before
2019-02-16 20:14:40 | itr #42 | performing update
2019-02-16 20:14:40 | itr #42 | computing gradient
2019-02-16 20:14:40 | itr #42 | gradient computed
2019-02-16 20:14:40 | itr #42 | computing descent direction
2019-02-16 20:14:40 | itr #42 | descent direction computed
2019-02-16 20:14:40 | itr #42 | backtrack iters: 0
2019-02-16 20:14:40 | itr #42 | computing loss after
2019-02-16 20:14:40 | itr #42 | optimization finished
2019-02-16 20:14:40 | itr #42 | Computing KL after
2019-02-16 20:14:40 | itr #42 | Computing loss after
2019-02-16 20:14:40 | itr #42 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:40 | itr #43 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:40 | itr #43 | Logging diagnostics...
2019-02-16 20:14:40 | itr #43 | Optimizing policy...
2019-02-16 20:14:40 | itr #43 | Computing loss before
2019-02-16 20:14:40 | itr #43 | Computing KL before
2019-02-16 20:14:40 | itr #43 | Optimizing
2019-02-16 20:14:40 | itr #43 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:40 | itr #43 | computing loss before
2019-02-16 20:14:40 | itr #43 | performing update
2019-02-16 20:14:40 | itr #43 | computing gradient
2019-02-16 20:14:40 | itr #43 | gradient computed
2019-02-16 20:14:40 | itr #43 | computing descent direction
2019-02-16 20:14:40 | itr #43 | descent direction computed
2019-02-16 20:14:41 | itr #43 | backtrack iters: 3
2019-02-16 20:14:41 | itr #43 | computing loss after
2019-02-16 20:14:41 | itr #43 | optimization finished
2019-02-16 20:14:41 | itr #43 | Computing KL after
2019-02-16 20:14:41 | itr #43 | Computing loss after
2019-02-16 20:14:41 | itr #43 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:41 | itr #44 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:41 | itr #44 | Logging diagnostics...
2019-02-16 20:14:41 | itr #44 | Optimizing policy...
2019-02-16 20:14:41 | itr #44 | Computing loss before
2019-02-16 20:14:41 | itr #44 | Computing KL before
2019-02-16 20:14:41 | itr #44 | Optimizing
2019-02-16 20:14:41 | itr #44 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:41 | itr #44 | computing loss before
2019-02-16 20:14:41 | itr #44 | performing update
2019-02-16 20:14:41 | itr #44 | computing gradient
2019-02-16 20:14:41 | itr #44 | gradient computed
2019-02-16 20:14:41 | itr #44 | computing descent direction
2019-02-16 20:14:41 | itr #44 | descent direction computed
2019-02-16 20:14:41 | itr #44 | backtrack iters: 2
2019-02-16 20:14:41 | itr #44 | computing loss after
2019-02-16 20:14:41 | itr #44 | optimization finished
2019-02-16 20:14:41 | itr #44 | Computing KL after
2019-02-16 20:14:41 | itr #44 | Computing loss after
2019-02-16 20:14:41 | itr #44 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:42 | itr #45 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:42 | itr #45 | Logging diagnostics...
2019-02-16 20:14:42 | itr #45 | Optimizing policy...
2019-02-16 20:14:42 | itr #45 | Computing loss before
2019-02-16 20:14:42 | itr #45 | Computing KL before
2019-02-16 20:14:42 | itr #45 | Optimizing
2019-02-16 20:14:42 | itr #45 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:42 | itr #45 | computing loss before
2019-02-16 20:14:42 | itr #45 | performing update
2019-02-16 20:14:42 | itr #45 | computing gradient
2019-02-16 20:14:42 | itr #45 | gradient computed
2019-02-16 20:14:42 | itr #45 | computing descent direction
2019-02-16 20:14:42 | itr #45 | descent direction computed
2019-02-16 20:14:42 | itr #45 | backtrack iters: 1
2019-02-16 20:14:42 | itr #45 | computing loss after
2019-02-16 20:14:42 | itr #45 | optimization finished
2019-02-16 20:14:42 | itr #45 | Computing KL after
2019-02-16 20:14:42 | itr #45 | Computing loss after
2019-02-16 20:14:42 | itr #45 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:43 | itr #46 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:43 | itr #46 | Logging diagnostics...
2019-02-16 20:14:43 | itr #46 | Optimizing policy...
2019-02-16 20:14:43 | itr #46 | Computing loss before
2019-02-16 20:14:43 | itr #46 | Computing KL before
2019-02-16 20:14:43 | itr #46 | Optimizing
2019-02-16 20:14:43 | itr #46 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:43 | itr #46 | computing loss before
2019-02-16 20:14:43 | itr #46 | performing update
2019-02-16 20:14:43 | itr #46 | computing gradient
2019-02-16 20:14:43 | itr #46 | gradient computed
2019-02-16 20:14:43 | itr #46 | computing descent direction
2019-02-16 20:14:43 | itr #46 | descent direction computed
2019-02-16 20:14:43 | itr #46 | backtrack iters: 0
2019-02-16 20:14:43 | itr #46 | computing loss after
2019-02-16 20:14:43 | itr #46 | optimization finished
2019-02-16 20:14:43 | itr #46 | Computing KL after
2019-02-16 20:14:43 | itr #46 | Computing loss after
2019-02-16 20:14:43 | itr #46 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:43 | itr #47 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:43 | itr #47 | Logging diagnostics...
2019-02-16 20:14:43 | itr #47 | Optimizing policy...
2019-02-16 20:14:43 | itr #47 | Computing loss before
2019-02-16 20:14:43 | itr #47 | Computing KL before
2019-02-16 20:14:43 | itr #47 | Optimizing
2019-02-16 20:14:43 | itr #47 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:43 | itr #47 | computing loss before
2019-02-16 20:14:43 | itr #47 | performing update
2019-02-16 20:14:43 | itr #47 | computing gradient
2019-02-16 20:14:43 | itr #47 | gradient computed
2019-02-16 20:14:43 | itr #47 | computing descent direction
2019-02-16 20:14:43 | itr #47 | descent direction computed
2019-02-16 20:14:44 | itr #47 | backtrack iters: 0
2019-02-16 20:14:44 | itr #47 | computing loss after
2019-02-16 20:14:44 | itr #47 | optimization finished
2019-02-16 20:14:44 | itr #47 | Computing KL after
2019-02-16 20:14:44 | itr #47 | Computing loss after
2019-02-16 20:14:44 | itr #47 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:44 | itr #48 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:44 | itr #48 | Logging diagnostics...
2019-02-16 20:14:44 | itr #48 | Optimizing policy...
2019-02-16 20:14:44 | itr #48 | Computing loss before
2019-02-16 20:14:44 | itr #48 | Computing KL before
2019-02-16 20:14:44 | itr #48 | Optimizing
2019-02-16 20:14:44 | itr #48 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:44 | itr #48 | computing loss before
2019-02-16 20:14:44 | itr #48 | performing update
2019-02-16 20:14:44 | itr #48 | computing gradient
2019-02-16 20:14:44 | itr #48 | gradient computed
2019-02-16 20:14:44 | itr #48 | computing descent direction
2019-02-16 20:14:44 | itr #48 | descent direction computed
2019-02-16 20:14:44 | itr #48 | backtrack iters: 1
2019-02-16 20:14:44 | itr #48 | computing loss after
2019-02-16 20:14:44 | itr #48 | optimization finished
2019-02-16 20:14:44 | itr #48 | Computing KL after
2019-02-16 20:14:44 | itr #48 | Computing loss after
2019-02-16 20:14:44 | itr #48 | Fitting baseli

0% [##############################] 100% | ETA: 00:00:00

2019-02-16 20:14:45 | itr #49 | Processing samples...



Total time elapsed: 00:00:00


2019-02-16 20:14:45 | itr #49 | Logging diagnostics...
2019-02-16 20:14:45 | itr #49 | Optimizing policy...
2019-02-16 20:14:45 | itr #49 | Computing loss before
2019-02-16 20:14:45 | itr #49 | Computing KL before
2019-02-16 20:14:45 | itr #49 | Optimizing
2019-02-16 20:14:45 | itr #49 | Start CG optimization: #parameters: 1476, #inputs: 800, #subsample_inputs: 800
2019-02-16 20:14:45 | itr #49 | computing loss before
2019-02-16 20:14:45 | itr #49 | performing update
2019-02-16 20:14:45 | itr #49 | computing gradient
2019-02-16 20:14:45 | itr #49 | gradient computed
2019-02-16 20:14:45 | itr #49 | computing descent direction
2019-02-16 20:14:45 | itr #49 | descent direction computed
2019-02-16 20:14:45 | itr #49 | backtrack iters: 0
2019-02-16 20:14:45 | itr #49 | computing loss after
2019-02-16 20:14:45 | itr #49 | optimization finished
2019-02-16 20:14:45 | itr #49 | Computing KL after
2019-02-16 20:14:45 | itr #49 | Computing loss after
2019-02-16 20:14:45 | itr #49 | Fitting baseli

6.4925

In [0]:
# Names of the water-quality parameters tracked per sample.
# The order matters: wqi_to_watersample() pairs entry k of this list with
# bit k (least-significant bit first) of the integer sample index.
wq_elements = ["turbidity", "hardness", "bacteria"]

In [13]:
# utility function for experiments below
def wqi_to_watersample(wqi):
  """Decode an integer water-quality index into a dict of boolean flags.

  The index is formatted as a zero-padded binary string (at least 3 digits)
  and read least-significant bit first, so bit 0 is paired with the first
  name in ``wq_elements``, bit 1 with the second, and so on. With the list
  as defined in this notebook: 1 -> turbidity, 2 -> hardness, 4 -> bacteria.
  The reversal is therefore intentional: without it the most-significant
  bit would be paired with the first name.

  Returns a dict keyed by the names in ``wq_elements``.
  """
  # binary formatting recipe: https://stackoverflow.com/a/699891
  bits_lsb_first = format(wqi, '03b')[::-1]
  flags = [bool(int(bit)) for bit in bits_lsb_first]
  return dict(zip(wq_elements, flags))

# Smoke test: print the decoded sample for each of the 8 indices 0..7
for i in range(8):
  print(i, wqi_to_watersample(i))


0 {'turbidity': False, 'hardness': False, 'bacteria': False}
1 {'turbidity': True, 'hardness': False, 'bacteria': False}
2 {'turbidity': False, 'hardness': True, 'bacteria': False}
3 {'turbidity': True, 'hardness': True, 'bacteria': False}
4 {'turbidity': False, 'hardness': False, 'bacteria': True}
5 {'turbidity': True, 'hardness': False, 'bacteria': True}
6 {'turbidity': False, 'hardness': True, 'bacteria': True}
7 {'turbidity': True, 'hardness': True, 'bacteria': True}


In [14]:
# Utility function for experiments below

# Human-readable names of the plant elements. convert_results_to_df() uses
# each action value as an index into this list to name the chosen element.
# Earlier naming kept for reference (presumably named after the parameter treated):
#wtp_elements = [None, "turbidity", "hardness", "bacteria"]
wtp_elements = ["pipe", "sand filter", "softener", "UV"]

import pandas as pd

def convert_results_to_df(obs_all, act_all, out_all, rew_all):
    """Gather per-episode experiment results into a pandas DataFrame.

    Parameters
    ----------
    obs_all : sequence
        Initial observation of each episode, in the integer encoding
        accepted by ``wqi_to_watersample``.
    act_all : sequence of sequences
        Actions taken in each episode; every action is used as an index into
        ``wtp_elements``. Episodes may have different lengths (e.g. when a
        rollout stops early).
    out_all : sequence
        Final observation of each episode, same encoding as ``obs_all``.
    rew_all : sequence
        Reward recorded for each episode.

    Returns
    -------
    pandas.DataFrame
        One row per episode, sorted by the three ``in_*`` flags and then by
        ``reward``, and indexed by the ``in_*`` flags. The remaining columns
        are ``wtp/1`` .. ``wtp/N`` (N = length of the longest action list),
        the three ``out_*`` flags and ``reward``. Steps that an episode never
        reached are filled with ``None``.
    """
    in_cols = ["in_turbidity", "in_hardness", "in_bacteria"]
    out_cols = ["out_turbidity", "out_hardness", "out_bacteria"]

    n_rows = len(obs_all)
    # Size the wtp/* columns to the longest episode instead of assuming
    # exactly 5 steps: a shorter action list used to raise IndexError.
    n_steps = max((len(act_all[i]) for i in range(n_rows)), default=0)
    wtp_cols = ["wtp/%d" % (k + 1) for k in range(n_steps)]

    rows = []
    for i in range(n_rows):
        in_i = wqi_to_watersample(obs_all[i])
        out_i = wqi_to_watersample(out_all[i])
        wtp_i = [wtp_elements[x] for x in act_all[i]]

        row = {
            "in_turbidity": in_i["turbidity"],
            "in_hardness": in_i["hardness"],
            "in_bacteria": in_i["bacteria"],
            "out_turbidity": out_i["turbidity"],
            "out_hardness": out_i["hardness"],
            "out_bacteria": out_i["bacteria"],
            "reward": rew_all[i],
        }
        # pad episodes that ended early so every row has all wtp/* columns
        for k, col in enumerate(wtp_cols):
            row[col] = wtp_i[k] if k < len(wtp_i) else None
        rows.append(row)

    # Passing `columns` fixes the column order and also keeps the frame
    # well-formed (all columns present) when there are no rows at all.
    df = pd.DataFrame(rows, columns=in_cols + wtp_cols + out_cols + ["reward"])

    df = df.sort_values(in_cols + ["reward"]).set_index(in_cols)

    return df

# Smoke test with three hand-made episodes (inputs 0, 1, 2; the same 5-step
# action list for each; zero reward). The returned frame is the cell output.
print("TESTING")
convert_results_to_df([0,1,2], [ [0,1,2,0,0], [0,1,2,0,0], [0,1,2,0,0]], [0,1,2], [0,0,0])

TESTING


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,wtp/1,wtp/2,wtp/3,wtp/4,wtp/5,out_turbidity,out_hardness,out_bacteria,reward
in_turbidity,in_hardness,in_bacteria,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
False,False,False,pipe,sand filter,softener,pipe,pipe,False,False,False,0
False,True,False,pipe,sand filter,softener,pipe,pipe,False,True,False,0
True,False,False,pipe,sand filter,softener,pipe,pipe,True,False,False,0


# Results: test all 8 cases for combinations of 3 parameters

The table below shows that the trained designer proposes the correct element for each undesired water parameter: a sand filter for turbidity, a softener for hardness, UV for bacteria, or a combination of these when several parameters need treatment. Once no further treatment is needed, it simply fills the remaining slots with pipes.

In [25]:

# Roll out the policy once for each of the 8 water-quality indices (0..7,
# decoded by wqi_to_watersample) and record, per episode, the initial
# observation, the actions taken, the last observation and the summed reward.
n_experiments2 = 8 # len(wq_all) # 100
obs_all2 = [None] * n_experiments2  # initial observation of each episode
act_all2 = [None] * n_experiments2  # list of actions taken in each episode
rew_all2 = [None] * n_experiments2  # sum of the step rewards of each episode
out_all2 = [None] * n_experiments2  # last observation of each episode

print("start experiments")
for i in range(n_experiments2):
  #print("experiment ", i+1)

  # reset, passing the decoded water sample as s0
  # NOTE(review): assumes env.reset(s0=...) starts the episode from that sample - confirm against the env definition
  s0 = wqi_to_watersample(i)
  obs_initial = env.reset(s0 = s0)

  #print("-"*10)
  #print("obs init", obs_initial)
  
  # start
  obs_i = obs_initial
  act_list = []
  rew_sum = 0
  # at most 5 steps per episode; the trailing note suggests 5 is meant to equal env.n_elements (not verified here)
  for j in range(5): # env.n_elements
    #action = env.action_space.sample()
    # get_action returns a pair; only its first item (the action) is used
    act_i, _ = policy.get_action(obs_i)
    #print("obs_i", obs_i, "act i", act_i)
    act_list.append(act_i)
    obs_i, rew_i, done, _ = env.step(act_i)
    rew_sum += rew_i
    
    # stop early when the environment reports the episode is over,
    # in which case act_list holds fewer than 5 actions
    if done: break
    
  obs_all2[i] = obs_initial
  out_all2[i] = obs_i
  act_all2[i] = act_list
  rew_all2[i] = rew_sum

#env.close()

print("done")

start experiments
done


In [0]:
# Tabulate the rollouts recorded above (one row per input water sample)
df_deterministic = convert_results_to_df(obs_all2, act_all2, out_all2, rew_all2)

In [27]:
df_deterministic

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,wtp/1,wtp/2,wtp/3,wtp/4,wtp/5,out_turbidity,out_hardness,out_bacteria,reward
in_turbidity,in_hardness,in_bacteria,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
False,False,False,pipe,pipe,pipe,pipe,pipe,False,False,False,5.0
False,False,True,UV,pipe,pipe,pipe,pipe,False,False,False,6.0
False,True,False,softener,pipe,pipe,pipe,pipe,False,False,False,6.0
False,True,True,softener,UV,pipe,pipe,pipe,False,False,False,7.0
True,False,False,sand filter,pipe,pipe,pipe,pipe,False,False,False,6.0
True,False,True,sand filter,UV,pipe,pipe,pipe,False,False,False,7.0
True,True,False,sand filter,softener,pipe,pipe,pipe,False,False,False,7.0
True,True,True,UV,sand filter,softener,pipe,pipe,False,False,False,8.0
