In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import glob
import os
from multiprocessing import Process
import sys
sys.path.append('..')
from modules import utils, constants
from stable_baselines import DQN
from stable_baselines.deepq.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.common.callbacks import CheckpointCallback

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."





In [2]:
# Seed every random number generator used in this notebook with the project-wide
# seed from `constants`.
SEED = constants.SEED
random.seed(SEED)
np.random.seed(SEED)
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting it
# here probably only affects interpreters launched afterwards - confirm.
os.environ['PYTHONHASHSEED']=str(SEED)
# NOTE(review): the two TensorFlow calls below look redundant under TF 1.x, where
# tf.compat.v1.set_random_seed should be the same function - confirm and keep one.
tf.set_random_seed(SEED)
tf.compat.v1.set_random_seed(SEED)
# Last expression of the cell: displays the seed value.
SEED

42

In [3]:
# Display the BETA and CHECKPOINT_FREQ values configured in the `constants` module.
constants.BETA, constants.CHECKPOINT_FREQ

(9, 1000000)

In [None]:
def custom_mlp(scaled_obs, **kwargs):
    '''
    Builds a stack of four 64-unit ReLU dense layers on top of the observations

    A dropout op (rate 0.5) sits between each pair of consecutive dense layers.
    Extra keyword arguments are accepted but not used.

    NOTE(review): tf.layers.dropout is called without a `training` argument; in TF 1.x
    that argument defaults to False, which would make these dropout ops pass-throughs.
    Confirm whether dropout is meant to be active during training.

    Returns the output tensor of the last dense layer.
    '''
    hidden = scaled_obs
    for layer_idx in range(4):
        if layer_idx > 0:
            # Dropout goes between dense layers, never in front of the first one.
            hidden = tf.layers.dropout(hidden, rate=0.5)
        hidden = tf.layers.dense(hidden, 64, activation=tf.nn.relu)
    return hidden

In [None]:
class CustomDQN(DQN):
    '''
    Thin subclass of DQN

    It adds no behaviour of its own: the constructor forwards the policy, the
    environment and every keyword argument to DQN unchanged.
    '''
    def __init__(self, policy, env, **kwargs):
        super().__init__(policy, env, **kwargs)

In [4]:
# class CustomMlpPolicy(MlpPolicy):
#     def __init__(self, *args, **kwargs):
#         super(CustomMlpPolicy, self).__init__(*args, **kwargs, layers=[128, 128, 128])

In [5]:
# def custom_network(input_shape, num_classes):
#     inputs = tf.keras.layers.Input(shape=input_shape)
#     shared_layer1 = tf.keras.layers.Dense(128, activation='relu')(inputs)
#     shared_layer2 = tf.keras.layers.Dense(128, activation='relu')(shared_layer1)
#     shared_layer3 = tf.keras.layers.Dense(128, activation='relu')(shared_layer2)
#     out = tf.keras.layers.Dense(num_actions, activation='softmax')(shared_layer3)
#     return inputs, out

#modify the 24, maybe num_actions etc.
# model = DQN('MlpPolicy', env, policy_kwargs={'net_arch': (24,), 'custom_objects': {'num_actions': num_classes}}, verbose=1)

In [6]:
def stable_dueling_dqn(X_train, y_train, timesteps, save=False, log_path=None, log_prefix='dueling_dqn', filename=None, per=False):
    '''
    Creates and trains a dueling DQN model (double Q-learning disabled)

    Parameters:
        X_train, y_train: training data, passed as-is to utils.create_env
        timesteps: total number of timesteps to train for
        save: if True, the trained model is written to <log_path>/<filename>.pkl
        log_path: directory used for checkpoints and for the final model
        log_prefix: name prefix of the checkpoint files; replaced by
            'dueling_dqn_per' whenever per is True
        filename: base name (without extension) of the saved model
        per: whether to train with prioritized experience replay

    Returns:
        The trained model

    Raises:
        ValueError: if save is True but log_path or filename is missing
    '''
    if save and (log_path is None or filename is None):
        # Fail before the (potentially very long) training run instead of after it.
        raise ValueError('log_path and filename are required when save=True')
    if per:
        log_prefix = 'dueling_dqn_per'
    training_env = utils.create_env(X_train, y_train)
    try:
        # The class defined in this notebook is `CustomDQN`; `customDQN` does not exist.
        # DQN's MlpPolicy takes its hidden sizes through `layers`; the actor-critic style
        # `net_arch=[dict(pi=..., vf=...)]` is rejected as an unknown policy keyword.
        model = CustomDQN('MlpPolicy', training_env, verbose=1, seed=constants.SEED, learning_rate=0.0001,
                          buffer_size=1000000, learning_starts=50000, train_freq=4, target_network_update_freq=10000,
                          exploration_final_eps=0.05, n_cpu_tf_sess=1, double_q=False, prioritized_replay=per,
                          policy_kwargs={"layers": [64, 64, 64, 64]})

        checkpoint_callback = CheckpointCallback(save_freq=constants.CHECKPOINT_FREQ, save_path=log_path,
                                                 name_prefix=log_prefix)
        model.learn(total_timesteps=timesteps, log_interval=100000, callback=checkpoint_callback)
        if save:
            model.save(os.path.join(log_path, f'{filename}.pkl'))
    finally:
        # Release the environment even when model creation or training fails.
        training_env.close()
    return model

In [7]:
def stable_dueling_ddqn(X_train, y_train, timesteps, save=False, log_path=None, log_prefix='dueling_ddqn', filename=None, per=False):
    '''
    Creates and trains a dueling double DQN model

    Unlike stable_dueling_dqn, `double_q` is not passed, so the library default
    applies (presumably enabled in stable-baselines - confirm).

    Parameters:
        X_train, y_train: training data, passed as-is to utils.create_env
        timesteps: total number of timesteps to train for
        save: if True, the trained model is written to <log_path>/<filename>.pkl
        log_path: directory used for checkpoints and for the final model
        log_prefix: name prefix of the checkpoint files; replaced by
            'dueling_ddqn_per' whenever per is True
        filename: base name (without extension) of the saved model
        per: whether to train with prioritized experience replay

    Returns:
        The trained model

    Raises:
        ValueError: if save is True but log_path or filename is missing
    '''
    if save and (log_path is None or filename is None):
        # Fail before the (potentially very long) training run instead of after it.
        raise ValueError('log_path and filename are required when save=True')
    if per:
        log_prefix = 'dueling_ddqn_per'
    training_env = utils.create_env(X_train, y_train)
    try:
        # The class defined in this notebook is `CustomDQN`; `customDQN` does not exist.
        # DQN's MlpPolicy takes its hidden sizes through `layers`; the actor-critic style
        # `net_arch=[dict(pi=..., vf=...)]` is rejected as an unknown policy keyword.
        model = CustomDQN('MlpPolicy', training_env, verbose=1, seed=constants.SEED, learning_rate=0.0001,
                          buffer_size=1000000, learning_starts=50000, train_freq=4, target_network_update_freq=10000,
                          exploration_final_eps=0.05, n_cpu_tf_sess=1, prioritized_replay=per,
                          policy_kwargs={"layers": [64, 64, 64, 64]})

        checkpoint_callback = CheckpointCallback(save_freq=constants.CHECKPOINT_FREQ, save_path=log_path,
                                                 name_prefix=log_prefix)
        model.learn(total_timesteps=timesteps, log_interval=100000, callback=checkpoint_callback)
        if save:
            model.save(os.path.join(log_path, f'{filename}.pkl'))
    finally:
        # Release the environment even when model creation or training fails.
        training_env.close()
    return model

In [8]:
def run_dqn_model(model_type, steps):
    '''
    Trains one model variant and saves it under a fresh run directory

    Reads the notebook-level globals SEED, X_train and y_train. The run directory is
    <parent_dir>/seed_<SEED>_<steps>; it must not exist yet, so an earlier run with the
    same seed and step count is never overwritten.

    Parameters:
        model_type: 'dueling_dqn_per' or 'dueling_ddqn_per'
        steps: total number of timesteps to train for

    Returns:
        The trained model

    Raises:
        ValueError: if model_type is not one of the supported names
        FileExistsError: if the run directory already exists
    '''
    # Validate first so that an unknown model type does not leave an empty directory behind.
    if model_type not in ('dueling_dqn_per', 'dueling_ddqn_per'):
        raise ValueError(f'Unknown model type - {model_type}!')

    dir_name = f'seed_{SEED}_{steps}'
    parent_dir = f'../models/logs/{model_type}/missingness/0.1/no_step_penalty/custom_arch_3_layers_dropout'
    path = os.path.join(parent_dir, dir_name)
    # makedirs also creates missing parent directories, but still refuses an existing run directory.
    os.makedirs(path)

    if model_type == 'dueling_dqn_per':
        train = stable_dueling_dqn
    else:
        train = stable_dueling_ddqn
    # Checkpoint prefix and saved file name are both derived from the model type.
    return train(X_train, y_train, steps, save=True, log_path=path, log_prefix=model_type,
                 filename=f'{model_type}_{steps}', per=True)

In [9]:
# Read the training CSV and replace every missing value with -1 in one step.
train_df = pd.read_csv('../new_data/train_set_missingness_0.1.csv').fillna(-1)
train_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,proteinuria,biopsy_proven_lupus_nephritis,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,1,1.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1
1,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1
2,0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,...,1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,1.0,3.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,1


In [10]:
# Number of rows per value of the `label` column.
train_df['label'].value_counts()

0    25240
1    25160
Name: label, dtype: int64

In [11]:
# Display the row at integer position 90.
train_df.iloc[90]

ana                              0.0
fever                            0.0
leukopenia                       0.0
thrombocytopenia                 0.0
auto_immune_hemolysis            0.0
delirium                         0.0
psychosis                        0.0
seizure                          0.0
non_scarring_alopecia            0.0
oral_ulcers                      0.0
cutaneous_lupus                  0.0
pleural_effusion                 0.0
pericardial_effusion             0.0
acute_pericarditis               0.0
joint_involvement                0.0
proteinuria                      0.0
biopsy_proven_lupus_nephritis    0.0
anti_cardioliphin_antibodies     0.0
anti_β2gp1_antibodies            0.0
lupus_anti_coagulant             0.0
low_c3                           1.0
low_c4                           0.0
anti_dsdna_antibody              0.0
anti_smith_antibody             -1.0
label                            0.0
Name: 90, dtype: float64

In [12]:
# Every column except the last is a feature; the last column is the target.
X_train = np.array(train_df.iloc[:, :-1])
y_train = np.array(train_df.iloc[:, -1])
X_train.shape, y_train.shape

((50400, 24), (50400,))

In [13]:
# Model variants to train.
model_names = ['dueling_dqn_per', 'dueling_ddqn_per']
# Handles of the training processes that get launched.
procs = list()
# Number of training timesteps per model.
steps = 100_000_000

In [14]:
# Launch one separate process per model variant; each process runs run_dqn_model.
for name in model_names:
    # Sequential, in-process alternative (left disabled): run_dqn_model(name, steps)
    proc = Process(target=run_dqn_model, args=(name, steps))
    procs.append(proc)  # keep a handle to the process; nothing in this cell joins it
    proc.start()

The environment seed is [42]
The environment seed is [42]








Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where












Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where










--------------------------------------
| % time spent exploring  | 95       |
| episodes                | 100000   |
| mean 100 episode reward | -0.7     |
| steps                   | 481811   |
| success rate            | 0.15     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 95       |
| episodes                | 100000   |
| mean 100 episode reward | -0.6     |
| steps                   | 481558   |
| success rate            | 0.2      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 90       |
| episodes                | 200000   |
| mean 100 episode reward | -0.7     |
| steps                   | 974662   |
| success rate            | 0.14     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 90       |
| episodes                | 200000   |
| mean 100 episode reward | -0.7     |
| steps              

--------------------------------------
| % time spent exploring  | 28       |
| episodes                | 1500000  |
| mean 100 episode reward | 0.4      |
| steps                   | 7487105  |
| success rate            | 0.7      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 25       |
| episodes                | 1600000  |
| mean 100 episode reward | 0.3      |
| steps                   | 7808922  |
| success rate            | 0.63     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 21       |
| episodes                | 1400000  |
| mean 100 episode reward | -0.5     |
| steps                   | 8271688  |
| success rate            | 0.24     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 22       |
| episodes                | 1700000  |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 2600000  |
| mean 100 episode reward | 0.6      |
| steps                   | 12437455 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 3400000  |
| mean 100 episode reward | 0.6      |
| steps                   | 12176759 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 2700000  |
| mean 100 episode reward | 0.5      |
| steps                   | 12691347 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 3500000  |
| mean 100 episode reward | 0.7      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5000000  |
| mean 100 episode reward | 0.6      |
| steps                   | 15691738 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 4100000  |
| mean 100 episode reward | 0.6      |
| steps                   | 16182318 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5100000  |
| mean 100 episode reward | 0.5      |
| steps                   | 15907093 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 4200000  |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 6600000  |
| mean 100 episode reward | 0.4      |
| steps                   | 19174619 |
| success rate            | 0.71     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5600000  |
| mean 100 episode reward | 0.5      |
| steps                   | 19746779 |
| success rate            | 0.74     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 6700000  |
| mean 100 episode reward | 0.6      |
| steps                   | 19400796 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5700000  |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 8200000  |
| mean 100 episode reward | 0.5      |
| steps                   | 22667207 |
| success rate            | 0.76     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 7100000  |
| mean 100 episode reward | 0.5      |
| steps                   | 23396332 |
| success rate            | 0.73     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 8300000  |
| mean 100 episode reward | 0.4      |
| steps                   | 22888909 |
| success rate            | 0.71     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 7200000  |
| mean 100 episode reward | 0.4      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 9800000  |
| mean 100 episode reward | 0.5      |
| steps                   | 26163849 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 8600000  |
| mean 100 episode reward | 0.5      |
| steps                   | 26865043 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 9900000  |
| mean 100 episode reward | 0.5      |
| steps                   | 26377977 |
| success rate            | 0.76     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 8700000  |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 11400000 |
| mean 100 episode reward | 0.5      |
| steps                   | 29671743 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 10100000 |
| mean 100 episode reward | 0.6      |
| steps                   | 30359893 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 11500000 |
| mean 100 episode reward | 0.7      |
| steps                   | 29887754 |
| success rate            | 0.83     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 10200000 |
| mean 100 episode reward | 0        |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 11600000 |
| mean 100 episode reward | 0.5      |
| steps                   | 33825126 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 13000000 |
| mean 100 episode reward | 0.5      |
| steps                   | 33157309 |
| success rate            | 0.76     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 11700000 |
| mean 100 episode reward | 0.4      |
| steps                   | 34045941 |
| success rate            | 0.72     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 13100000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 13100000 |
| mean 100 episode reward | 0.5      |
| steps                   | 37265592 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 14600000 |
| mean 100 episode reward | 0.6      |
| steps                   | 36651290 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 13200000 |
| mean 100 episode reward | 0.6      |
| steps                   | 37480934 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 14700000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 16100000 |
| mean 100 episode reward | 0.4      |
| steps                   | 39926355 |
| success rate            | 0.71     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 14700000 |
| mean 100 episode reward | 0.5      |
| steps                   | 40820892 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 16200000 |
| mean 100 episode reward | 0.4      |
| steps                   | 40146297 |
| success rate            | 0.71     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 14800000 |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 17600000 |
| mean 100 episode reward | 0.6      |
| steps                   | 43208808 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 16300000 |
| mean 100 episode reward | 0.6      |
| steps                   | 44388101 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 17700000 |
| mean 100 episode reward | 0.5      |
| steps                   | 43429551 |
| success rate            | 0.73     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 16400000 |
| mean 100 episode reward | 0.3      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 19200000 |
| mean 100 episode reward | 0.5      |
| steps                   | 46699898 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 17800000 |
| mean 100 episode reward | 0.5      |
| steps                   | 47747855 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 19300000 |
| mean 100 episode reward | 0.6      |
| steps                   | 46921941 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 17900000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 19300000 |
| mean 100 episode reward | 0.6      |
| steps                   | 51091398 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 20800000 |
| mean 100 episode reward | 0.4      |
| steps                   | 50205232 |
| success rate            | 0.71     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 19400000 |
| mean 100 episode reward | 0.7      |
| steps                   | 51310624 |
| success rate            | 0.86     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 20900000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 22300000 |
| mean 100 episode reward | 0.5      |
| steps                   | 53487956 |
| success rate            | 0.74     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 20900000 |
| mean 100 episode reward | 0.5      |
| steps                   | 54675695 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 22400000 |
| mean 100 episode reward | 0.5      |
| steps                   | 53700943 |
| success rate            | 0.76     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 21000000 |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 22400000 |
| mean 100 episode reward | 0.6      |
| steps                   | 58051686 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 23900000 |
| mean 100 episode reward | 0.6      |
| steps                   | 56974492 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 22500000 |
| mean 100 episode reward | 0.6      |
| steps                   | 58276506 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 24000000 |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 25400000 |
| mean 100 episode reward | 0.7      |
| steps                   | 60279974 |
| success rate            | 0.83     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 24000000 |
| mean 100 episode reward | 0.4      |
| steps                   | 61662824 |
| success rate            | 0.71     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 25500000 |
| mean 100 episode reward | 0.6      |
| steps                   | 60502931 |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 24100000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 25500000 |
| mean 100 episode reward | 0.6      |
| steps                   | 65040615 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 27000000 |
| mean 100 episode reward | 0.6      |
| steps                   | 63806071 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 25600000 |
| mean 100 episode reward | 0.6      |
| steps                   | 65265034 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 27100000 |
| mean 100 episode reward | 0.7      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 28500000 |
| mean 100 episode reward | 0.5      |
| steps                   | 67125702 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 27100000 |
| mean 100 episode reward | 0.7      |
| steps                   | 68661423 |
| success rate            | 0.84     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 28600000 |
| mean 100 episode reward | 0.7      |
| steps                   | 67349973 |
| success rate            | 0.85     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 27200000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 28600000 |
| mean 100 episode reward | 0.7      |
| steps                   | 72048419 |
| success rate            | 0.86     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 30100000 |
| mean 100 episode reward | 0.6      |
| steps                   | 70651182 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 28700000 |
| mean 100 episode reward | 0.4      |
| steps                   | 72279910 |
| success rate            | 0.71     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 30200000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 31600000 |
| mean 100 episode reward | 0.5      |
| steps                   | 73982436 |
| success rate            | 0.74     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 30200000 |
| mean 100 episode reward | 0.6      |
| steps                   | 75713510 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 31700000 |
| mean 100 episode reward | 0.5      |
| steps                   | 74201990 |
| success rate            | 0.76     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 30300000 |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 31700000 |
| mean 100 episode reward | 0.5      |
| steps                   | 79162123 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 33200000 |
| mean 100 episode reward | 0.6      |
| steps                   | 77541374 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 31800000 |
| mean 100 episode reward | 0.5      |
| steps                   | 79386787 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 33300000 |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 33200000 |
| mean 100 episode reward | 0.6      |
| steps                   | 82618752 |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 33300000 |
| mean 100 episode reward | 0.5      |
| steps                   | 82850169 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 34800000 |
| mean 100 episode reward | 0.6      |
| steps                   | 81087870 |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 33400000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 36300000 |
| mean 100 episode reward | 0.6      |
| steps                   | 84425321 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 34800000 |
| mean 100 episode reward | 0.5      |
| steps                   | 86297112 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 36400000 |
| mean 100 episode reward | 0.6      |
| steps                   | 84649927 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 34900000 |
| mean 100 episode reward | 0.4      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 36300000 |
| mean 100 episode reward | 0.6      |
| steps                   | 89850192 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 37900000 |
| mean 100 episode reward | 0.7      |
| steps                   | 87972335 |
| success rate            | 0.84     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 36400000 |
| mean 100 episode reward | 0.7      |
| steps                   | 90087309 |
| success rate            | 0.83     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 38000000 |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 37800000 |
| mean 100 episode reward | 0.6      |
| steps                   | 93385249 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 39500000 |
| mean 100 episode reward | 0.6      |
| steps                   | 91550414 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 37900000 |
| mean 100 episode reward | 0.4      |
| steps                   | 93622733 |
| success rate            | 0.7      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 39600000 |
| mean 100 episode reward | 0.7      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 39300000 |
| mean 100 episode reward | 0.7      |
| steps                   | 96948561 |
| success rate            | 0.84     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 41100000 |
| mean 100 episode reward | 0.7      |
| steps                   | 95117573 |
| success rate            | 0.86     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 39400000 |
| mean 100 episode reward | 0.6      |
| steps                   | 97177349 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 41200000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 42900000 |
| mean 100 episode reward | 0.6      |
| steps                   | 99263414 |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 43000000 |
| mean 100 episode reward | 0.4      |
| steps                   | 99486590 |
| success rate            | 0.72     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 43100000 |
| mean 100 episode reward | 0.5      |
| steps                   | 99708836 |
| success rate            | 0.74     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 43200000 |
| mean 100 episode reward | 0.6      |
| steps                  

In [15]:
# Block until every worker process has exited.
# NOTE(review): `procs` is assumed to be the list of multiprocessing.Process
# objects started in an earlier cell -- confirm against that cell.
for proc in procs:
    proc.join()

# join() returns normally even when a child raised or was killed, so success
# must be derived from the exit codes: 0 means a clean exit, anything else
# (positive status or negative signal number) means the job did not finish.
failed_procs = [proc for proc in procs if proc.exitcode != 0]
if failed_procs:
    for proc in failed_procs:
        print('Job {} (pid {}) exited with code {}'.format(
            proc.name, proc.pid, proc.exitcode))
    print('{} of {} jobs failed'.format(len(failed_procs), len(procs)))
else:
    print('All jobs completed and terminated successfully')

All jobs completed and terminated successfully
