In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import glob
import os
from multiprocessing import Process
import sys
sys.path.append('..')
from modules import utils, constants

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."





In [2]:
# Seed every random number generator used in this notebook from the single
# project-wide value so that runs are repeatable.
SEED = constants.SEED
random.seed(SEED)
np.random.seed(SEED)
# NOTE: PYTHONHASHSEED is read when an interpreter starts, so this assignment
# cannot change hashing in the already-running kernel; it is only seen by
# interpreters launched afterwards that inherit this environment.
os.environ['PYTHONHASHSEED'] = str(SEED)
# Graph-level TensorFlow seed. The compat.v1 spelling is kept on its own: the
# bare `tf.set_random_seed` alias seeded the same thing a second time and is
# not available under TensorFlow 2.x.
tf.compat.v1.set_random_seed(SEED)
SEED

42

In [3]:
# Display the BETA and CHECKPOINT_FREQ values from the project constants module
# so the settings in effect for this run are recorded in the notebook output.
constants.BETA, constants.CHECKPOINT_FREQ

9

In [4]:
def run_dqn_model(model_type, steps):
    """Train one DQN variant and return whatever the ``utils`` trainer returns.

    A run directory named ``seed_<SEED>_<steps>`` is created under
    ``../models/logs/<model_type>/missingness/0.1/biopsy_9`` and handed to the
    trainer as ``log_path``. The training data is read from the notebook-level
    ``X_train`` / ``y_train`` variables, so they must exist before this is
    called.

    Args:
        model_type: One of 'dqn', 'ddqn', 'dueling_dqn', 'dueling_ddqn', or any
            of these followed by '_per' (the trainer is then called with
            ``per=True``).
        steps: Forwarded to the trainer as its third positional argument
            (presumably the number of training timesteps -- confirm in
            ``utils``) and embedded in the directory and file names.

    Returns:
        The object returned by the selected ``utils.stable_*`` function.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names. This is
            checked first, so no directory is created for a bad name.
        FileExistsError: If the run directory already exists, so that an earlier
            run's logs are not silently mixed with a new one.
    """
    # Validate before touching the filesystem: an unknown type must not leave a
    # stray log directory behind or fail with an unrelated filesystem error.
    per_suffix = '_per'
    use_per = isinstance(model_type, str) and model_type.endswith(per_suffix)
    base_type = model_type[:-len(per_suffix)] if use_per else model_type
    if base_type not in ('dqn', 'ddqn', 'dueling_dqn', 'dueling_ddqn'):
        raise ValueError(f'Unknown model type - {model_type}!')

    trainers = {
        'dqn': utils.stable_vanilla_dqn,
        'ddqn': utils.stable_double_dqn,
        'dueling_dqn': utils.stable_dueling_dqn,
        'dueling_ddqn': utils.stable_dueling_ddqn,
    }

    dir_name = f'seed_{SEED}_{steps}'
    parent_dir = f'../models/logs/{model_type}/missingness/0.1/biopsy_9'
    path = os.path.join(parent_dir, dir_name)
    # makedirs also creates missing parent directories (os.mkdir would fail on
    # them); without exist_ok it still raises if the run directory is present.
    os.makedirs(path)

    # Only the '_per' variants pass `per`; the others keep the trainer default.
    extra_kwargs = {'per': True} if use_per else {}
    return trainers[base_type](
        X_train,
        y_train,
        steps,
        save=True,
        log_path=path,
        log_prefix=model_type,
        filename=f'{model_type}_{steps}',
        **extra_kwargs,
    )

In [5]:
# Read the missingness-0.1 training set and encode every missing entry as -1.
train_df = pd.read_csv('../new_data/train_set_missingness_0.1.csv').fillna(-1)
train_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,proteinuria,biopsy_proven_lupus_nephritis,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,1,1.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1
1,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1
2,0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,...,1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,1.0,3.0,0.0,-1.0,0.0,0.0,-1.0,0.0,0.0,1


In [6]:
# Number of rows per value of the `label` column.
train_df['label'].value_counts()

0    25240
1    25160
Name: label, dtype: int64

In [7]:
# Spot-check a single row by position (row 90), e.g. to see how missing values
# appear after the -1 fill.
train_df.iloc[90]

ana                              0.0
fever                            0.0
leukopenia                       0.0
thrombocytopenia                 0.0
auto_immune_hemolysis            0.0
delirium                         0.0
psychosis                        0.0
seizure                          0.0
non_scarring_alopecia            0.0
oral_ulcers                      0.0
cutaneous_lupus                  0.0
pleural_effusion                 0.0
pericardial_effusion             0.0
acute_pericarditis               0.0
joint_involvement                0.0
proteinuria                      0.0
biopsy_proven_lupus_nephritis    0.0
anti_cardioliphin_antibodies     0.0
anti_β2gp1_antibodies            0.0
lupus_anti_coagulant             0.0
low_c3                           1.0
low_c4                           0.0
anti_dsdna_antibody              0.0
anti_smith_antibody             -1.0
label                            0.0
Name: 90, dtype: float64

In [8]:
# Every column except the last is a feature; the last column is the target.
X_train = np.array(train_df.iloc[:, :-1])
y_train = np.array(train_df.iloc[:, -1])
X_train.shape, y_train.shape

((50400, 24), (50400,))

In [9]:
# Model names accepted by run_dqn_model: 'dqn', 'ddqn', 'dueling_dqn',
# 'dueling_ddqn', 'dqn_per', 'ddqn_per', 'dueling_dqn_per', 'dueling_ddqn_per'.
# Only the two dueling variants with the '_per' suffix are selected here.
model_names = [
    'dueling_dqn_per',
    'dueling_ddqn_per',
]
# Handles of the launched training processes are collected in this list.
procs = list()
# Value passed to run_dqn_model as `steps` (20 million).
steps = 20_000_000

In [10]:
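# Debugging aid: uncomment to train a single model synchronously in the
# notebook process instead of launching the parallel jobs below.
# run_dqn_model(model_names[0], steps)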

In [11]:
# Launch one child process per selected model so the variants train in parallel.
# NOTE(review): run_dqn_model is defined in this notebook and reads the
# notebook-level X_train / y_train; presumably this relies on a fork-style
# start method so the children can see them -- confirm on the target platform.
for name in model_names:
    # Synchronous alternative for debugging: run_dqn_model(name, steps)
    proc = Process(target=run_dqn_model, args=(name, steps))
    # Keep the handle so the process can be joined once training is under way.
    procs.append(proc)
    proc.start()

The environment seed is [42]









Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where












Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where










--------------------------------------
| % time spent exploring  | 95       |
| episodes                | 100000   |
| mean 100 episode reward | -0.7     |
| steps                   | 481641   |
| success rate            | 0.19     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 95       |
| episodes                | 100000   |
| mean 100 episode reward | -0.6     |
| steps                   | 481622   |
| success rate            | 0.23     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 90       |
| episodes                | 200000   |
| mean 100 episode reward | -0.7     |
| steps                   | 976217   |
| success rate            | 0.16     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 90       |
| episodes                | 200000   |
| mean 100 episode reward | -0.8     |
| steps              

--------------------------------------
| % time spent exploring  | 27       |
| episodes                | 1500000  |
| mean 100 episode reward | 0.1      |
| steps                   | 7591300  |
| success rate            | 0.59     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 24       |
| episodes                | 1600000  |
| mean 100 episode reward | -0.1     |
| steps                   | 7960906  |
| success rate            | 0.49     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 21       |
| episodes                | 1400000  |
| mean 100 episode reward | -0.6     |
| steps                   | 8255693  |
| success rate            | 0.25     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 21       |
| episodes                | 1700000  |
| mean 100 episode reward | 0.2      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 3600000  |
| mean 100 episode reward | 0.4      |
| steps                   | 13023048 |
| success rate            | 0.73     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 2400000  |
| mean 100 episode reward | 0.4      |
| steps                   | 13410644 |
| success rate            | 0.69     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 3700000  |
| mean 100 episode reward | 0.6      |
| steps                   | 13248608 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 2500000  |
| mean 100 episode reward | 0.1      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 3800000  |
| mean 100 episode reward | 0.6      |
| steps                   | 17071227 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5300000  |
| mean 100 episode reward | 0.6      |
| steps                   | 16777085 |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 3900000  |
| mean 100 episode reward | 0.7      |
| steps                   | 17309533 |
| success rate            | 0.84     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5400000  |
| mean 100 episode reward | 0.4      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 6800000  |
| mean 100 episode reward | 0.2      |
| steps                   | 20058877 |
| success rate            | 0.61     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5400000  |
| mean 100 episode reward | 0.4      |
| steps                   | 20756891 |
| success rate            | 0.7      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 6900000  |
| mean 100 episode reward | 0.5      |
| steps                   | 20275712 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5500000  |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 6900000  |
| mean 100 episode reward | 0.5      |
| steps                   | 24115171 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 8400000  |
| mean 100 episode reward | 0.5      |
| steps                   | 23545118 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 7000000  |
| mean 100 episode reward | 0.5      |
| steps                   | 24342357 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 8500000  |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 9900000  |
| mean 100 episode reward | 0.5      |
| steps                   | 26816496 |
| success rate            | 0.76     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 8500000  |
| mean 100 episode reward | 0.4      |
| steps                   | 27668888 |
| success rate            | 0.73     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 10000000 |
| mean 100 episode reward | 0.4      |
| steps                   | 27035091 |
| success rate            | 0.72     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 8600000  |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 10000000 |
| mean 100 episode reward | 0.4      |
| steps                   | 30983152 |
| success rate            | 0.72     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 11500000 |
| mean 100 episode reward | 0.6      |
| steps                   | 30280016 |
| success rate            | 0.83     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 10100000 |
| mean 100 episode reward | 0.6      |
| steps                   | 31198591 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 11600000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 11600000 |
| mean 100 episode reward | 0.5      |
| steps                   | 34478804 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 13000000 |
| mean 100 episode reward | 0.5      |
| steps                   | 33537082 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 11700000 |
| mean 100 episode reward | 0.4      |
| steps                   | 34700102 |
| success rate            | 0.72     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 13100000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 14500000 |
| mean 100 episode reward | 0.5      |
| steps                   | 36782811 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 13200000 |
| mean 100 episode reward | 0.6      |
| steps                   | 37978377 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 14600000 |
| mean 100 episode reward | 0.6      |
| steps                   | 36998988 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 13300000 |
| mean 100 episode reward | 0.4      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 14700000 |
| mean 100 episode reward | 0.6      |
| steps                   | 41240566 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 16100000 |
| mean 100 episode reward | 0.4      |
| steps                   | 40246378 |
| success rate            | 0.73     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 14800000 |
| mean 100 episode reward | 0.5      |
| steps                   | 41462592 |
| success rate            | 0.74     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 16200000 |
| mean 100 episode reward | 0.3      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 16300000 |
| mean 100 episode reward | 0.6      |
| steps                   | 44746376 |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 17600000 |
| mean 100 episode reward | 0.6      |
| steps                   | 43501552 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 16400000 |
| mean 100 episode reward | 0.5      |
| steps                   | 44961458 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 17700000 |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 19100000 |
| mean 100 episode reward | 0.6      |
| steps                   | 46740895 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 17900000 |
| mean 100 episode reward | 0.6      |
| steps                   | 48238798 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 19200000 |
| mean 100 episode reward | 0.5      |
| steps                   | 46954843 |
| success rate            | 0.76     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 18000000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 19400000 |
| mean 100 episode reward | 0.7      |
| steps                   | 51514034 |
| success rate            | 0.85     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 20700000 |
| mean 100 episode reward | 0.5      |
| steps                   | 50206145 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 19500000 |
| mean 100 episode reward | 0.6      |
| steps                   | 51730861 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 20800000 |
| mean 100 episode reward | 0.4      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 21000000 |
| mean 100 episode reward | 0.5      |
| steps                   | 55018966 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 22200000 |
| mean 100 episode reward | 0.6      |
| steps                   | 53449519 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 21100000 |
| mean 100 episode reward | 0.5      |
| steps                   | 55237583 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 22300000 |
| mean 100 episode reward | 0.3      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 23700000 |
| mean 100 episode reward | 0.7      |
| steps                   | 56702672 |
| success rate            | 0.85     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 22600000 |
| mean 100 episode reward | 0.5      |
| steps                   | 58519005 |
| success rate            | 0.76     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 23800000 |
| mean 100 episode reward | 0.4      |
| steps                   | 56928247 |
| success rate            | 0.73     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 22700000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 24100000 |
| mean 100 episode reward | 0.6      |
| steps                   | 61818374 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 25300000 |
| mean 100 episode reward | 0.6      |
| steps                   | 60163381 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 24200000 |
| mean 100 episode reward | 0.6      |
| steps                   | 62033238 |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 25400000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 26800000 |
| mean 100 episode reward | 0.6      |
| steps                   | 63404192 |
| success rate            | 0.83     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 25700000 |
| mean 100 episode reward | 0.5      |
| steps                   | 65310063 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 26900000 |
| mean 100 episode reward | 0.4      |
| steps                   | 63623907 |
| success rate            | 0.73     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 25800000 |
| mean 100 episode reward | 0.4      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 28300000 |
| mean 100 episode reward | 0.6      |
| steps                   | 66660263 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 27300000 |
| mean 100 episode reward | 0.6      |
| steps                   | 68821407 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 28400000 |
| mean 100 episode reward | 0.6      |
| steps                   | 66874789 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 27400000 |
| mean 100 episode reward | 0.4      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 28800000 |
| mean 100 episode reward | 0.5      |
| steps                   | 72131066 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 29900000 |
| mean 100 episode reward | 0.6      |
| steps                   | 70118590 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 28900000 |
| mean 100 episode reward | 0.6      |
| steps                   | 72347913 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 30000000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 31400000 |
| mean 100 episode reward | 0.6      |
| steps                   | 73373888 |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 30400000 |
| mean 100 episode reward | 0.5      |
| steps                   | 75637140 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 31500000 |
| mean 100 episode reward | 0.5      |
| steps                   | 73592564 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 30500000 |
| mean 100 episode reward | 0.7      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 31900000 |
| mean 100 episode reward | 0.5      |
| steps                   | 78942215 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 33000000 |
| mean 100 episode reward | 0.5      |
| steps                   | 76846868 |
| success rate            | 0.76     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 32000000 |
| mean 100 episode reward | 0.6      |
| steps                   | 79161551 |
| success rate            | 0.83     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 33100000 |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 33500000 |
| mean 100 episode reward | 0.4      |
| steps                   | 82453282 |
| success rate            | 0.73     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 34500000 |
| mean 100 episode reward | 0.5      |
| steps                   | 80122269 |
| success rate            | 0.74     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 33600000 |
| mean 100 episode reward | 0.4      |
| steps                   | 82670493 |
| success rate            | 0.7      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 34600000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 36000000 |
| mean 100 episode reward | 0.4      |
| steps                   | 83393434 |
| success rate            | 0.72     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 35100000 |
| mean 100 episode reward | 0.6      |
| steps                   | 85983709 |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 36100000 |
| mean 100 episode reward | 0.6      |
| steps                   | 83611459 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 35200000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 36600000 |
| mean 100 episode reward | 0.5      |
| steps                   | 89286506 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 37600000 |
| mean 100 episode reward | 0.6      |
| steps                   | 86874807 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 36700000 |
| mean 100 episode reward | 0.5      |
| steps                   | 89509981 |
| success rate            | 0.75     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 37700000 |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 39100000 |
| mean 100 episode reward | 0.5      |
| steps                   | 90156825 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 38200000 |
| mean 100 episode reward | 0.6      |
| steps                   | 92831936 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 39200000 |
| mean 100 episode reward | 0.5      |
| steps                   | 90372685 |
| success rate            | 0.78     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 38300000 |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 40600000 |
| mean 100 episode reward | 0.3      |
| steps                   | 93435589 |
| success rate            | 0.67     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 39800000 |
| mean 100 episode reward | 0.4      |
| steps                   | 96348866 |
| success rate            | 0.72     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 40700000 |
| mean 100 episode reward | 0.5      |
| steps                   | 93652018 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 39900000 |
| mean 100 episode reward | 0.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 41300000 |
| mean 100 episode reward | 0.5      |
| steps                   | 99668802 |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 42200000 |
| mean 100 episode reward | 0.4      |
| steps                   | 96939563 |
| success rate            | 0.7      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 41400000 |
| mean 100 episode reward | 0.6      |
| steps                   | 99881796 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 42300000 |
| mean 100 episode reward | 0.5      |
| steps                  

In [12]:
# Block until every training process has exited.
for proc in procs:
    proc.join()

# A child that died (for example from an exception raised inside
# run_dqn_model) exits with a non-zero code. Surface that instead of
# reporting success unconditionally.
failed = [(proc.name, proc.exitcode) for proc in procs if proc.exitcode != 0]
if failed:
    raise RuntimeError(
        f'Training processes exited abnormally (name, exit code): {failed}'
    )
print('All jobs completed and terminated successfully')

All jobs completed and terminated successfully
