In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import glob
import os
from multiprocessing import Process
import sys
sys.path.append('..')
from modules import utils, constants
from stable_baselines import DQN
# from stable_baselines.deepq.policies import MlpPolicy
from stable_baselines.deepq.policies import FeedForwardPolicy
from stable_baselines.common.callbacks import CheckpointCallback

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."


In [2]:
# Seed every random number generator this notebook relies on with the shared
# project seed so that runs are repeatable.
SEED = constants.SEED
random.seed(SEED)
np.random.seed(SEED)
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it here presumably only influences processes launched afterwards — confirm.
os.environ['PYTHONHASHSEED']=str(SEED)
# Both the top-level and the compat.v1 TensorFlow seeding entry points are
# called; presumably one of them is redundant on this TF version — TODO confirm.
tf.set_random_seed(SEED)
tf.compat.v1.set_random_seed(SEED)
# Display the seed as the cell output.
SEED

42

In [3]:
# def custom_architecture(input_tensor, num_actions, net_arch):
    
#     with tf.variable_scope('custom_net', reuse=tf.AUTO_REUSE):
#         layer_1 = tf.layers.dense(input_tensor, units=net_arch[0], activation=tf.nn.relu)
#         layer_2 = tf.layers.dense(layer_1, units=net_arch[1], activation=tf.nn.relu)
#         layer_3 = tf.layers.dense(layer_2, units=net_arch[2], activation=tf.nn.relu)
#         layer_4 = tf.layers.dense(layer_3, units=net_arch[3], activation=tf.nn.relu)
#         output_layer = tf.layers.dense(layer_4, units=num_actions, activation=None)

#     return output_layer

In [4]:
# Custom MLP policy with two hidden layers of 64 and 32 units
class CustomDQNPolicy(FeedForwardPolicy):
    """Feed-forward DQN policy with a fixed MLP architecture.

    The constructor forwards every positional and keyword argument to
    ``FeedForwardPolicy`` and pins three settings on top of them:

    * ``layers=[64, 32]`` - two hidden layers,
    * ``layer_norm=False``,
    * ``feature_extraction='mlp'``.

    Because these three keywords are always supplied here, passing any of
    them again through ``kwargs`` raises ``TypeError`` (duplicate keyword).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(
            *args,
            layers=[64, 32],
            layer_norm=False,
            feature_extraction='mlp',
            **kwargs,
        )

#### delete from here

#### end here

In [5]:
# policy_kwargs = {'net_arch': [256, 128, 64]}
# policy_kwargs = dict(net_arch=[256, 128, 64])

In [6]:
def stable_dueling_dqn(X_train, y_train, timesteps, save=False, log_path=None, log_prefix='dueling_dqn', filename=None, per=False):
    '''
    Creates and trains a dueling DQN model (double Q-learning switched off).

    Parameters
    ----------
    X_train, y_train
        Passed unchanged to ``utils.create_env`` to build the training
        environment.
    timesteps : int
        Total number of steps handed to ``model.learn``.
    save : bool
        When True, the trained model is written to
        ``{log_path}/{filename}.pkl`` after training.
    log_path : str or None
        Directory used for periodic checkpoints and for the final model.
        When None, training runs without a checkpoint callback.
    log_prefix : str
        Name prefix for checkpoint files. If ``per`` is True and the prefix
        was left at its default, ``'dueling_dqn_per'`` is used instead; an
        explicitly supplied prefix is always respected.
    filename : str or None
        Base name (without extension) of the final model file.
    per : bool
        Forwarded to ``DQN`` as ``prioritized_replay``.

    Returns
    -------
    The trained ``DQN`` model.

    Raises
    ------
    ValueError
        If ``save`` is True but ``log_path`` or ``filename`` is missing.
    '''
    # Fail before training starts rather than after a long run ends with an
    # attempt to write to 'None/None.pkl'.
    if save and (log_path is None or filename is None):
        raise ValueError('save=True requires both log_path and filename')

    # Only swap in the PER prefix when the caller did not choose one.
    if per and log_prefix == 'dueling_dqn':
        log_prefix = 'dueling_dqn_per'

    training_env = utils.create_env(X_train, y_train)
    try:
        # NOTE(review): the dueling head is not configured here; presumably it
        # comes from the policy class defaults — confirm.
        model = DQN(CustomDQNPolicy, training_env, verbose=1, seed=constants.SEED, learning_rate=0.0001,
                    buffer_size=2000000, learning_starts=50000, train_freq=4, target_network_update_freq=10000,
                    exploration_final_eps=0.05, n_cpu_tf_sess=1, double_q=False, prioritized_replay=per)

        # Without a target directory there is nowhere to write checkpoints.
        checkpoint_callback = None
        if log_path is not None:
            checkpoint_callback = CheckpointCallback(save_freq=constants.CHECKPOINT_FREQ, save_path=log_path,
                                                     name_prefix=log_prefix)

        model.learn(total_timesteps=timesteps, log_interval=100000, callback=checkpoint_callback)
        if save:
            model.save(f'{log_path}/{filename}.pkl')
    finally:
        # Release the environment even when construction, training or saving fails.
        training_env.close()
    return model


In [7]:
def stable_dueling_ddqn(X_train, y_train, timesteps, save=False, log_path=None, log_prefix='dueling_ddqn', filename=None, per=False):
    '''
    Creates and trains a dueling double DQN model.

    Parameters
    ----------
    X_train, y_train
        Passed unchanged to ``utils.create_env`` to build the training
        environment.
    timesteps : int
        Total number of steps handed to ``model.learn``.
    save : bool
        When True, the trained model is written to
        ``{log_path}/{filename}.pkl`` after training.
    log_path : str or None
        Directory used for periodic checkpoints and for the final model.
        When None, training runs without a checkpoint callback.
    log_prefix : str
        Name prefix for checkpoint files. If ``per`` is True and the prefix
        was left at its default, ``'dueling_ddqn_per'`` is used instead; an
        explicitly supplied prefix is always respected.
    filename : str or None
        Base name (without extension) of the final model file.
    per : bool
        Forwarded to ``DQN`` as ``prioritized_replay``.

    Returns
    -------
    The trained ``DQN`` model.

    Raises
    ------
    ValueError
        If ``save`` is True but ``log_path`` or ``filename`` is missing.
    '''
    # Fail before training starts rather than after a long run ends with an
    # attempt to write to 'None/None.pkl'.
    if save and (log_path is None or filename is None):
        raise ValueError('save=True requires both log_path and filename')

    # Only swap in the PER prefix when the caller did not choose one.
    if per and log_prefix == 'dueling_ddqn':
        log_prefix = 'dueling_ddqn_per'

    training_env = utils.create_env(X_train, y_train)
    try:
        # NOTE(review): neither double_q nor the dueling head is set explicitly;
        # presumably both rely on library defaults — confirm.
        model = DQN(CustomDQNPolicy, training_env, verbose=1, seed=constants.SEED, learning_rate=0.0001,
                    buffer_size=2000000, learning_starts=50000, train_freq=4, target_network_update_freq=10000,
                    exploration_final_eps=0.05, n_cpu_tf_sess=1, prioritized_replay=per)

        # Without a target directory there is nowhere to write checkpoints.
        checkpoint_callback = None
        if log_path is not None:
            checkpoint_callback = CheckpointCallback(save_freq=constants.CHECKPOINT_FREQ, save_path=log_path,
                                                     name_prefix=log_prefix)

        model.learn(total_timesteps=timesteps, log_interval=100000, callback=checkpoint_callback)
        if save:
            model.save(f'{log_path}/{filename}.pkl')
    finally:
        # Release the environment even when construction, training or saving fails.
        training_env.close()
    return model


In [8]:
def run_dqn_model(model_type, steps):
    '''
    Trains and saves the model variant named by ``model_type``.

    Reads the notebook-level globals ``X_train``, ``y_train`` and ``SEED``,
    so those cells must have been executed before this function is called.

    Parameters
    ----------
    model_type : str
        Either ``'dueling_dqn_per'`` or ``'dueling_ddqn_per'``.
    steps : int
        Number of training timesteps; also embedded in the output directory
        and file names.

    Returns
    -------
    The trained model returned by the selected training function.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.
    '''
    # Dispatch table: prefix and file name are both derived from model_type,
    # which keeps them consistent (the DQN branch used to save as '..._pre_...').
    trainers = {
        'dueling_dqn_per': stable_dueling_dqn,
        'dueling_ddqn_per': stable_dueling_ddqn,
    }
    if model_type not in trainers:
        raise ValueError(f'Unknown model type - {model_type}!')

    # NOTE(review): the directory is called 'arch_3_layers' while
    # CustomDQNPolicy in this notebook uses layers=[64, 32] — confirm the name.
    parent_dir = f'../models/logs/{model_type}/arch_3_layers'
    path = os.path.join(parent_dir, f'seed_{SEED}_{steps}')

    return trainers[model_type](X_train, y_train, steps, save=True, log_path=path,
                                log_prefix=model_type, filename=f'{model_type}_{steps}', per=True)

In [9]:
# Read the training set and replace every missing value with the sentinel -1
# in a single chained expression, then preview the first rows.
train_df = pd.read_csv('../data/train_set_basic.csv').fillna(-1)
train_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,0,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,1
3,1,0,0,0,0,0,0,1,1,0,...,0,1,0,0,0,0,0,1,1,1
4,1,0,0,1,1,1,0,0,0,0,...,0,1,0,0,0,1,0,0,1,1


In [10]:
# Class balance of the target column.
train_df['label'].value_counts()

0    26203
1    24197
Name: label, dtype: int64

In [11]:
# Every column except the last is a feature; the last column is the target.
# Both are materialised as NumPy arrays.
X_train = np.array(train_df.iloc[:, :-1])
y_train = np.array(train_df.iloc[:, -1])
X_train.shape, y_train.shape

((50400, 23), (50400,))

In [12]:
# Full list of variants, kept for reference:
# model_names = ['dqn', 'ddqn', 'dueling_dqn', 'dueling_ddqn', 'dqn_per', 'ddqn_per', 'dueling_dqn_per', 
#                'dueling_ddqn_per']
# Variants trained in this run.
model_names = ['dueling_dqn_per', 'dueling_ddqn_per']
# Handles of the worker processes; filled in when the jobs are launched.
procs = list()
# Training budget per model: 10e7 == 1e8 == 100 million timesteps.
steps = 10 ** 8

In [13]:
# Launch one worker process per model variant so the variants train in parallel.
# Each handle is kept in `procs` so the processes can be joined later.
# NOTE(review): re-running this cell appends to the existing `procs` list and
# starts another set of workers.
for name in model_names:
    proc = Process(target=run_dqn_model, args=(name, steps))
    procs.append(proc)
    proc.start()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
--------------------------------------
| % time spent exploring  | 95       |
| episodes                | 100000   |
| mean 100 episode reward | -0.9     |
| steps                   | 468959   |
| success rate            | 0.13     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 95       |
| episodes                | 100000   |
| mean 100 episode reward | -0.8     |
| steps                   | 468867   |
| success rate            | 0.17     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 90       |
| episodes    

--------------------------------------
| % time spent exploring  | 22       |
| episodes                | 1300000  |
| mean 100 episode reward | -0.3     |
| steps                   | 8115765  |
| success rate            | 0.49     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 20       |
| episodes                | 1300000  |
| mean 100 episode reward | -1.2     |
| steps                   | 8414309  |
| success rate            | 0.06     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 15       |
| episodes                | 1400000  |
| mean 100 episode reward | 0.2      |
| steps                   | 8937866  |
| success rate            | 0.69     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 7        |
| episodes                | 1400000  |
| mean 100 episode reward | -1.4     |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 2400000  |
| mean 100 episode reward | -1.3     |
| steps                   | 25084302 |
| success rate            | 0.1      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 3300000  |
| mean 100 episode reward | 0.5      |
| steps                   | 25047031 |
| success rate            | 0.86     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 3400000  |
| mean 100 episode reward | 0.4      |
| steps                   | 25847293 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 2500000  |
| mean 100 episode reward | -1.3     |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5300000  |
| mean 100 episode reward | 0.4      |
| steps                   | 40579658 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 3500000  |
| mean 100 episode reward | -0.5     |
| steps                   | 42268807 |
| success rate            | 0.47     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5400000  |
| mean 100 episode reward | 0.4      |
| steps                   | 41320917 |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5500000  |
| mean 100 episode reward | 0.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 4400000  |
| mean 100 episode reward | -0.3     |
| steps                   | 57437373 |
| success rate            | 0.6      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 7500000  |
| mean 100 episode reward | 0.3      |
| steps                   | 56209575 |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 7600000  |
| mean 100 episode reward | 0.5      |
| steps                   | 56889481 |
| success rate            | 0.85     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 4500000  |
| mean 100 episode reward | -0.1     |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 9700000  |
| mean 100 episode reward | 0.5      |
| steps                   | 70404099 |
| success rate            | 0.83     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5300000  |
| mean 100 episode reward | -0.3     |
| steps                   | 72544968 |
| success rate            | 0.57     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 9800000  |
| mean 100 episode reward | 0.5      |
| steps                   | 71022476 |
| success rate            | 0.84     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 9900000  |
| mean 100 episode reward | 0.7      |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 11900000 |
| mean 100 episode reward | 0.5      |
| steps                   | 83525535 |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 12000000 |
| mean 100 episode reward | 0.6      |
| steps                   | 84099347 |
| success rate            | 0.89     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 12100000 |
| mean 100 episode reward | 0.5      |
| steps                   | 84647665 |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 6200000  |
| mean 100 episode reward | -0.2     |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 14200000 |
| mean 100 episode reward | 0.6      |
| steps                   | 96022604 |
| success rate            | 0.86     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 14300000 |
| mean 100 episode reward | 0.6      |
| steps                   | 96546384 |
| success rate            | 0.84     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 7000000  |
| mean 100 episode reward | -0.4     |
| steps                   | 99642308 |
| success rate            | 0.55     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 14400000 |
| mean 100 episode reward | 0.6      |
| steps                  

In [14]:
# Block until every training process has finished.
for proc in procs:
    proc.join()

# A joined process reports exit code 0 only when its target returned normally;
# anything else means the worker raised or was killed, so do not claim success.
failed_procs = [(proc.name, proc.exitcode) for proc in procs if proc.exitcode != 0]
if failed_procs:
    print(f'Some jobs did not terminate successfully (name, exit code): {failed_procs}')
else:
    print('All jobs completed and terminated successfully')

All jobs completed and terminated successfully
