In [2]:
import pandas as pd
import numpy as np
import random
import os
import torch
import sys
sys.path.append('../')
from modules import utils, constants
import warnings
warnings.filterwarnings('ignore')
from stable_baselines import DQN
from stable_baselines import bench, logger
# from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize
import tensorflow
from stable_baselines.common.callbacks import CheckpointCallback
# from modules.env import LupusEnv

In [3]:
# Pin every random number generator used in this notebook to the project seed.
SEED = constants.SEED

# Exported as a string because environment variables can only hold text.
os.environ['PYTHONHASHSEED'] = str(SEED)

# Python stdlib, NumPy and TensorFlow (graph-level) generators all share SEED.
random.seed(SEED)
np.random.seed(SEED)
tensorflow.set_random_seed(SEED)

#### The Data

In [7]:
# Training split read from the "missingness/0" data folder.
# (Previously used source, kept for reference: '../data/25_jan/train_set_basic.csv')
train_df = pd.read_csv(filepath_or_buffer='../data/missingness/0/training_set.csv')

# Preview the first five rows.
train_df.head(n=5)

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,0,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,1
3,1,0,0,0,0,0,0,1,1,0,...,0,1,0,0,0,0,0,1,1,1
4,1,0,0,1,1,1,0,0,0,0,...,0,1,0,0,0,1,0,0,1,1


In [8]:
# Frequency of each distinct value in the cutaneous_lupus column.
train_df['cutaneous_lupus'].value_counts()

0    44200
3     3120
1     2246
2      834
Name: cutaneous_lupus, dtype: int64

In [9]:
# Rows labelled 1 whose ana value is 0 (rows matching both conditions).
train_df.loc[(train_df['ana'] == 0) & (train_df['label'] == 1)]

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label


In [10]:
# Number of missing entries in each column.
train_df.isnull().sum(axis=0)

ana                             0
fever                           0
leukopenia                      0
thrombocytopenia                0
auto_immune_hemolysis           0
delirium                        0
psychosis                       0
seizure                         0
non_scarring_alopecia           0
oral_ulcers                     0
cutaneous_lupus                 0
pleural_effusion                0
pericardial_effusion            0
acute_pericarditis              0
joint_involvement               0
proteinuria                     0
anti_cardioliphin_antibodies    0
anti_β2gp1_antibodies           0
lupus_anti_coagulant            0
low_c3                          0
low_c4                          0
anti_dsdna_antibody             0
anti_smith_antibody             0
label                           0
dtype: int64

In [11]:
# Number of rows in the training frame.
train_df.shape[0]

50400

In [12]:
# Positional split: every column but the last is a feature, the last is the target.
X_train = np.array(train_df.iloc[:, :-1])
y_train = np.array(train_df.iloc[:, -1])

# Show both array shapes as a sanity check.
(X_train.shape, y_train.shape)

((50400, 23), (50400,))

In [13]:
# Action names: the keys of constants.CLASS_DICT first, followed by every
# non-label column name of the training frame (in column order).
action_list = [
    *constants.CLASS_DICT.keys(),
    *(column for column in train_df.columns if column != 'label'),
]
action_list

['No lupus',
 'Lupus',
 'Inconclusive diagnosis',
 'ana',
 'fever',
 'leukopenia',
 'thrombocytopenia',
 'auto_immune_hemolysis',
 'delirium',
 'psychosis',
 'seizure',
 'non_scarring_alopecia',
 'oral_ulcers',
 'cutaneous_lupus',
 'pleural_effusion',
 'pericardial_effusion',
 'acute_pericarditis',
 'joint_involvement',
 'proteinuria',
 'anti_cardioliphin_antibodies',
 'anti_β2gp1_antibodies',
 'lupus_anti_coagulant',
 'low_c3',
 'low_c4',
 'anti_dsdna_antibody',
 'anti_smith_antibody']

#### The Model

In [14]:
# Build the environment from the training arrays and wrap it in a Monitor that
# is pointed at the stable-baselines logger directory.
training_env = utils.create_env(X_train, y_train)
training_env = bench.Monitor(training_env, logger.get_dir())

# "Vanilla" DQN: the dueling architecture and double Q-learning are both
# switched off explicitly. The run is seeded with the project-wide seed.
model = DQN('MlpPolicy', training_env, verbose=1, seed=constants.SEED, learning_rate=0.0001, buffer_size=1000000,
            learning_starts=50000, train_freq=4, target_network_update_freq=10000, exploration_final_eps=0.05,
            n_cpu_tf_sess=1, policy_kwargs=dict(dueling=False), double_q=False)

# Periodic checkpoints so a crash during the long run does not lose everything.
# NOTE(review): the paths below hard-code "seed_84" while the model is seeded
# with constants.SEED — confirm the two agree, otherwise the artefacts are mislabelled.
checkpoint_callback = CheckpointCallback(save_freq=100000, save_path='../models/logs/sb/dqn_seed_84',
                                         name_prefix='dqn_vanilla_basic')

# Create the destination directory for the final model *before* training:
# model.save() is only reached after 200M timesteps, and it is not expected to
# create missing parent directories, so a missing folder would otherwise
# surface as an error at the very end of the run.
final_model_path = '../models/22_mar/vanilla_dqn_seed_84_lupus_diagnosis'
os.makedirs(os.path.dirname(final_model_path), exist_ok=True)

model.learn(total_timesteps=200000000, log_interval=100000, callback=checkpoint_callback)

# Save the trained DQN agent
model.save(final_model_path)
# training_env.close()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use tf.cast instead.
--------------------------------------
| % time spent exploring  | 97       |
| episodes                | 100000   |
| mean 100 episode reward | -0.8     |
| steps                   | 466700   |
| success rate            | 0.15     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 95       |
| episodes                | 200000   |
| mean 100 episode reward | -0.7     |
| steps                   | 941460   |
| success rate            | 0.23     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 93       |
| episodes                | 300000   |
| mean 100 episode reward | -0.8     |
| steps                   | 1422072  |
| success rate            | 0.15     |
--------------------------------------


--------------------------------------
| % time spent exploring  | 10       |
| episodes                | 2800000  |
| mean 100 episode reward | -0.6     |
| steps                   | 18743917 |
| success rate            | 0.34     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 6        |
| episodes                | 2900000  |
| mean 100 episode reward | -0.7     |
| steps                   | 19680361 |
| success rate            | 0.28     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 3000000  |
| mean 100 episode reward | -0.6     |
| steps                   | 20601463 |
| success rate            | 0.35     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 3100000  |
| mean 100 episode reward | -0.4     |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 5900000  |
| mean 100 episode reward | -0.6     |
| steps                   | 48071315 |
| success rate            | 0.33     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 6000000  |
| mean 100 episode reward | -0.5     |
| steps                   | 48943617 |
| success rate            | 0.35     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 6100000  |
| mean 100 episode reward | -0.6     |
| steps                   | 49830027 |
| success rate            | 0.32     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 6200000  |
| mean 100 episode reward | -0.6     |
| steps                  

--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 9000000  |
| mean 100 episode reward | -0.5     |
| steps                   | 76323724 |
| success rate            | 0.37     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 9100000  |
| mean 100 episode reward | -0.5     |
| steps                   | 77283623 |
| success rate            | 0.4      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 9200000  |
| mean 100 episode reward | -0.1     |
| steps                   | 78274623 |
| success rate            | 0.59     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 5        |
| episodes                | 9300000  |
| mean 100 episode reward | -0.3     |
| steps                  

---------------------------------------
| % time spent exploring  | 5         |
| episodes                | 12000000  |
| mean 100 episode reward | -0.3      |
| steps                   | 110451850 |
| success rate            | 0.53      |
---------------------------------------
---------------------------------------
| % time spent exploring  | 5         |
| episodes                | 12100000  |
| mean 100 episode reward | -0.5      |
| steps                   | 111517605 |
| success rate            | 0.42      |
---------------------------------------
---------------------------------------
| % time spent exploring  | 5         |
| episodes                | 12200000  |
| mean 100 episode reward | -0.4      |
| steps                   | 112779221 |
| success rate            | 0.51      |
---------------------------------------
---------------------------------------
| % time spent exploring  | 5         |
| episodes                | 12300000  |
| mean 100 episode reward | -0.2      |


---------------------------------------
| % time spent exploring  | 5         |
| episodes                | 15000000  |
| mean 100 episode reward | -0.1      |
| steps                   | 151034798 |
| success rate            | 0.67      |
---------------------------------------
---------------------------------------
| % time spent exploring  | 5         |
| episodes                | 15100000  |
| mean 100 episode reward | -0.4      |
| steps                   | 152539187 |
| success rate            | 0.51      |
---------------------------------------
---------------------------------------
| % time spent exploring  | 5         |
| episodes                | 15200000  |
| mean 100 episode reward | -0.4      |
| steps                   | 154052497 |
| success rate            | 0.53      |
---------------------------------------
---------------------------------------
| % time spent exploring  | 5         |
| episodes                | 15300000  |
| mean 100 episode reward | -0.4      |


---------------------------------------
| % time spent exploring  | 5         |
| episodes                | 18000000  |
| mean 100 episode reward | -0.2      |
| steps                   | 196409693 |
| success rate            | 0.62      |
---------------------------------------
---------------------------------------
| % time spent exploring  | 5         |
| episodes                | 18100000  |
| mean 100 episode reward | -0.2      |
| steps                   | 197877455 |
| success rate            | 0.61      |
---------------------------------------
---------------------------------------
| % time spent exploring  | 5         |
| episodes                | 18200000  |
| mean 100 episode reward | -0.2      |
| steps                   | 199342086 |
| success rate            | 0.66      |
---------------------------------------
