In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import glob
import os
from multiprocessing import Process
import sys
sys.path.append('..')
from modules import utils, constants
import torch
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import CheckpointCallback

In [None]:
# Reproducibility setup: take the shared project seed and apply it to every
# random number source used in this notebook.
SEED = constants.SEED
# PYTHONHASHSEED is read when an interpreter starts, so this assignment matters
# for interpreters launched afterwards rather than for the running kernel.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Make PyTorch prefer deterministic implementations of its operations.
torch.use_deterministic_algorithms(True)
SEED

In [None]:
# Display the BETA value defined in the project's constants module so the
# setting in effect for this run is visible in the notebook output.
constants.BETA

In [None]:
def stable_baselines3_dqn(X_train, y_train, steps, save, log_path, log_prefix, filename):
    '''
    Creates and trains a Stable-Baselines3 DQN model with the library's default hyperparameters

    Parameters:
        X_train, y_train: training data handed unchanged to utils.create_env
        steps: total number of environment timesteps to train for
        save: when truthy, the trained model is written to '{log_path}/{filename}.pkl'
        log_path: directory for periodic checkpoints and for the final model
        log_prefix: file name prefix for the periodic checkpoints
        filename: base name (without extension) of the final saved model

    Returns:
        the trained DQN model
    '''
    training_env = utils.create_env(X_train, y_train)
    try:
        model = DQN('MlpPolicy', training_env, verbose=1, seed=constants.SEED)
        checkpoint_callback = CheckpointCallback(save_freq=constants.CHECKPOINT_FREQ, save_path=log_path,
                                                 name_prefix=log_prefix)
        # The training budget is the `steps` argument; the previous code referenced an
        # undefined name `timesteps` here and failed with a NameError.
        model.learn(total_timesteps=steps, log_interval=100000, callback=checkpoint_callback)
        if save:
            model.save(f'{log_path}/{filename}.pkl')
    finally:
        # Release the environment even when training or saving fails.
        training_env.close()
    return model

def stable_vanilla_dqn(X_train, y_train, timesteps, save=False, log_path=None, log_prefix='dqn', filename=None, per=False):
    '''
    Creates and trains a standard DQN model

    The model is the Stable-Baselines3 DQN imported at the top of the notebook. The
    earlier version of this function passed Stable-Baselines 2 keyword arguments
    (target_network_update_freq, n_cpu_tf_sess, double_q, prioritized_replay and the
    `dueling` policy option), none of which the Stable-Baselines3 constructor accepts.

    Parameters:
        X_train, y_train: training data handed unchanged to utils.create_env
        timesteps: total number of environment timesteps to train for
        save: when True, the trained model is written to '{log_path}/{filename}.pkl'
        log_path: directory for checkpoints and the final model; when None no
            periodic checkpoints are written
        log_prefix: file name prefix for the periodic checkpoints
        filename: base name (without extension) of the final saved model
        per: request prioritized experience replay; not supported, see Raises

    Returns:
        the trained DQN model

    Raises:
        NotImplementedError: if per is True
        ValueError: if save is True but log_path or filename is missing
    '''
    if per:
        # Fail loudly rather than train without prioritization and label the run 'dqn_per'.
        raise NotImplementedError('prioritized experience replay is not available in stable_baselines3.DQN')
    if save and (log_path is None or filename is None):
        raise ValueError('save=True requires both log_path and filename')

    training_env = utils.create_env(X_train, y_train)
    try:
        # target_update_interval is the Stable-Baselines3 name for the old
        # target_network_update_freq setting.
        model = DQN('MlpPolicy', training_env, verbose=1, seed=constants.SEED, learning_rate=0.0001,
                    buffer_size=1000000, learning_starts=50000, train_freq=4, target_update_interval=10000,
                    exploration_final_eps=0.05)

        # Only attach the checkpoint callback when there is somewhere to write to.
        checkpoint_callback = None
        if log_path is not None:
            checkpoint_callback = CheckpointCallback(save_freq=constants.CHECKPOINT_FREQ, save_path=log_path,
                                                     name_prefix=log_prefix)
        model.learn(total_timesteps=timesteps, log_interval=100000, callback=checkpoint_callback)
        if save:
            model.save(f'{log_path}/{filename}.pkl')
    finally:
        # Release the environment even when training or saving fails.
        training_env.close()
    return model

In [None]:
def run_dqn_model(model_type, steps):
    '''
    Trains a DQN model and stores its checkpoints and final weights under
    '../models/logs/{model_type}/missingness/0.1/biopsy_9/seed_{SEED}_{steps}'

    Reads the notebook globals SEED, X_train and y_train.

    Parameters:
        model_type: name of the model variant; it only selects the output directory
        steps: total number of environment timesteps to train for

    Returns:
        the trained model returned by stable_baselines3_dqn
    '''
    dir_name = f'seed_{SEED}_{steps}'
    parent_dir = f'../models/logs/{model_type}/missingness/0.1/biopsy_9'
    path = os.path.join(parent_dir, dir_name)
    # Create missing parent directories as well, and tolerate a directory left over
    # from an earlier run so the notebook can be re-executed.
    os.makedirs(path, exist_ok=True)
    # NOTE(review): the training call and the 'dueling_ddqn_per' file names below are
    # the same for every model_type - confirm that this is intended.
    model = stable_baselines3_dqn(X_train, y_train, steps, save=True, log_path=path, log_prefix='dueling_ddqn_per',
                                  filename=f'dueling_ddqn_per_{steps}')
    return model

In [None]:
# Read the training split and replace every missing value with -1, then preview
# the first rows.
train_df = pd.read_csv('../new_data/train_set_missingness_0.1.csv').fillna(-1)
train_df.head()

In [None]:
# Number of training rows per value of the `label` column.
train_df['label'].value_counts()

In [None]:
# Split the frame by position: every column except the last becomes the feature
# matrix and the last column becomes the target vector, both as NumPy arrays.
# NOTE(review): assumes the final column is the label - confirm against the CSV.
X_train = np.array(train_df.iloc[:, :-1])
y_train = np.array(train_df.iloc[:, -1])
X_train.shape, y_train.shape

In [None]:
# Model variants trained by this run, one worker process per entry.
# A longer selection that was used here before, kept for reference:
#   dqn, ddqn, dueling_dqn, dueling_ddqn, dqn_per, ddqn_per, dueling_dqn_per, dueling_ddqn_per
model_names = [
    'dueling_dqn_per',
    'dueling_ddqn_per',
]
# Handles of the worker processes that get started for the entries above.
procs = []
# Training budget in environment timesteps (10e7 == 100 million).
steps = 100_000_000

In [None]:
# Start one training process per selected variant so the runs proceed in parallel.
# (Calling run_dqn_model(name, steps) directly here would train them one after another.)
for name in model_names:
    proc = Process(target=run_dqn_model, kwargs={'model_type': name, 'steps': steps})
    procs.append(proc)
    proc.start()

In [None]:
# Block until every training process has exited.
for proc in procs:
    proc.join()

# join() returns whether or not the child succeeded, so inspect the exit codes
# before reporting success; a non-zero code means the worker did not finish cleanly.
failed = [(proc.name, proc.exitcode) for proc in procs if proc.exitcode != 0]
if failed:
    raise RuntimeError(f'{len(failed)} training job(s) failed (process name, exit code): {failed}')
print('All jobs completed and terminated successfully')