In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import glob
import os
from multiprocessing import Process
import sys
sys.path.append('..')
from modules import utils, constants
from stable_baselines import DQN
from stable_baselines.deepq.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.common.callbacks import CheckpointCallback

In [None]:
# Seed every random number source used in this notebook from the shared project seed.
SEED = constants.SEED
random.seed(SEED)
np.random.seed(SEED)
# NOTE(review): PYTHONHASHSEED is read when an interpreter starts, so assigning it here
# does not change string hashing in the already-running kernel — confirm this is intended.
os.environ['PYTHONHASHSEED']=str(SEED)
# Both spellings of the TensorFlow seed call are used; presumably to cover
# different TF 1.x releases — verify which one the installed version needs.
tf.set_random_seed(SEED)
tf.compat.v1.set_random_seed(SEED)
# Echo the seed as the cell output.
SEED

In [None]:
# Display the BETA and CHECKPOINT_FREQ values configured in the constants module.
constants.BETA, constants.CHECKPOINT_FREQ

In [None]:
class CustomMlpPolicy(MlpPolicy):
    '''
    MlpPolicy subclass that always forwards ``layers=[128, 128, 128]`` to the parent constructor.

    All other positional and keyword arguments are passed through unchanged.
    '''
    def __init__(self, *args, **kwargs):
        hidden_layers = [128, 128, 128]
        super().__init__(*args, layers=hidden_layers, **kwargs)

In [None]:
# def custom_network(input_shape, num_classes):
#     inputs = tf.keras.layers.Input(shape=input_shape)
#     shared_layer1 = tf.keras.layers.Dense(128, activation='relu')(inputs)
#     shared_layer2 = tf.keras.layers.Dense(128, activation='relu')(shared_layer1)
#     shared_layer3 = tf.keras.layers.Dense(128, activation='relu')(shared_layer2)
#     out = tf.keras.layers.Dense(num_actions, activation='softmax')(shared_layer3)
#     return inputs, out

# TODO: adjust the hidden-layer size (24) and pass the number of actions (num_actions) through to the network.
# model = DQN('MlpPolicy', env, policy_kwargs={'net_arch': (24,), 'custom_objects': {'num_actions': num_classes}}, verbose=1)

In [None]:
def stable_dueling_dqn(X_train, y_train, timesteps, save=False, log_path=None, log_prefix='dueling_dqn', filename=None, per=False):
    '''
    Creates and trains a dueling DQN model (double Q-learning disabled).

    NOTE(review): nothing here enables dueling explicitly; it presumably comes from
    the policy's default settings — confirm against the stable-baselines version in use.

    Parameters
    ----------
    X_train, y_train :
        Training data, passed unchanged to ``utils.create_env``.
    timesteps : int
        Total number of timesteps passed to ``model.learn``.
    save : bool
        When True, the trained model is saved to ``{log_path}/{filename}.pkl``.
    log_path : str or None
        Directory for periodic checkpoints and the final model. When None, no
        checkpoint callback is attached.
    log_prefix : str
        Name prefix of the checkpoint files. Replaced by ``'dueling_dqn_per'``
        whenever ``per`` is True.
    filename : str or None
        Base name of the final model file; falls back to ``log_prefix`` when None.
    per : bool
        Passed to ``DQN`` as ``prioritized_replay``.

    Returns
    -------
    The trained ``DQN`` model.

    Raises
    ------
    ValueError
        If ``save`` is True but ``log_path`` is None.
    '''
    if per:
        log_prefix = 'dueling_dqn_per'
    # Fail before training starts instead of after it, when there is nowhere to save to.
    if save and log_path is None:
        raise ValueError('log_path is required when save=True')
    if filename is None:
        filename = log_prefix

    training_env = utils.create_env(X_train, y_train)
    try:
        model = DQN(CustomMlpPolicy, training_env, verbose=1, seed=constants.SEED, learning_rate=0.0001,
                    buffer_size=1000000, learning_starts=50000, train_freq=4, target_network_update_freq=10000,
                    exploration_final_eps=0.05, n_cpu_tf_sess=1, double_q=False, prioritized_replay=per)

        # Checkpointing needs a directory; without one, train with no callback.
        checkpoint_callback = None
        if log_path is not None:
            checkpoint_callback = CheckpointCallback(save_freq=constants.CHECKPOINT_FREQ, save_path=log_path,
                                                     name_prefix=log_prefix)
        model.learn(total_timesteps=timesteps, log_interval=100000, callback=checkpoint_callback)
        if save:
            model.save(f'{log_path}/{filename}.pkl')
    finally:
        # Release the environment even when model creation, training or saving fails.
        training_env.close()
    return model

In [None]:
def stable_dueling_ddqn(X_train, y_train, timesteps, save=False, log_path=None, log_prefix='dueling_ddqn', filename=None, per=False):
    '''
    Creates and trains a dueling double DQN model.

    NOTE(review): nothing here enables dueling explicitly; it presumably comes from
    the policy's default settings — confirm against the stable-baselines version in use.

    Parameters
    ----------
    X_train, y_train :
        Training data, passed unchanged to ``utils.create_env``.
    timesteps : int
        Total number of timesteps passed to ``model.learn``.
    save : bool
        When True, the trained model is saved to ``{log_path}/{filename}.pkl``.
    log_path : str or None
        Directory for periodic checkpoints and the final model. When None, no
        checkpoint callback is attached.
    log_prefix : str
        Name prefix of the checkpoint files. Replaced by ``'dueling_ddqn_per'``
        whenever ``per`` is True.
    filename : str or None
        Base name of the final model file; falls back to ``log_prefix`` when None.
    per : bool
        Passed to ``DQN`` as ``prioritized_replay``.

    Returns
    -------
    The trained ``DQN`` model.

    Raises
    ------
    ValueError
        If ``save`` is True but ``log_path`` is None.
    '''
    if per:
        log_prefix = 'dueling_ddqn_per'
    # Fail before training starts instead of after it, when there is nowhere to save to.
    if save and log_path is None:
        raise ValueError('log_path is required when save=True')
    if filename is None:
        filename = log_prefix

    training_env = utils.create_env(X_train, y_train)
    try:
        # double_q is spelled out so the difference from stable_dueling_dqn is visible here.
        model = DQN(CustomMlpPolicy, training_env, verbose=1, seed=constants.SEED, learning_rate=0.0001,
                    buffer_size=1000000, learning_starts=50000, train_freq=4, target_network_update_freq=10000,
                    exploration_final_eps=0.05, n_cpu_tf_sess=1, double_q=True, prioritized_replay=per)

        # Checkpointing needs a directory; without one, train with no callback.
        checkpoint_callback = None
        if log_path is not None:
            checkpoint_callback = CheckpointCallback(save_freq=constants.CHECKPOINT_FREQ, save_path=log_path,
                                                     name_prefix=log_prefix)
        model.learn(total_timesteps=timesteps, log_interval=100000, callback=checkpoint_callback)
        if save:
            model.save(f'{log_path}/{filename}.pkl')
    finally:
        # Release the environment even when model creation, training or saving fails.
        training_env.close()
    return model

In [None]:
def run_dqn_model(model_type, steps):
    '''
    Trains the requested model type and stores its checkpoints and final model on disk.

    The run directory is
    ``../models/logs/{model_type}/missingness/0.1/no_step_penalty/custom_arch_3_layers/seed_{SEED}_{steps}``.
    Missing parent directories are created. An already existing run directory
    raises ``FileExistsError``, so a previous run is never silently overwritten.

    Reads the notebook-level globals ``SEED``, ``X_train`` and ``y_train``.

    Parameters
    ----------
    model_type : str
        Either ``'dueling_dqn_per'`` or ``'dueling_ddqn_per'``.
    steps : int
        Number of training timesteps; also part of the directory and file names.

    Returns
    -------
    The trained model returned by the selected training function.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.
    '''
    trainers = {
        'dueling_dqn_per': stable_dueling_dqn,
        'dueling_ddqn_per': stable_dueling_ddqn,
    }
    # Validate before touching the filesystem so a typo never leaves stray directories behind.
    if model_type not in trainers:
        raise ValueError(f'Unknown model type - {model_type}!')

    dir_name = f'seed_{SEED}_{steps}'
    parent_dir = f'../models/logs/{model_type}/missingness/0.1/no_step_penalty/custom_arch_3_layers'
    path = os.path.join(parent_dir, dir_name)
    # makedirs (not mkdir) so a fresh checkout without the log tree still works.
    os.makedirs(path)

    return trainers[model_type](X_train, y_train, steps, save=True, log_path=path, log_prefix=model_type,
                                filename=f'{model_type}_{steps}', per=True)

In [None]:
# Load the training set and replace every missing value with the sentinel -1.
# NOTE(review): assumes -1 never occurs as a genuine feature value — confirm.
train_df = pd.read_csv('../new_data/train_set_missingness_0.1.csv')
train_df = train_df.fillna(-1)
train_df.head()

In [None]:
# Show how many rows carry each value of the `label` column.
train_df.label.value_counts()

In [None]:
# Display the row at position 90 as a spot check of the loaded data.
train_df.iloc[90]

In [None]:
# Split into features (every column except the last) and target (the last column),
# then convert both to NumPy arrays.
# NOTE(review): presumably the last column is `label` — confirm against the CSV.
X_train = train_df.iloc[:, 0:-1]
y_train = train_df.iloc[:, -1]
X_train, y_train = np.array(X_train), np.array(y_train)
X_train.shape, y_train.shape

In [None]:
# Run configuration: the model types to train, the list that collects their
# worker processes, and the number of training timesteps per model.
model_names = ['dueling_dqn_per', 'dueling_ddqn_per']
procs = []
steps = int(100e6)  # 100 million timesteps

In [None]:
# Start one worker process per model type and keep the handles in `procs`.
for name in model_names:
    # To debug without multiprocessing, call run_dqn_model(name, steps) here instead.
    proc = Process(target=run_dqn_model, args=(name, steps))
    procs.append(proc)
    proc.start()

In [None]:
# Wait for every worker process to finish.
for proc in procs:
    proc.join()

# A joined process can still have crashed; only a zero exit code means it finished cleanly.
failed_procs = [proc for proc in procs if proc.exitcode != 0]
if failed_procs:
    details = ', '.join(f'{proc.name} (exit code {proc.exitcode})' for proc in failed_procs)
    raise RuntimeError(f'{len(failed_procs)} of {len(procs)} jobs failed: {details}')
print('All jobs completed and terminated successfully')