In [12]:
import pandas as pd
import numpy as np
import random
import os
from os.path import isfile, join
# import torch
import sys
sys.path.append('../')
from modules import utils, constants
from modules.env import LupusEnv
# import stable_baselines3, sb3_contrib
import stable_baselines
import warnings
import tensorflow
from gym.spaces import Box
from stable_baselines.common.vec_env import DummyVecEnv
warnings.filterwarnings('ignore')

In [13]:
# Seed every random number generator this notebook touches with the same value so
# that validation runs are repeatable.
SEED = 84
random.seed(SEED)
np.random.seed(SEED)
# NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here only
# affects child processes, not the hash randomisation of the running kernel.
os.environ['PYTHONHASHSEED']=str(SEED)
# Use the notebook-level SEED (previously constants.SEED) so TensorFlow is seeded
# consistently with the generators above.
tensorflow.set_random_seed(SEED)
# torch.manual_seed(SEED)
# torch.use_deterministic_algorithms(True)

#### Useful functions

In [14]:
def get_steps(filename, prefix):
    """Parse the step count embedded in a checkpoint filename.

    The first ``len(prefix)`` characters and the last 10 characters of
    ``filename`` are discarded and what remains is converted with ``int``
    (the 10-character tail is presumably a ``_steps.zip`` suffix).

    Returns:
        The parsed integer, or ``None`` when anything goes wrong; in that case
        the filename and the exception are printed instead of being raised.
    """
    try:
        remainder = filename[len(prefix):]
        steps = int(remainder[:-10])
    except Exception as err:
        print(f'Filename: {filename}')
        print(f'Exception: {err}')
        return None
    return steps

In [15]:
def load_model(filepath, model_type, env):
    """Load a saved RL model of the requested algorithm and attach ``env`` to it.

    Args:
        filepath: Path of the saved model archive, passed to ``<Algorithm>.load``.
        model_type: Case-insensitive algorithm key (e.g. ``'dqn'``, ``'ppo2'``,
            ``'dqn3'``); see the table below for the library each key maps to.
        env: Environment forwarded to ``load`` as ``env=env``.

    Returns:
        Whatever ``<Algorithm>.load(filepath, env=env)`` returns.

    Raises:
        ValueError: If ``model_type`` is not a known key, or the mapped library
            does not expose the mapped class.
        ImportError: If the library backing ``model_type`` is not installed.
    """
    # Imported locally so this cell does not depend on the notebook's import cell.
    import importlib

    # key -> (package, class name). The packages are imported lazily because the
    # notebook's import cell only imports stable_baselines; referring to
    # stable_baselines3 / sb3_contrib as globals would raise NameError.
    registry = {
        'dqn3': ('stable_baselines3', 'DQN'),
        'dqn': ('stable_baselines', 'DQN'),
        'ppo': ('stable_baselines3', 'PPO'),
        'ppo2': ('stable_baselines', 'PPO2'),
        # NOTE(review): PPO3, AC and PG do not appear to be classes shipped by
        # stable_baselines3; these keys are kept for compatibility and raise a
        # ValueError below when the class is missing.
        'ppo3': ('stable_baselines3', 'PPO3'),
        'ac': ('stable_baselines3', 'AC'),
        'a2c': ('stable_baselines', 'A2C'),
        'acer': ('stable_baselines', 'ACER'),
        'ddpg': ('stable_baselines3', 'DDPG'),
        'pg': ('stable_baselines3', 'PG'),
        'acktr': ('stable_baselines', 'ACKTR'),
        'trpo': ('sb3_contrib', 'TRPO'),
    }

    key = model_type.lower()
    if key not in registry:
        raise ValueError(f'Unknown model type {model_type}')

    package_name, class_name = registry[key]
    try:
        package = importlib.import_module(package_name)
    except ImportError as exc:
        raise ImportError(
            f'Model type {model_type} requires the {package_name} package, '
            f'which could not be imported'
        ) from exc

    algorithm = getattr(package, class_name, None)
    if algorithm is None:
        raise ValueError(
            f'Model type {model_type} is not available: '
            f'{package_name} has no class {class_name}'
        )
    return algorithm.load(filepath, env=env)

In [16]:
def get_val_metrics(model, validation_env):
    """Run ``model`` over ``validation_env`` until it is exhausted and summarise the episodes.

    Episodes are played with deterministic predictions. The ``info`` returned
    by the step that ends each episode is recorded; the loop stops when the
    environment raises ``StopIteration``.

    Args:
        model: Object exposing ``predict(obs, deterministic=True)`` returning
            ``(action, states)``.
        validation_env: Environment exposing ``reset()`` and ``step(action)``.
            ``info`` may be a single dict or a sequence of dicts (as returned
            by vectorised environments). The recorded infos must provide the
            keys ``y_actual``, ``y_pred``, ``episode_length`` and ``trajectory``.

    Returns:
        Tuple ``(acc, f1, roc_auc, min_path_length, average_path_length,
        max_path_length, min_sample_pathway, max_sample_pathway)`` where the
        first three values come from ``utils.test`` and the sample pathways are
        the ``trajectory`` of the first episode with the shortest / longest length.

    Raises:
        ValueError: If no episode finished before the environment was exhausted.
    """
    # Collect plain records and build the DataFrame once: DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic.
    episode_infos = []
    try:
        while True:
            # NOTE(review): VecEnv wrappers such as DummyVecEnv reset the wrapped
            # env automatically when an episode ends, so this explicit reset()
            # may skip a sample - confirm against LupusEnv.
            obs, done = validation_env.reset(), False
            while not done:
                action, _states = model.predict(obs, deterministic=True)
                obs, _rew, done, info = validation_env.step(action)
                if done:
                    # Shallow-copy so later reuse of the info dict by the env
                    # cannot alter an already recorded episode.
                    if isinstance(info, dict):
                        episode_infos.append(dict(info))
                    else:
                        episode_infos.extend(dict(single) for single in info)
    except StopIteration:
        # The environment signals that every validation sample has been used.
        pass

    if not episode_infos:
        raise ValueError('No completed validation episodes were recorded')

    val_df = pd.DataFrame(episode_infos)
    acc, f1, roc_auc = utils.test(val_df['y_actual'], val_df['y_pred'])
    min_path_length = val_df.episode_length.min()
    average_path_length = val_df.episode_length.mean()
    max_path_length = val_df.episode_length.max()
    min_sample_pathway = val_df[val_df.episode_length==min_path_length].trajectory.iloc[0]
    max_sample_pathway = val_df[val_df.episode_length==max_path_length].trajectory.iloc[0]
    return acc, f1, roc_auc, min_path_length, average_path_length, max_path_length, min_sample_pathway, max_sample_pathway

In [17]:
def create_val_df(folder, X_val, y_val, prefix, model_type='dqn3'):
    """Evaluate every saved checkpoint in ``folder`` and tabulate the validation metrics.

    Each regular ``.zip`` file in ``folder`` whose name starts with ``prefix``
    is loaded with ``load_model`` and evaluated with ``get_val_metrics`` on a
    fresh ``LupusEnv`` built from ``X_val`` / ``y_val``. The models with the
    best accuracy, F1 and ROC-AUC seen so far are saved in ``folder`` as
    ``best_acc_model``, ``best_f1_model`` and ``best_roc_auc_model``.

    Args:
        folder: Directory containing the checkpoints; results are written here too.
        X_val: Validation features passed to ``LupusEnv``.
        y_val: Validation labels passed to ``LupusEnv``.
        prefix: Filename prefix identifying checkpoints; also used by
            ``get_steps`` to extract the step count.
        model_type: Algorithm key understood by ``load_model``.

    Returns:
        DataFrame with one row per evaluated checkpoint, sorted by ``steps``.
        When at least one checkpoint was evaluated it is also written to
        ``<folder>/validation_results.csv``; otherwise an empty frame with the
        same columns is returned and nothing is written.
    """
    result_columns = ['steps', 'acc', 'f1', 'roc_auc', 'min_path_length',
                      'average_path_length', 'max_path_length',
                      'min_sample_pathway', 'max_sample_pathway']
    best_f1, best_acc, best_roc_auc = -1, -1, -1
    perf_list = []

    # Sort the listing so the order of evaluation (and therefore which model
    # wins a tie for "best") does not depend on the filesystem.
    for count, item in enumerate(sorted(os.listdir(folder))):
        if count%100==0:
            print(count)  # coarse progress indicator
        if not item.startswith(prefix):
            continue
        path = join(folder, item)
        if not (isfile(path) and path.endswith('.zip')):
            continue

        validation_env = DummyVecEnv([lambda: LupusEnv(X_val, y_val, random=False)])
        validation_env.seed(SEED)
        validation_env.observation_space = Box(0, 3, (constants.FEATURE_NUM,))
        model = load_model(path, model_type, validation_env)
        (acc, f1, roc_auc, min_path_length, average_path_length, max_path_length,
         min_sample_pathway, max_sample_pathway) = get_val_metrics(model, validation_env)

        perf_list.append({
            'steps': get_steps(item, prefix), 'acc':acc, 'f1':f1, 'roc_auc':roc_auc,
            'min_path_length':min_path_length, 'average_path_length':average_path_length,
            'max_path_length':max_path_length, 'min_sample_pathway':min_sample_pathway,
            'max_sample_pathway':max_sample_pathway,
        })
        if acc > best_acc:
            best_acc = acc
            model.save(f'{folder}/best_acc_model')
        if f1 > best_f1:
            best_f1 = f1
            model.save(f'{folder}/best_f1_model')
        if roc_auc > best_roc_auc:
            best_roc_auc = roc_auc
            model.save(f'{folder}/best_roc_auc_model')

    # Build and persist the table once, after the loop. Doing it per directory
    # entry crashed on an empty result list (no 'steps' column to sort by),
    # left the return value undefined for an empty folder, and rewrote the CSV
    # for every file.
    if not perf_list:
        print(f'No checkpoints matching {prefix}*.zip found in {folder}')
        return pd.DataFrame(columns=result_columns)

    val_df = pd.DataFrame(perf_list, columns=result_columns)
    val_df = val_df.sort_values(by=['steps']).reset_index(drop=True)
    val_df.to_csv(f'{folder}/validation_results.csv', index=False)
    return val_df

#### Validation

In [18]:
# Load the validation split; the path is relative to the notebook's working
# directory. The last expression previews the first rows.
validation_df = pd.read_csv('../data/missingness/0/validation_set.csv')
validation_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,1,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,1,0,1
1,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,1,0,0,1,0,...,0,1,0,1,1,0,0,0,0,1


In [19]:
# Split the frame positionally: every column except the last is a feature,
# the last column is the target. Both parts are converted to NumPy arrays.
X_val = np.array(validation_df.iloc[:, :-1])
y_val = np.array(validation_df.iloc[:, -1])
X_val.shape, y_val.shape

((5600, 23), (5600,))

In [20]:
# Evaluate the saved checkpoints in `folder` whose filenames start with `prefix`.
# The 'dqn' key makes load_model use stable_baselines.DQN.
folder = '../models/logs/sb/dqn_seed_84'
prefix = 'dqn_vanilla_basic_'
val_df = create_val_df(folder, X_val, y_val, prefix, 'dqn')
val_df.head()

0
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use tf.cast instead.
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500


Unnamed: 0,steps,acc,f1,roc_auc,min_path_length,average_path_length,max_path_length,min_sample_pathway,max_sample_pathway
0,100000,3.071429,5.40201,48.902937,8,20.265,24,"[leukopenia, low_c4, fever, acute_pericarditis...","[leukopenia, low_c4, fever, acute_pericarditis..."
1,200000,0.0,0.0,50.0,14,23.500357,24,"[delirium, ana, anti_β2gp1_antibodies, anti_ca...","[delirium, fever, thrombocytopenia, seizure, a..."
2,300000,0.0,0.0,50.0,16,23.691786,24,"[cutaneous_lupus, low_c3, ana, anti_cardioliph...","[cutaneous_lupus, pericardial_effusion, thromb..."
3,400000,0.0,0.0,50.0,16,23.771071,24,"[proteinuria, delirium, cutaneous_lupus, non_s...","[proteinuria, low_c3, acute_pericarditis, cuta..."
4,500000,0.0,0.0,50.0,16,23.813929,24,"[proteinuria, acute_pericarditis, cutaneous_lu...","[proteinuria, acute_pericarditis, cutaneous_lu..."


#### delete from here

#### end here

#### Testing