In [1]:
import pandas as pd
import numpy as np
import random
from os.path import isfile, join
import os
import ast
import sys
import torch
sys.path.append('..')
from modules import constants
from multiprocessing import Process
from stable_baselines3 import DQN
from modules.env import LupusEnv
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Seed every random number source used below with the same value.
# Commented-out alternative kept from the original cell: SEED = constants.SEED
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Ask torch to use deterministic implementations of its algorithms.
torch.use_deterministic_algorithms(True)
SEED

42

#### Reading the data

In [3]:
# Read the validation set CSV into a DataFrame; head() is the cell's displayed
# output (a preview of the first rows).
val_df = pd.read_csv('../new_data/val_set_constant.csv')
val_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,proteinuria,biopsy_proven_lupus_nephritis,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,1,0,0,0
4,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Every column except the last is a model input; the last column is the label.
X_val = np.array(val_df.iloc[:, :-1])
y_val = np.array(val_df.iloc[:, -1])
X_val.shape, y_val.shape

((5600, 24), (5600,))

In [5]:
def get_steps(filename, prefix):
    '''
    Parses the step count embedded in a checkpoint filename.

    The first len(prefix)+1 characters and the last 10 characters are cut
    off and the remainder is converted to int, e.g.
    'abc_5000_steps.zip' with prefix 'abc' gives 5000.

    If anything fails, the filename and the exception are printed and
    None is returned.
    '''
    try:
        after_prefix = filename[len(prefix) + 1:]
        step_text = after_prefix[:-10]
        steps = int(step_text)
    except Exception as e:
        print(f'Filename: {filename}')
        print(f'Exception: {e}')
        return None
    return steps

In [6]:
def load_dqn(filename, env=None):
    '''
    Restores a DQN model that was saved earlier.

    filename: path passed straight to DQN.load
    env: optional environment forwarded to DQN.load (defaults to None)
    '''
    return DQN.load(filename, env=env)

In [7]:
def create_env(X, y, random=True):
    '''
    Creates an environment from the given data.

    X, y and random are passed positionally to LupusEnv. The value returned
    by the environment's seed() call is printed before the environment is
    handed back.
    '''
    lupus_env = LupusEnv(X, y, random)
    env_seed = lupus_env.seed()
    print(f'The environment seed is {env_seed}') #to delete
    return lupus_env

In [8]:
def evaluate_dqn(dqn_model, X_test, y_test):
    '''
    Evaluates a DQN model on test data

    Builds a non-random environment from X_test/y_test and plays episodes
    with deterministic predictions until the environment raises
    StopIteration. The `info` dict returned with the final step of every
    episode is kept.

    Returns a DataFrame with one row per finished episode (empty when no
    episode finished).
    '''
    env = create_env(X_test, y_test, random=False)
    # Records are gathered in a list and turned into a frame once:
    # DataFrame.append no longer exists in pandas 2.x, and growing a frame
    # row by row is quadratic.
    episode_infos = []

    try:
        while True:
            obs, done = env.reset(), False
            while not done:
                action, _ = dqn_model.predict(obs, deterministic=True)
                obs, _, done, info = env.step(action)
            # Only the terminal step's info is recorded; copy it in case the
            # environment reuses the same dict object between episodes.
            episode_infos.append(dict(info))
    except StopIteration:
        # Presumably signals that the environment ran out of samples
        # (TODO confirm in LupusEnv).
        pass
    return pd.DataFrame(episode_infos)

In [9]:
def get_pathway_features_score(pathway):
    '''
    Adds up the feature scores of a pathway, leaving out its last entry.

    pathway: a sequence of constants.FEATURE_SCORES keys, or the string
             literal of such a sequence (parsed with ast.literal_eval)
    '''
    steps = ast.literal_eval(pathway) if isinstance(pathway, str) else pathway
    return sum(constants.FEATURE_SCORES[step] for step in steps[:-1])

In [10]:
def get_total_pathway_score(test_df):
    '''
    Average pathway score over all rows of test_df, as a percentage.

    Each row contributes 1 - (feature score of its trajectory divided by
    constants.MAX_FEATURE_SCORE); the contributions are averaged over
    len(test_df) and multiplied by 100.
    '''
    summed = sum(
        1-(get_pathway_features_score(record.trajectory)/constants.MAX_FEATURE_SCORE)
        for _, record in test_df.iterrows()
    )
    return (summed/len(test_df))*100

In [11]:
def get_pahm_score(acc, pathway_score):
    '''
    Harmonic mean of accuracy and pathway score:
    2 * acc * pathway_score / (acc + pathway_score).

    Returns 0.0 when the two values sum to zero (e.g. both are 0) instead
    of dividing by zero.
    '''
    denominator = acc + pathway_score
    if denominator == 0:
        return 0.0
    return 2*(acc*pathway_score)/denominator

In [12]:
def get_weighted_pahm_score(numbers, weights): #both are list of the same length
    '''
    Weighted harmonic mean: sum(weights) / sum(weight / number).

    Works for any number of values, not only two. A value of 0 that carries
    a non-zero weight makes the result 0.0; entries whose weight is 0 do not
    contribute to the denominator.

    Raises ValueError when the inputs are empty or differ in length.
    '''
    if len(numbers) != len(weights):
        raise ValueError('numbers and weights must have the same length')
    if len(numbers) == 0:
        raise ValueError('numbers and weights must not be empty')

    weighted_reciprocals = 0
    for number, weight in zip(numbers, weights):
        if weight == 0:
            # A zero weight adds nothing; skipping it also avoids 0/0.
            continue
        if number == 0:
            # The harmonic mean is 0 as soon as one weighted value is 0.
            return 0.0
        weighted_reciprocals += weight / number
    return sum(weights) / weighted_reciprocals

In [15]:
def get_val_metrics(model, X_val, y_val):
    '''
    Evaluates a model on validation data and summarises the outcome.

    Returns, in this order: pathway score, PAHM score, weighted PAHM score
    (weights 0.9 / 0.1 for accuracy / pathway score), accuracy, F1, ROC-AUC,
    minimum, mean and maximum episode length, and the trajectory of the
    first episode having the minimum and the maximum length respectively.
    '''
    results = evaluate_dqn(model, X_val, y_val)
    acc, f1, roc_auc = test(results['y_actual'], results['y_pred'])

    pathway_score = get_total_pathway_score(results)
    pahm_score = get_pahm_score(acc, pathway_score)
    wpahm_score = get_weighted_pahm_score([acc, pathway_score], [0.9, 0.1])

    lengths = results.episode_length
    shortest = lengths.min()
    mean_length = lengths.mean()
    longest = lengths.max()
    shortest_example = results[lengths == shortest].trajectory.iloc[0]
    longest_example = results[lengths == longest].trajectory.iloc[0]

    return (
        pathway_score, pahm_score, wpahm_score,
        acc, f1, roc_auc,
        shortest, mean_length, longest,
        shortest_example, longest_example,
    )

In [16]:
def multiclass(actual_class, pred_class, average = 'macro'):
    '''
    Returns the ROC-AUC score for multi-labeled data

    For every class that occurs in actual_class, both label sequences are
    binarised one-vs-rest (1 for that class, 0 for anything else) and scored
    with roc_auc_score; the per-class scores are then averaged with equal
    weight.

    average: forwarded to roc_auc_score
    '''
    per_class_auc = {}
    for per_class in set(actual_class):
        # Compare against the class itself. Testing membership in "the other
        # known classes" would mark a predicted label that never occurs in
        # actual_class as positive for every class.
        actual_binary = [1 if x == per_class else 0 for x in actual_class]
        pred_binary = [1 if x == per_class else 0 for x in pred_class]
        per_class_auc[per_class] = roc_auc_score(actual_binary, pred_binary, average = average)
    return sum(per_class_auc.values()) / len(per_class_auc)

In [17]:
def test(ytest, ypred):
    '''
    Return performance metrics for a model

    Returns a tuple (accuracy, macro F1, ROC-AUC), each multiplied by 100.
    When the ROC-AUC cannot be computed (multiclass raises), its slot holds
    NaN so the tuple is always numeric.
    '''
    acc = accuracy_score(ytest, ypred)
    f1 = f1_score(ytest, ypred, average ='macro', labels=np.unique(ytest))
    try:
        roc_auc = multiclass(ytest, ypred)
    except Exception:
        # A None fallback would crash on the multiplication below; NaN stays
        # numeric and compares False against any threshold.
        roc_auc = float('nan')
    return acc*100, f1*100, roc_auc*100

In [19]:
def validate_model(model_name, seed, steps, X_val, y_val, prefix):
    '''
    Validates every saved checkpoint of one training run.

    Scans the run's log folder for '.zip' files whose name starts with
    prefix, evaluates each on (X_val, y_val) and records its metrics.
    Whenever a checkpoint beats the best value seen so far for a metric, the
    model is saved again in the same folder under that metric's 'best_*'
    name. The metrics table, sorted by step count, is written to
    'validation_results.csv' in the folder and returned.

    model_name, seed, steps: used only to build the folder path
    prefix: filename prefix of the checkpoints to evaluate
    '''
    folder = f'../models/logs/{model_name}/missingness/0.1/biopsy_9/sb3/seed_{seed}_{steps}'

    # perf_dict key -> file stem under which the best model for it is saved.
    best_model_names = {
        'pathway_score': 'best_pathway_model',
        'pahm_score': 'best_pahm_model',
        'weighted_pahm_score': 'best_weighted_pahm_model_9_1',
        'acc': 'best_acc_model',
        'f1': 'best_f1_model',
        'roc_auc': 'best_roc_auc_model',
    }
    best_scores = {metric: -1 for metric in best_model_names}
    perf_list = []
    count = 0

    # Sorted so ties between checkpoints are broken the same way on every
    # run, independent of the order the filesystem lists the files in.
    for item in sorted(os.listdir(folder)):
        if not item.startswith(prefix):
            continue
        path = join(folder, item)
        if not (isfile(path) and path.endswith('.zip')):
            continue

        count += 1
        if count % 10 == 0:
            print(count)

        model = load_dqn(path)
        (pathway_score, pahm_score, wpahm_score, acc, f1, roc_auc,
         min_length, avg_length, max_length, min_path, max_path) = get_val_metrics(model, X_val, y_val)
        perf_dict = {'steps': get_steps(item, prefix), 'pathway_score':pathway_score,
                     'pahm_score':pahm_score, 'weighted_pahm_score':wpahm_score, 'acc':acc, 'f1':f1,
                     'roc_auc':roc_auc, 'min_path_length':min_length, 'avg_length':avg_length,
                     'max_length':max_length, 'min_path':min_path, 'max_path':max_path}
        perf_list.append(perf_dict)

        for metric, file_stem in best_model_names.items():
            score = perf_dict[metric]
            # A missing score (None) can never be a new best; NaN compares
            # False on its own.
            if score is not None and score > best_scores[metric]:
                best_scores[metric] = score
                model.save(f'{folder}/{file_stem}')

    val_df = pd.DataFrame(perf_list)
    # With no matching checkpoint the frame has no columns, so there is
    # nothing to sort by.
    if 'steps' in val_df.columns:
        val_df = val_df.sort_values(by=['steps'])
    val_df = val_df.reset_index(drop=True)
    val_df.to_csv(f'{folder}/validation_results.csv', index=False)
    return val_df

#### delete from here

In [20]:
# path='../models/logs/dueling_dqn_per/missingness/0.1/biopsy_9/sb3/seed_42_100000000/dueling_dqn_per_1000000_steps'

#### end here

In [21]:
# Models to validate, the step count used in their log-folder name, and the
# list that will hold the worker processes.
models = ['dueling_dqn_per', 'dueling_ddqn_per']
steps = int(1e8)
procs = []

In [22]:
# Start one validation process per model; the model name is also passed as
# the checkpoint filename prefix.
for name in models:
    worker_args = (name, SEED, steps, X_val, y_val, name)
    proc = Process(target=validate_model, args=worker_args)
    procs.append(proc)
    proc.start()

The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
10
The environment seed is [42]
10
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The environment seed is [42]
The envi

In [23]:
# Wait for every validation process, then report based on the exit codes
# rather than assuming success.
for proc in procs:
    proc.join()
failed = [(proc.name, proc.exitcode) for proc in procs if proc.exitcode != 0]
if failed:
    print(f'{len(failed)} job(s) exited abnormally (process name, exit code): {failed}')
else:
    print('All jobs completed and terminated successfully')

All jobs completed and terminated successfully


#### Performance using test data