In [1]:
import pandas as pd
import numpy as np
import random
import sys
sys.path.append('..')
from modules.env import LupusEnv
from stable_baselines3 import DQN
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

In [2]:
# One seed shared by every random number generator seeded in this notebook.
SEED = 42
np.random.seed(SEED)  # NumPy's global generator
random.seed(SEED)     # Python's built-in generator
SEED

42

In [3]:
def create_env(X, y, random=True):
    '''
    Creates an environment using the given data.

    X, y and random are passed positionally to LupusEnv in that order.
    NOTE(review): ``random`` presumably switches between random and
    sequential sampling of rows -- confirm in modules.env. Inside this
    function the parameter hides the imported ``random`` module.

    Prints the value returned by ``env.seed()`` and returns the environment.
    '''
    env = LupusEnv(X, y, random)
    print(f'The environment seed is {env.seed()}') # debug output, marked "to delete" by the author
    return env

In [4]:
def evaluate_dqn(dqn_model, X_test, y_test):
    '''
    Evaluates a DQN model on test data.

    Builds an environment over (X_test, y_test) with ``random=False`` and
    plays episodes with the model's deterministic policy until a
    StopIteration is raised. The ``info`` dict returned by the final step of
    each finished episode becomes one row of the result.

    Parameters
    ----------
    dqn_model : object whose ``predict(obs, deterministic=True)`` returns an
        ``(action, state)`` pair.
    X_test, y_test : test features and labels, forwarded to ``create_env``.

    Returns
    -------
    pandas.DataFrame
        One row per finished episode, in the order the episodes were played.
        Column dtypes are inferred from the recorded values. The frame is
        empty when no episode finished.
    '''
    env = create_env(X_test, y_test, random=False)
    # Rows are gathered in a list and converted once at the end:
    # DataFrame.append no longer exists in pandas 2.x, and appending to a
    # frame row by row copies the whole frame on every call.
    episode_infos = []

    try:
        while True:
            obs, done = env.reset(), False
            while not done:
                action, _states = dqn_model.predict(obs, deterministic=True)
                obs, _reward, done, info = env.step(action)
            # The inner loop only exits on a terminal step, so `info` here is
            # the one that belongs to the end of the episode.
            episode_infos.append(info)
    except StopIteration:
        # NOTE(review): presumably raised by the environment once every test
        # sample has been served -- confirm in modules.env.
        pass
    return pd.DataFrame(episode_infos)

In [5]:
def load_dqn(filename, env=None):
    '''
    Restores a DQN model that was previously saved to ``filename``.

    ``env`` is handed to ``DQN.load`` unchanged and defaults to None.
    The loaded model is returned.
    '''
    return DQN.load(filename, env=env)

In [6]:
def multiclass(actual_class, pred_class, average = 'macro'):
    '''
    Returns the ROC-AUC score for multi-labeled data.

    Every label present in ``actual_class`` is scored one-vs-rest: both
    sequences are binarised (1 for that label, 0 for anything else), the pair
    is passed to ``roc_auc_score``, and the per-label scores are averaged with
    equal weight.

    Parameters
    ----------
    actual_class : sequence of hashable ground-truth labels.
    pred_class : sequence of predicted labels, aligned with ``actual_class``.
    average : forwarded unchanged to ``roc_auc_score``.

    Returns
    -------
    float
        Unweighted mean of the per-label scores.

    Raises
    ------
    ZeroDivisionError
        If ``actual_class`` is empty. Errors raised by ``roc_auc_score``
        itself are not caught.
    '''
    roc_auc_dict = {}
    for per_class in set(actual_class):
        # Compare against the current label directly. Testing membership in
        # "the other actual labels" would turn a predicted label that never
        # occurs in actual_class into a positive for every class.
        new_actual_class = [1 if x == per_class else 0 for x in actual_class]
        new_pred_class = [1 if x == per_class else 0 for x in pred_class]
        roc_auc_dict[per_class] = roc_auc_score(new_actual_class, new_pred_class, average = average)
    return sum(roc_auc_dict.values()) / len(roc_auc_dict)

In [7]:
def testing(ytest, ypred):
    '''
    Computes accuracy, macro F1 and ROC-AUC for a set of predictions.

    F1 is macro-averaged over the labels found in ``ytest``; ROC-AUC comes
    from ``multiclass``. Each value is multiplied by 100 and the three are
    returned as the tuple ``(acc, f1, roc_auc)``.
    '''
    acc = accuracy_score(ytest, ypred) * 100
    present_labels = np.unique(ytest)
    f1 = f1_score(ytest, ypred, average='macro', labels=present_labels) * 100
    roc_auc = multiclass(ytest, ypred) * 100
    return acc, f1, roc_auc

In [8]:
# Read the test set and replace every missing value with -1 in one step.
test_data = pd.read_csv('../new_data/test_set_constant.csv').fillna(-1)
test_data.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,proteinuria,biopsy_proven_lupus_nephritis,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,1,0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,1,0,1
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,1
3,1,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,1,4,1,0,0,0,1,0,0,1


In [9]:
# Every column except the last goes into X_test; the last column is y_test.
X_test = np.array(test_data.iloc[:, :-1])
y_test = np.array(test_data.iloc[:, -1])
X_test.shape, y_test.shape

((14000, 24), (14000,))

In [10]:
# Both values are interpolated into the path of the saved model loaded below.
model_name = 'robust_dqn3'
steps = 100_000_000  # same value as int(100e6)

In [11]:
# Load the saved model, play it over the test set, then report the metrics
# together with the episode-length statistics.
model_path = f'../models/logs/{model_name}/noisiness/0.0/biopsy_9/var_norm/seed_{SEED}_{steps}/best_acc_model.zip'
dqn_model_acc_ft = DQN.load(model_path)
test_df = evaluate_dqn(dqn_model_acc_ft, X_test, y_test)
acc, f1, roc_auc = testing(test_df.y_actual, test_df.y_pred)
print(f'acc:{acc}, f1:{f1}, roc_auc:{roc_auc}, min length:{test_df.episode_length.min()}, mean length: {test_df.episode_length.mean()}, max length:{test_df.episode_length.max()}')

The environment seed is [126]
acc:87.72142857142858, f1:87.72538868208837, roc_auc:87.73098807039912, min length:15.0, mean length: 21.053214285714287, max length:25.0


In [12]:
# Show the (accuracy, F1, ROC-AUC) tuple for the evaluated episodes.
testing(test_df['y_actual'], test_df['y_pred'])

(87.72142857142858, 87.72538868208837, 87.73098807039912)

In [13]:
# test_df.isna().sum()

In [14]:
# testing(test_df.y_actual, test_df.y_pred)

In [15]:
# test_df.to_csv(f'../test_dfs/{model_name}_biopsy_3_best_path_pahm_{SEED}_{steps}.csv', index=False)

In [16]:
# test_df.head()

In [17]:
# test_df.y_pred.value_counts()

### 