In [1]:
import pandas as pd
import numpy as np
import random
import os
import torch
import sys
sys.path.append('../')
from modules import utils, constants

In [2]:
# Fix every source of randomness so the evaluation below is repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
# NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here does
# not change hashing inside this kernel; it only propagates to child processes.
os.environ['PYTHONHASHSEED'] = str(SEED)
# cuBLAS needs an explicit workspace configuration to be deterministic on
# CUDA >= 10.2; without it torch raises a RuntimeError on the first cuBLAS op
# once use_deterministic_algorithms(True) is active. setdefault keeps any value
# that was already exported in the environment.
os.environ.setdefault('CUBLAS_WORKSPACE_CONFIG', ':4096:8')
torch.manual_seed(SEED)
torch.use_deterministic_algorithms(True)

In [3]:
# Load the evaluation frame and preview its first five rows.
# NOTE(review): the variable is called `testing_df`, but the file being read is
# `training_set.csv` from the missingness/0 folder -- confirm this is intended.
# Alternative input (currently disabled):
# testing_df = pd.read_csv('../data/18_jan/test_set_basic.csv')
testing_df = pd.read_csv(filepath_or_buffer='../data/missingness/0/training_set.csv')
testing_df.head(n=5)

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,0,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,1
3,1,0,0,0,0,0,0,1,1,0,...,0,1,0,0,0,0,0,1,1,1
4,1,0,0,1,1,1,0,0,0,0,...,0,1,0,0,0,1,0,0,1,1


In [4]:
# Number of rows whose `ana` column equals 0.
testing_df.loc[testing_df['ana'] == 0].shape[0]

14400

In [5]:
# Preview the last five rows of the loaded frame.
testing_df.tail(n=5)

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
50395,1,0,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
50396,1,0,1,0,0,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,0
50397,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
50398,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,1
50399,1,0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [6]:
# Every column except the last is a feature; the last column is the target.
# Both are materialised as NumPy arrays for the evaluation helper.
X_test = np.array(testing_df.iloc[:, :-1])
y_test = np.array(testing_df.iloc[:, -1])

# Sanity check: (n_rows, n_features) and (n_rows,).
(X_test.shape, y_test.shape)

((50400, 23), (50400,))

In [7]:
# Load the saved DQN checkpoint (presumably a Stable-Baselines3 model, per the
# log line printed below -- confirm in modules/utils) and run it over every row
# of X_test / y_test. The result, `test_df`, is a per-episode frame whose
# columns are shown in the preview (episode_length, reward, y_pred, y_actual,
# trajectory, terminated, is_success).
dqn_model = utils.load_dqn3('../models/24_feb/dqn_basic_11279933')
test_df = utils.evaluate_dqn(dqn_model, X_test, y_test)
# Alternative (disabled): reload a results frame saved to disk instead of
# re-running the evaluation.
# test_df = pd.read_csv('../test_dfs/21_jan/dqn_test_df_13000000.csv')
test_df.head()

Using stable baselines 3
Count: 10080
Count: 20160
Count: 30240
Count: 40320
Count: 50400
Testing done.....


Unnamed: 0,index,episode_length,reward,y_pred,y_actual,trajectory,terminated,is_success
0,0.0,2.0,2.0,0.0,0.0,"[ana, No lupus]",0.0,1.0
1,1.0,2.0,2.0,0.0,0.0,"[ana, No lupus]",0.0,1.0
2,2.0,7.0,1.583333,1.0,1.0,"[ana, anti_dsdna_antibody, low_c4, thrombocyto...",0.0,1.0
3,3.0,6.0,1.666667,1.0,1.0,"[ana, anti_dsdna_antibody, low_c4, thrombocyto...",0.0,1.0
4,4.0,4.0,1.833333,1.0,1.0,"[ana, anti_dsdna_antibody, pericardial_effusio...",0.0,1.0


In [8]:
# Shortest and longest episode observed during evaluation.
(test_df['episode_length'].min(), test_df['episode_length'].max())

(2.0, 14.0)

In [9]:
# Trajectory of the last episode that has the minimum episode length.
test_df.loc[
    test_df['episode_length'] == test_df['episode_length'].min(),
    'trajectory',
].iloc[-1]

['ana', 'No lupus']

In [10]:
# utils.success_rate returns two values: the success rate (a percentage, per
# the output below) and a frame bound to `success_df`.
# NOTE(review): presumably `success_df` holds only the successful episodes --
# verify against modules/utils.
success_rate, success_df = utils.success_rate(test_df)
success_rate

87.85119047619048

In [11]:
# Average episode length and average episode return over `test_df`, as computed
# by the project helper (exact aggregation lives in modules/utils -- TODO confirm
# it is a plain mean over all episodes).
avg_length, avg_return = utils.get_avg_length_reward(test_df)
avg_length, avg_return

(5.137242063492064, 1.3740988756614827)

In [12]:
# Accuracy, F1 and ROC-AUC of the agent's predictions against the true labels.
# The helper reports them on a 0-100 scale (see output below); the averaging
# mode used for F1 / ROC-AUC is defined in modules/utils -- TODO confirm.
acc, f1, roc_auc = utils.test(test_df['y_actual'], test_df['y_pred'])
acc, f1, roc_auc

(87.85119047619048, 87.82139288865712, 87.79339410842407)

In [13]:
# Distribution of the predicted classes.
test_df['y_pred'].value_counts()

0.0    26690
1.0    23710
Name: y_pred, dtype: int64

In [14]:
# Binary-classification view of the same predictions: accuracy, precision,
# recall and F1, reported on a 0-100 scale (see output below).
# NOTE(review): which class is treated as positive is decided inside
# utils.test_binary -- confirm there.
acc_bin, prec_bin, recall_bin, f1_bin = utils.test_binary(test_df['y_actual'], test_df['y_pred'])
acc_bin, prec_bin, recall_bin, f1_bin

(87.85119047619048, 88.11471952762547, 86.3412819771046, 87.21898678689962)

In [None]:
# Trajectory of the last episode that took exactly 5 steps.
test_df.loc[test_df['episode_length'] == 5, 'trajectory'].iloc[-1]

In [None]:
# Distribution of the ground-truth classes.
test_df['y_actual'].value_counts()

#### Saving files

In [None]:
# Persisting the evaluation frames is currently disabled; uncomment the two
# lines below to write `test_df` and `success_df` to CSV (without the index).
# NOTE(review): the target paths point at the 21_jan folder / 13000000 run,
# which differs from the 24_feb checkpoint evaluated above -- update before use.
# test_df.to_csv(f'../test_dfs/21_jan/dqn_test_df_13000000.csv', index=False)
# success_df.to_csv(f'../test_dfs/21_jan/dqn_success_df_13000000.csv', index=False)