In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import random
import os
from datetime import datetime
import torch
import sys
sys.path.append('../..')
from modules.many_features import utils, constants
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

  import pandas.util.testing as tm


In [2]:
# Single seed shared by every random number generator used in this notebook.
SEED = 42

# Exported for child processes; the hash seed of the already-running
# interpreter is fixed at startup and is not changed by this assignment.
os.environ['PYTHONHASHSEED'] = f'{SEED}'

# Seed the Python, NumPy and PyTorch generators with the same value.
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

# Ask PyTorch to raise instead of silently using non-deterministic kernels.
torch.use_deterministic_algorithms(True)

In [3]:
# Alternative dataset (not loaded): '../../data/more_features/with_correlated_feature_0.1.csv'
# Load the noisy correlated-features dataset and replace every NaN with the
# sentinel value -1 in one chained expression.
df = pd.read_csv('../../data/more_features/more_feats_correlated_noisy_6.csv').fillna(-1)
df.head()

Unnamed: 0,hemoglobin,ferritin,ret_count,segmented_neutrophils,tibc,mcv,serum_iron,rbc,gender,creatinine,cholestrol,copper,ethanol,folate,glucose,hematocrit,tsat,label
0,12.322384,163.121429,4.121959,-1.0,433.145097,100.147359,51.037057,3.691276,1,0.819482,147.693827,59.199141,41.958432,20.792161,101.383932,36.967153,11.782901,Unspecified anemia
1,8.298889,-1.0,2.07695,-1.0,483.617753,98.431076,-1.0,2.52935,0,-1.0,-1.0,-1.0,36.118322,-1.0,-1.0,24.896668,-1.0,Hemolytic anemia
2,12.696391,3.393723,-1.0,-1.0,451.933132,79.486542,85.001345,4.791902,1,-1.0,4.852168,89.831485,44.946238,0.965963,-1.0,38.089174,18.80839,Iron deficiency anemia
3,12.705102,-1.0,2.305379,-1.0,-1.0,81.057541,135.371313,4.702253,1,1.32414,32.717943,76.524319,-1.0,27.439316,-1.0,38.115305,-1.0,Aplastic anemia
4,8.211543,29.622561,-1.0,0.93619,479.914773,78.38848,-1.0,3.142634,1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,24.634629,-1.0,Iron deficiency anemia


In [4]:
# Run the project's get_dt_performance helper on the loaded frame; at this point
# the 'label' column still holds the original string labels.
# NOTE(review): presumably a decision-tree baseline returning
# (accuracy, f1, roc_auc, elapsed time) — confirm against modules.many_features.utils.
utils.get_dt_performance(df)

(0.6596428571428572,
 0.6536656960257498,
 0.8005238687826254,
 datetime.timedelta(microseconds=2998))

In [5]:
# Number of rows per value of the 'label' column.
df['label'].value_counts()

No anemia                               16000
Anemia of chronic disease                8828
Iron deficiency anemia                   8331
Unspecified anemia                       8104
Aplastic anemia                          8093
Hemolytic anemia                         8089
Vitamin B12/Folate deficiency anemia     8082
Inconclusive diagnosis                   4473
Name: label, dtype: int64

In [6]:
# Translate the labels through the project's class mapping. This overwrites
# df['label'] in place, so any earlier cell that shows the label column will
# show the mapped values if it is re-run after this one.
class_dict = constants.CLASS_DICT
df['label'] = df['label'].replace(class_dict)

# Every column except the last is a feature; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Stratified 80/20 split, with each part converted to a NumPy array.
X_train, X_test, y_train, y_test = (
    np.array(part)
    for part in train_test_split(X, y, test_size=0.2, stratify=y, random_state=SEED)
)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((56000, 17), (14000, 17), (56000,), (14000,))

In [7]:
# Build the action names: first the keys of class_dict (in dict order), then
# every DataFrame column other than 'label'.
action_list = [*class_dict]
action_list.extend(name for name in df.columns if name != 'label')
action_list

['No anemia',
 'Vitamin B12/Folate deficiency anemia',
 'Unspecified anemia',
 'Anemia of chronic disease',
 'Iron deficiency anemia',
 'Hemolytic anemia',
 'Aplastic anemia',
 'Inconclusive diagnosis',
 'hemoglobin',
 'ferritin',
 'ret_count',
 'segmented_neutrophils',
 'tibc',
 'mcv',
 'serum_iron',
 'rbc',
 'gender',
 'creatinine',
 'cholestrol',
 'copper',
 'ethanol',
 'folate',
 'glucose',
 'hematocrit',
 'tsat']

In [None]:
# Call utils.stable_dqn3 once per step budget: 17M, 18M, 19M and 20M steps.
# For a quick smoke run, iterate over [int(2e3)] instead.
# `dqn_model` is rebound on every iteration, so only the last result stays
# bound in the kernel once the loop finishes.
# NOTE(review): the positional True and the path are forwarded as-is; the path
# is presumably where the trained model is saved — confirm in utils.stable_dqn3.
for steps in range(17_000_000, 20_000_001, 1_000_000):
    dqn_model = utils.stable_dqn3(
        X_train,
        y_train,
        steps,
        True,
        f'../../models/many_features/0.1/with_correlated_fts/dqn3_by_type_noisy_6_{steps}',
    )

using stable baselines 3
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.15     |
|    ep_rew_mean      | -0.84    |
|    exploration_rate | 0.843    |
|    success_rate     | 0.09     |
| time/               |          |
|    episodes         | 100000   |
|    fps              | 795      |
|    time_elapsed     | 353      |
|    total_timesteps  | 281143   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.59     |
|    n_updates        | 57785    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.22     |
|    ep_rew_mean      | -0.8     |
|    exploration_rate | 0.669    |
|    success_rate     | 0.11     |
| time/               |          |
|    episodes         | 200000   |
|    fps              | 725      |
|    t

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.04     |
|    ep_rew_mean      | -0.08    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.48     |
| time/               |          |
|    episodes         | 1600000  |
|    fps              | 666      |
|    time_elapsed     | 9910     |
|    total_timesteps  | 6600651  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.137    |
|    n_updates        | 1637662  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.37     |
|    ep_rew_mean      | -0.22    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.4      |
| time/               |          |
|    episodes         | 1700000  |
|    fps              | 665      |
|    time_elapsed     | 10566    |
|    total_timesteps  | 7029855  |
| train/              |          |
|    learning_rate  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.36     |
|    ep_rew_mean      | -0.14    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.48     |
| time/               |          |
|    episodes         | 3100000  |
|    fps              | 637      |
|    time_elapsed     | 20187    |
|    total_timesteps  | 12861059 |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0362   |
|    n_updates        | 3202764  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.92     |
|    ep_rew_mean      | 0        |
|    exploration_rate | 0.05     |
|    success_rate     | 0.54     |
| time/               |          |
|    episodes         | 3200000  |
|    fps              | 620      |
|    time_elapsed     | 21423    |
|    total_timesteps  | 13289344 |
| train/              |          |
|    learning_rate  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.65     |
|    ep_rew_mean      | -0.98    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.08     |
| time/               |          |
|    episodes         | 600000   |
|    fps              | 722      |
|    time_elapsed     | 2868     |
|    total_timesteps  | 2073314  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 39       |
|    n_updates        | 505828   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.92     |
|    ep_rew_mean      | -0.92    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.12     |
| time/               |          |
|    episodes         | 700000   |
|    fps              | 709      |
|    time_elapsed     | 3491     |
|    total_timesteps  | 2477946  |
| train/              |          |
|    learning_rate  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.57     |
|    ep_rew_mean      | 0.08     |
|    exploration_rate | 0.05     |
|    success_rate     | 0.55     |
| time/               |          |
|    episodes         | 2100000  |
|    fps              | 524      |
|    time_elapsed     | 16736    |
|    total_timesteps  | 8775286  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.138    |
|    n_updates        | 2181321  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.51     |
|    ep_rew_mean      | -0.12    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.45     |
| time/               |          |
|    episodes         | 2200000  |
|    fps              | 525      |
|    time_elapsed     | 17426    |
|    total_timesteps  | 9153256  |
| train/              |          |
|    learning_rate  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.72     |
|    ep_rew_mean      | -0.12    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.44     |
| time/               |          |
|    episodes         | 3600000  |
|    fps              | 540      |
|    time_elapsed     | 26723    |
|    total_timesteps  | 14453835 |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.103    |
|    n_updates        | 3600958  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.91     |
|    ep_rew_mean      | -0.04    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.48     |
| time/               |          |
|    episodes         | 3700000  |
|    fps              | 542      |
|    time_elapsed     | 27331    |
|    total_timesteps  | 14838214 |
| train/              |          |
|    learning_rate  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.78     |
|    ep_rew_mean      | -0.98    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.08     |
| time/               |          |
|    episodes         | 600000   |
|    fps              | 450      |
|    time_elapsed     | 4454     |
|    total_timesteps  | 2007870  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 77.3     |
|    n_updates        | 489467   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.01     |
|    ep_rew_mean      | -0.98    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.09     |
| time/               |          |
|    episodes         | 700000   |
|    fps              | 441      |
|    time_elapsed     | 5410     |
|    total_timesteps  | 2388099  |
| train/              |          |
|    learning_rate  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.92     |
|    ep_rew_mean      | 0        |
|    exploration_rate | 0.05     |
|    success_rate     | 0.53     |
| time/               |          |
|    episodes         | 2100000  |
|    fps              | 467      |
|    time_elapsed     | 18693    |
|    total_timesteps  | 8735350  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0587   |
|    n_updates        | 2171337  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.64     |
|    ep_rew_mean      | -0.1     |
|    exploration_rate | 0.05     |
|    success_rate     | 0.46     |
| time/               |          |
|    episodes         | 2200000  |
|    fps              | 467      |
|    time_elapsed     | 19563    |
|    total_timesteps  | 9140938  |
| train/              |          |
|    learning_rate  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.15     |
|    ep_rew_mean      | -0.08    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.47     |
| time/               |          |
|    episodes         | 3600000  |
|    fps              | 467      |
|    time_elapsed     | 32217    |
|    total_timesteps  | 15048483 |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.139    |
|    n_updates        | 3749620  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.15     |
|    ep_rew_mean      | 0.16     |
|    exploration_rate | 0.05     |
|    success_rate     | 0.59     |
| time/               |          |
|    episodes         | 3700000  |
|    fps              | 467      |
|    time_elapsed     | 33091    |
|    total_timesteps  | 15455110 |
| train/              |          |
|    learning_rate  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.8      |
|    ep_rew_mean      | -1       |
|    exploration_rate | 0.05     |
|    success_rate     | 0.07     |
| time/               |          |
|    episodes         | 600000   |
|    fps              | 574      |
|    time_elapsed     | 3484     |
|    total_timesteps  | 2003136  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 83.9     |
|    n_updates        | 488283   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.74     |
|    ep_rew_mean      | -1       |
|    exploration_rate | 0.05     |
|    success_rate     | 0.08     |
| time/               |          |
|    episodes         | 700000   |
|    fps              | 559      |
|    time_elapsed     | 4246     |
|    total_timesteps  | 2377772  |
| train/              |          |
|    learning_rate  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.99     |
|    ep_rew_mean      | -0.14    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.48     |
| time/               |          |
|    episodes         | 2100000  |
|    fps              | 469      |
|    time_elapsed     | 18909    |
|    total_timesteps  | 8881320  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.222    |
|    n_updates        | 2207829  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.87     |
|    ep_rew_mean      | 0        |
|    exploration_rate | 0.05     |
|    success_rate     | 0.52     |
| time/               |          |
|    episodes         | 2200000  |
|    fps              | 471      |
|    time_elapsed     | 19714    |
|    total_timesteps  | 9294776  |
| train/              |          |
|    learning_rate  

In [None]:
# NOTE(review): this evaluation is disabled, yet the cells further down read
# `test_df`, which is only created here. On a fresh kernel those cells raise
# NameError until this block is re-enabled.
# NOTE(review): the path below names a 'noisy_4' model while the training loop
# in this notebook saves 'noisy_6' models — confirm which one should be evaluated.
# training_env = utils.create_env(X_train, y_train)
# dqn_model = utils.load_dqn3('../../models/many_features/0.1/with_correlated_fts/dqn3_by_type_noisy_4_19000000', training_env)
# test_df = utils.evaluate_dqn(dqn_model, X_test, y_test)
# test_df.head()

In [None]:
#utils.diagnose_sample(dqn_model, X_test, y_test, 1)

In [None]:
# test_df[(test_df.y_pred==1) & (test_df.y_actual==1)]

In [None]:
# Number of rows per predicted value in test_df.
test_df['y_pred'].value_counts()

In [None]:
# Unpack the two values returned by utils.success_rate: the first is displayed
# as the cell output, the second is bound to success_df.
# NOTE(review): needs `test_df`, which is only created by the commented-out
# evaluation cell earlier in the notebook — make sure it exists before running.
success_rate, success_df = utils.success_rate(test_df)
success_rate

In [None]:
# Unpack the two values returned by utils.get_avg_length_reward and display them.
# NOTE(review): presumably the mean episode length and mean return over
# test_df — confirm in modules.many_features.utils.
avg_length, avg_return = utils.get_avg_length_reward(test_df)
avg_length, avg_return

In [None]:
# Pass the actual and predicted label columns of test_df to utils.test and
# display the three values it returns.
# NOTE(review): the names suggest accuracy, F1 and ROC AUC — confirm the
# averaging mode used for the multi-class scores in utils.test.
acc, f1, roc_auc = utils.test(test_df['y_actual'], test_df['y_pred'])
acc, f1, roc_auc

In [None]:
# Distinct predicted values present in test_df.
test_df['y_pred'].unique()

In [None]:
# test_df.to_csv(f'../../test_dfs/many_features/0.1/test_df3_noisy_1_11000000.csv', index=False)
# success_df.to_csv(f'../../test_dfs/many_features/0.1/success_df3_noisy_1_11000000.csv', index=False)