In [1]:
import pandas as pd
import numpy as np
import random
import os
import torch
import sys
sys.path.append('../')
from modules import utils, simple_constants
from modules.simple_env import SimpleEnv
# from modules.env import LupusEnv
from sklearn.model_selection import train_test_split
from stable_baselines3 import DQN
import warnings
warnings.filterwarnings('ignore')

In [2]:
SEED = simple_constants.SEED
random.seed(SEED)
np.random.seed(SEED)
os.environ['PYTHONHASHSEED']=str(SEED)
torch.manual_seed(SEED)
torch.use_deterministic_algorithms(True)

In [3]:
class_dict = simple_constants.CLASS_DICT

#### The data

In [4]:
df = pd.read_csv('../data/very_simple_datasets/feats_22.csv')
df.head()

Unnamed: 0,ana,anti_dsdna_antibody,joint_involvement,proteinuria,pericardial_effusion,non_scarring_alopecia,leukopenia,delirium,low_c3_and_c4,fever,anti_cardioliphin_antibodies,label
0,0,0,1,1,1,0,1,1,1,0,1,No lupus
1,1,1,0,1,1,0,1,0,1,0,0,Lupus
2,0,1,0,0,1,1,0,0,1,1,1,No lupus
3,0,0,1,0,0,1,1,1,1,1,1,No lupus
4,0,1,1,1,1,0,0,0,0,0,0,No lupus


In [5]:
df['label'] = df['label'].replace(class_dict)
print(df.label.value_counts())
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, stratify=y, random_state=SEED)
training_df = pd.concat([X_train, y_train], axis=1).reset_index(drop=True)
testing_df = pd.concat([X_test, y_test], axis=1).reset_index(drop=True)
X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

0    38047
1    31953
Name: label, dtype: int64


((56000, 11), (14000, 11), (56000,), (14000,))

In [6]:
y_train

array([1, 0, 0, ..., 0, 1, 1])

#### Training the model

In [7]:
def stable_dqn3(X_train, y_train, timesteps,  save=False, filename=None):
    training_env = SimpleEnv(X_train, y_train)
    model = DQN('MlpPolicy', training_env, verbose=1, seed=simple_constants.SEED, buffer_size=100000)
    model.learn(total_timesteps=timesteps, log_interval=100000)
    if save:
        model.save(filename)
    training_env.close()
#     return model

In [27]:
ft_num = 22
# for steps in [int(5.5e6), int(5.8e6), int(6.2e6), int(6.5e6), int(5.3e6), int(6.8e6)]:
for steps in [int(5e6), int(15e6), int(25e6)
    stable_dqn3(X_train, y_train, steps, True, f'../models/very_simple_models/dqn_fts_{ft_num}_{steps}')

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.6      |
|    ep_rew_mean      | -0.52    |
|    exploration_rate | 0.43     |
|    success_rate     | 0.24     |
| time/               |          |
|    episodes         | 100000   |
|    fps              | 2457     |
|    time_elapsed     | 153      |
|    total_timesteps  | 377922   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0202   |
|    n_updates        | 81980    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 8.59     |
|    ep_rew_mean      | 0.64     |
|    exploration_rate | 0.05     |
|    success_rate     | 0.82     |
| time/               |          |
|    episodes         | 200000   |
|    fps              | 1473     |
|    time_elapsed     | 724    

In [None]:
# timesteps = 15000000
# ft_num = 11
# training_env = SimpleEnv(X_train, y_train)
# # training_env = LupusEnv(X_train, y_train)
# # model = DQN('MlpPolicy', training_env, verbose=1, seed=SEED, learning_rate=0.00001)
# model = DQN('MlpPolicy', training_env, verbose=1, seed=SEED)
# model.learn(total_timesteps=timesteps, log_interval=100000)
# # model.save(f'../models/very_simple_models/reward_shaping/dqn_fts_{ft_num}_{timesteps}')
# training_env.close()
# def training(X_train, y_trai)

In [28]:
model = utils.load_dqn3('../models/very_simple_models/dqn_fts_11_6300000.zip')

Using stable baselines 3


#### Testing the model

In [29]:
test_df = pd.DataFrame()
testing_env = SimpleEnv(X_test, y_test, random=False)
# testing_env = LupusEnv(X_test, y_test, random=False)
count = 0
try:
    while True:
        count += 1
        if count %(len(X_test)/5)==0:
            print(f'Count: {count}')
        obs, done = testing_env.reset(), False
        while not done:
            action, states = model.predict(obs, deterministic=True)
            obs, rew, done, info = testing_env.step(action)
            if done==True:
                test_df = test_df.append(info, ignore_index=True)
except StopIteration:
    print('Testing done ....')
test_df.head() 

Count: 2800
Count: 5600
Count: 8400
Count: 11200
Count: 14000
Testing done ....


Unnamed: 0,index,episode_length,reward,y_pred,y_actual,trajectory,terminated,is_success
0,0.0,6.0,1.0,1.0,1.0,"[ana, joint_involvement, leukopenia, low_c3_an...",0.0,1.0
1,1.0,4.0,1.0,0.0,0.0,"[ana, leukopenia, joint_involvement, No lupus]",0.0,1.0
2,2.0,7.0,1.0,1.0,1.0,"[ana, joint_involvement, leukopenia, anti_card...",0.0,1.0
3,3.0,10.0,1.0,1.0,1.0,"[ana, joint_involvement, leukopenia, anti_card...",0.0,1.0
4,4.0,7.0,1.0,1.0,1.0,"[ana, joint_involvement, leukopenia, anti_card...",0.0,1.0


In [30]:
# testing_df.iloc[4]

In [31]:
len(testing_df[(testing_df.ana==1) & (testing_df.label==0)])

650

In [32]:
len(test_df.trajectory.value_counts())

98

#### Results

In [33]:
test_df.trajectory.value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 5231, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[ana, leukopenia, joint_involvement, No lupus]                                                                                                                                                 6959
[ana, joint_involvement, leukopenia, anti_cardioliphin_antibodies, low_c3_and_c4, non_scarring_alopecia, delirium, anti_dsdna_antibody, pericardial_effusion, Lupus]                            684
[ana, joint_involvement, leukopenia, anti_cardioliphin_antibodies, low_c3_and_c4, non_scarring_alopecia, Lupus]                                                                                 651
[ana, joint_involvement, leukopenia, anti_cardioliphin_antibodies, non_scarring_alopecia, low_c3_and_c4, Lupus]                                                                                 444
[ana, joint_involvement, leukopenia, low_c3_and_c4, non_scarring_alopecia, Lupus]                                                                                                               421
                    

In [34]:
test_df.episode_length.min(), test_df.episode_length.max()

(4.0, 12.0)

In [35]:
# len(testing_df[testing_df.ana == 0])

In [36]:
len(test_df.trajectory.value_counts())

98

In [37]:
test_df[test_df.episode_length == test_df.episode_length.min()].trajectory.value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 5231, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[ana, leukopenia, joint_involvement, No lupus]    6959
Name: trajectory, dtype: int64

In [38]:
success_rate, success_df = utils.success_rate(test_df)
success_rate

99.95

In [39]:
avg_length, avg_return = utils.get_avg_length_reward(test_df)
avg_length, avg_return

(6.7240714285714285, 0.999)

In [40]:
acc, f1, roc_auc = utils.test(test_df['y_actual'], test_df['y_pred'])
acc, f1, roc_auc

(99.95, 99.94962306719451, 99.9540018399264)

In [41]:
test_df.y_actual.value_counts()

0.0    7609
1.0    6391
Name: y_actual, dtype: int64

In [42]:
test_df.y_pred.value_counts()

0.0    7602
1.0    6398
Name: y_pred, dtype: int64

In [43]:
# test_df.to_csv(f'../test_dfs/very_simple_datasets/dqn_lupus_test_df_8ft_700000.csv', index=False)
# success_df.to_csv(f'../test_dfs/very_simple_datasets/dqn_lupus_success_df_8ft_700000.csv', index=False)

In [None]:
def training_and_testing(timesteps, feat_num):
    training_env = SimpleEnv(X_train, y_train)
    model = DQN('MlpPolicy', training_env, verbose=1, seed=SEED)
    model.learn(total_timesteps=timesteps, log_interval=100000)
    model.save(f'../models/very_simple_models/dqn_fts_{feat_num}_{timesteps}')
    training_env.close()
    
    test_df = pd.DataFrame()
    testing_env = SimpleEnv(X_test, y_test, random=False)
    count = 0
    try:
        while True:
            count += 1
            if count %(len(X_test)/5)==0:
                print(f'Count: {count}')
            obs, done = testing_env.reset(), False
            while not done:
                action, states = model.predict(obs, deterministic=True)
                obs, rew, done, info = testing_env.step(action)
                if done==True:
                    test_df = test_df.append(info, ignore_index=True)
    except StopIteration:
        print('Testing done ....')
    print(test_df.head())
    print(test_df.trajectory.value_counts())
    success_rate, success_df = utils.success_rate(test_df)
    print(f'Success rate: {success_rate}')
    acc, f1, roc_auc = utils.test(test_df['y_actual'], test_df['y_pred'])
    print(f'Accuracy: {acc}')
    print(f'F1 Score: {f1}')
    print(f'ROC AUC SCore: {roc_auc}')