In [1]:
import pandas as pd
import numpy as np
import random
import os
import torch
import sys
sys.path.append('../')
from modules import utils, simple_constants
from modules.simple_env import SimpleEnv
# from modules.env import LupusEnv
from sklearn.model_selection import train_test_split
from stable_baselines3 import DQN
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Reproducibility: drive every RNG used in this notebook from the one project seed.
SEED = simple_constants.SEED
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting it
# here presumably only reaches child processes -- confirm this is the intent.
os.environ['PYTHONHASHSEED'] = str(SEED)
for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_rng(SEED)
# Make torch use deterministic algorithms.
torch.use_deterministic_algorithms(True)

In [3]:
# Label mapping from the project constants; applied to df['label'] with
# Series.replace in the data-splitting cell.
class_dict = simple_constants.CLASS_DICT

#### The data

In [4]:
# Read the feats_22 dataset from its CSV file and preview the first rows.
df = pd.read_csv('../data/very_simple_datasets/feats_22.csv')
df.head()

Unnamed: 0,ana,non_scarring_alopecia,anti_dsdna_antibody,joint_involvement,proteinuria,pericardial_effusion,leukopenia,delirium,low_c3,low_c4,...,pleural_effusion,psychosis,seizure,lupus_anti_coagulant,anti_β2gp1_antibodies,anti_smith_antibody,oral_ulcers,auto_immune_hemolysis,acute_pericarditis,label
0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,0,0,1,0
1,1,1,1,1,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,1
2,0,0,0,1,0,0,1,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,1,0,1,0,0,0,1,0,0,0
4,0,0,0,0,0,0,0,0,1,1,...,0,0,1,0,0,0,0,1,0,0


In [5]:
# Recode the label column with the project mapping and show the class balance.
df['label'] = df['label'].replace(class_dict)
print(df['label'].value_counts())

# Every column except the last is a feature; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Stratified 80/20 split, seeded so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

# Keep a DataFrame view (features + label) of each split for later inspection ...
training_df = pd.concat([X_train, y_train], axis=1).reset_index(drop=True)
testing_df = pd.concat([X_test, y_test], axis=1).reset_index(drop=True)

# ... and convert the splits themselves to plain NumPy arrays.
X_train, X_test = np.array(X_train), np.array(X_test)
y_train, y_test = np.array(y_train), np.array(y_test)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

0    54842
1    15158
Name: label, dtype: int64


((56000, 22), (14000, 22), (56000,), (14000,))

In [6]:
# Quick look at the training labels (converted to a NumPy array in the previous cell).
y_train

array([0, 0, 0, ..., 0, 0, 0])

#### Training the model

In [7]:
def stable_dqn3(X_train, y_train, timesteps,  save=False, filename=None):
    """Train a Stable-Baselines3 DQN agent on ``SimpleEnv`` and return it.

    Args:
        X_train: Feature data handed to ``SimpleEnv``.
        y_train: Labels handed to ``SimpleEnv``.
        timesteps: Total number of environment steps to train for.
        save: When true, write the trained model to ``filename``.
        filename: Destination passed to ``model.save``; required when ``save``
            is true.

    Returns:
        The trained ``DQN`` model, so callers can evaluate it without having
        to reload it from disk.

    Raises:
        ValueError: If ``save`` is true but ``filename`` is ``None``.
    """
    # Fail before training starts rather than at save time, after the whole run.
    if save and filename is None:
        raise ValueError('filename must be provided when save=True')

    training_env = SimpleEnv(X_train, y_train)
    try:
        model = DQN('MlpPolicy', training_env, verbose=1, seed=simple_constants.SEED)
        model.learn(total_timesteps=timesteps, log_interval=100000)
        if save:
            model.save(filename)
    finally:
        # Release the environment even if training or saving raises.
        training_env.close()
    return model

In [None]:
ft_num = 22
# Train and save one model per step budget.
# Single-run alternative: step_budgets = (6_000_000,)
step_budgets = (5_900_000, 6_100_000, 6_300_000, 6_400_000, 6_600_000)
for steps in step_budgets:
    stable_dqn3(
        X_train,
        y_train,
        steps,
        save=True,
        filename=f'../models/very_simple_models/dqn_ft_{ft_num}_{steps}',
    )

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.9     |
|    ep_rew_mean      | 0.04     |
|    exploration_rate | 0.05     |
|    success_rate     | 0.52     |
| time/               |          |
|    episodes         | 100000   |
|    fps              | 1950     |
|    time_elapsed     | 401      |
|    total_timesteps  | 783269   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0124   |
|    n_updates        | 183317   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 16.3     |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.05     |
|    success_rate     | 0.75     |
| time/               |          |
|    episodes         | 200000   |
|    fps              | 2077     |
|    time_elapsed     | 1076   

In [None]:
# timesteps = 15000000
# ft_num = 11
# training_env = SimpleEnv(X_train, y_train)
# # training_env = LupusEnv(X_train, y_train)
# # model = DQN('MlpPolicy', training_env, verbose=1, seed=SEED, learning_rate=0.00001)
# model = DQN('MlpPolicy', training_env, verbose=1, seed=SEED)
# model.learn(total_timesteps=timesteps, log_interval=100000)
# # model.save(f'../models/very_simple_models/reward_shaping/dqn_fts_{ft_num}_{timesteps}')
# training_env.close()
# def training(X_train, y_trai)

In [None]:
# model = utils.load_dqn3('../models/very_simple_models/dqn_rew_22_6000000.zip')

#### Testing the model

In [None]:
# Evaluate the agent on the test split, one episode per loop iteration.
# NOTE: `model` must already exist in the kernel (trained or loaded in an
# earlier cell); this cell does not create it.
testing_env = SimpleEnv(X_test, y_test, random=False)
# testing_env = LupusEnv(X_test, y_test, random=False)

# Collect one info dict per finished episode and build the DataFrame once at
# the end: DataFrame.append was removed in pandas 2.0 and growing a frame row
# by row is quadratic.
episode_infos = []
# Integer interval for progress messages; the previous float modulus
# (len(X_test)/5) never fired unless the length was an exact multiple of 5.
progress_every = max(1, len(X_test) // 5)
count = 0
try:
    while True:
        count += 1
        if count % progress_every == 0:
            print(f'Count: {count}')
        obs, done = testing_env.reset(), False
        while not done:
            action, states = model.predict(obs, deterministic=True)
            obs, rew, done, info = testing_env.step(action)
        # `info` is from the step that ended the episode; copy it in case the
        # environment reuses the same dict object between episodes.
        episode_infos.append(dict(info))
except StopIteration:
    # Presumably raised by the environment once every test sample has been
    # served (random=False) -- this is what ends the `while True` loop.
    print('Testing done ....')
test_df = pd.DataFrame(episode_infos)
test_df.head()

In [None]:
# testing_df.iloc[4]

In [None]:
# Number of test rows with ana == 1 whose label is 0.
len(testing_df[(testing_df.ana==1) & (testing_df.label==0)])

In [None]:
# Number of distinct trajectories recorded during testing.
len(test_df.trajectory.value_counts())

#### Results

In [None]:
# How many test episodes followed each trajectory.
test_df.trajectory.value_counts()

In [None]:
# Smallest and largest episode_length recorded across the test episodes.
test_df.episode_length.min(), test_df.episode_length.max()

In [None]:
# len(testing_df[testing_df.ana == 0])

In [None]:
# Distinct-trajectory count again (same expression as an earlier cell).
len(test_df.trajectory.value_counts())

In [None]:
# Trajectory frequencies restricted to the episodes with the minimum episode_length.
test_df[test_df.episode_length == test_df.episode_length.min()].trajectory.value_counts()

In [None]:
# utils.success_rate's result is unpacked into a rate and a DataFrame
# (success_df); only the rate is displayed here.
success_rate, success_df = utils.success_rate(test_df)
success_rate

In [None]:
# Average episode length and average return, as computed by the project helper.
avg_length, avg_return = utils.get_avg_length_reward(test_df)
avg_length, avg_return

In [None]:
# Classification metrics from the project helper, comparing the y_actual and
# y_pred columns of the test results.
acc, f1, roc_auc = utils.test(test_df['y_actual'], test_df['y_pred'])
acc, f1, roc_auc

In [None]:
# Distribution of the y_actual values among the test episodes.
test_df.y_actual.value_counts()

In [None]:
# Distribution of the y_pred values, for comparison with y_actual.
test_df.y_pred.value_counts()

In [None]:
# test_df.to_csv(f'../test_dfs/very_simple_datasets/dqn_test_df_11ft_7000000.csv', index=False)
# success_df.to_csv(f'../test_dfs/very_simple_datasets/dqn_success_df_11ft_7000000.csv', index=False)

In [None]:
def training_and_testing(timesteps, feat_num):
    """Train a DQN on the training split, save it, then evaluate it on the test split.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test``, ``y_test``
    and ``SEED``. Prints a preview of the per-episode results, the trajectory
    counts, the success rate, and the accuracy / F1 / ROC AUC scores.

    Args:
        timesteps: Total number of environment steps to train for.
        feat_num: Feature count; used only in the saved model's file name.
    """
    training_env = SimpleEnv(X_train, y_train)
    try:
        model = DQN('MlpPolicy', training_env, verbose=1, seed=SEED)
        model.learn(total_timesteps=timesteps, log_interval=100000)
        model.save(f'../models/very_simple_models/dqn_fts_{feat_num}_{timesteps}')
    finally:
        # Release the training environment even if learning or saving raises.
        training_env.close()

    testing_env = SimpleEnv(X_test, y_test, random=False)
    # Collect one info dict per finished episode and build the DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and per-row growth is quadratic.
    episode_infos = []
    # Integer interval for progress messages; the previous float modulus
    # (len(X_test)/5) never fired unless the length was an exact multiple of 5.
    progress_every = max(1, len(X_test) // 5)
    count = 0
    try:
        while True:
            count += 1
            if count % progress_every == 0:
                print(f'Count: {count}')
            obs, done = testing_env.reset(), False
            while not done:
                action, states = model.predict(obs, deterministic=True)
                obs, rew, done, info = testing_env.step(action)
            # `info` is from the step that ended the episode; copy it in case
            # the environment reuses the same dict object between episodes.
            episode_infos.append(dict(info))
    except StopIteration:
        # Presumably raised by the environment once every test sample has been
        # served (random=False) -- this is what ends the `while True` loop.
        print('Testing done ....')
    test_df = pd.DataFrame(episode_infos)
    print(test_df.head())
    print(test_df.trajectory.value_counts())
    # The second element (a DataFrame in the cell-level usage) is not needed here.
    success_rate, _ = utils.success_rate(test_df)
    print(f'Success rate: {success_rate}')
    acc, f1, roc_auc = utils.test(test_df['y_actual'], test_df['y_pred'])
    print(f'Accuracy: {acc}')
    print(f'F1 Score: {f1}')
    print(f'ROC AUC SCore: {roc_auc}')