In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import random
import os
import torch
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
from envs import SyntheticComplexEnv, SyntheticSimpleEnv

In [2]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
os.environ['PYTHONHASHSEED']=str(SEED)
torch.manual_seed(SEED)
torch.use_deterministic_algorithms(True)

#### The Data

In [3]:
df = pd.read_csv('data/anemia_synth_dataset.csv')
df = df.fillna(0)
classes = list(df.label.unique())
nums = [i for i in range(len(classes))]
class_dict = dict(zip(classes, nums))
df['label'] = df['label'].replace(class_dict)
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=SEED)
X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

# df = pd.read_csv('data/dataset_10000.csv')
# class_dict = {'A':0, 'B':1, 'C':2}
# df['label'] = df['label'].replace(class_dict)
# X = df.iloc[:, 0:-1]
# y = df.iloc[:, -1]

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=SEED)
# X_train, y_train = np.array(X_train), np.array(y_train)
# X_test, y_test = np.array(X_test), np.array(y_test)
# X_train.shape, X_test.shape, y_train.shape, y_test.shape

((19877, 8), (8519, 8), (19877,), (8519,))

In [4]:
training_env = SyntheticComplexEnv(X_train, y_train)

#### The Agent

In [5]:
from stable_baselines3 import DQN

In [6]:
model = DQN('MlpPolicy', training_env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [7]:
model.learn(total_timesteps=int(1.8e6), log_interval=100000)

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.98     |
|    ep_rew_mean      | 2.54     |
|    exploration_rate | 0.05     |
|    success_rate     | 0.32     |
| time/               |          |
|    episodes         | 100000   |
|    fps              | 1101     |
|    time_elapsed     | 461      |
|    total_timesteps  | 508378   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.85     |
|    n_updates        | 114594   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 5.7      |
|    ep_rew_mean      | 3.14     |
|    exploration_rate | 0.05     |
|    success_rate     | 0.67     |
| time/               |          |
|    episodes         | 200000   |
|    fps              | 992      |
|    time_elapsed     | 1057     |
|    total_timesteps  | 1048667  |
| train/              |          |
|    learning_rate  

<stable_baselines3.dqn.dqn.DQN at 0x18e5fb6a7c8>

#### Testing

In [8]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, auc, roc_curve

In [9]:
def multiclass(actual_class, pred_class, average = 'macro'):

    unique_class = set(actual_class)
    roc_auc_dict = {}
    for per_class in unique_class:
        other_class = [x for x in unique_class if x != per_class]
        new_actual_class = [0 if x in other_class else 1 for x in actual_class]
        new_pred_class = [0 if x in other_class else 1 for x in pred_class]
        roc_auc = roc_auc_score(new_actual_class, new_pred_class, average = average)
        roc_auc_dict[per_class] = roc_auc
    avg = sum(roc_auc_dict.values()) / len(roc_auc_dict)
    return avg

In [10]:
def test(ytest, ypred):
    acc = accuracy_score(ytest, ypred)
    f1 = f1_score(ytest, ypred, average ='macro', labels=np.unique(ytest))
    try:
        roc_auc = multiclass(ytest, ypred)
    except:
        roc_auc = None
    return acc, f1, roc_auc

In [11]:
def get_avg_length_reward(df):
    length = np.mean(df.episode_length)
    reward = np.mean(df.reward)
    return length, reward

In [13]:
def synthetic_dqn_eval(dqn_model):
    attempts, correct = 0,0
    test_df = pd.DataFrame()

    env = SyntheticComplexEnv(X_test, y_test, random=False)
    count=0

    try:
        while True:
            count+=1
            if count%5000==0:
                print(f'Count: {count}')
            obs, done = env.reset(), False
            while not done:
                action, _states = dqn_model.predict(obs, deterministic=True)
                obs, rew, done,info = env.step(action)
                #if (done==True) & (np.isfinite(info['y_pred'])):
                if done == True:
                    test_df = test_df.append(info, ignore_index=True)
                #print('....................TEST DF ....................')
                #if len(test_df) != 0:
                #    print(test_df.head())

    except StopIteration:
        print('Testing done.....')
    return test_df

test_df = synthetic_dqn_eval(model)

Count: 5000
Testing done.....


In [14]:
y_pred_df = test_df[test_df['y_pred'].notna()]
success_df = y_pred_df[y_pred_df['y_pred']== y_pred_df['y_actual']]
len(success_df)

6920

In [15]:
y_pred_df.y_pred.unique()

array([0., 2., 3., 1.])

In [16]:
success_rate = len(success_df)/len(test_df)*100
success_rate

81.23019133701138

In [17]:
#avg length and return 
avg_length, avg_return = get_avg_length_reward(test_df)
avg_length, avg_return

(5.952693978166452, 4.000821692686935)

In [18]:
acc, f1, roc_auc = test(y_pred_df['y_actual'], y_pred_df['y_pred'])
acc, f1, roc_auc

  'precision', 'predicted', average, warn_for)


(0.9815602836879432, 0.5912753797877773, 0.7845426981890689)