In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import random
import os
import tensorflow
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# One seed shared by every random number generator used in this notebook.
SEED = 42

# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
# presumably only affects processes launched after this point -- confirm.
os.environ['PYTHONHASHSEED'] = str(SEED)

# Seed the Python, NumPy and TensorFlow (TF1 API) generators with the same value.
random.seed(SEED)
np.random.seed(SEED)
tensorflow.set_random_seed(SEED)

#### The data

In [3]:
# Read the dataset and replace every NaN in the frame with 0.
df = pd.read_csv('data/anemia_synth_dataset_hb_some_nans.csv').fillna(0)

# Give each distinct label an integer id, numbered in the order returned by unique().
classes = list(df.label.unique())
nums = list(range(len(classes)))
class_dict = {label: idx for label, idx in zip(classes, nums)}
class_dict

{'No anemia': 0,
 'Hemolytic anemia': 1,
 'Aplastic anemia': 2,
 'Iron deficiency anemia': 3,
 'Vitamin B12/Folate deficiency anemia': 4,
 'Anemia of chronic disease': 5}

In [4]:
# Encode the string labels as their integer class ids.
df['label'] = df['label'].replace(class_dict)

# Every column except the last is a feature; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Stratified 70/30 split, then convert each part to a NumPy array.
splits = train_test_split(X, y, test_size=0.3, stratify=y, random_state=SEED)
X_train, X_test, y_train, y_test = (np.array(part) for part in splits)
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((26868, 6), (11515, 6), (26868,), (11515,))

In [5]:
# Class names first (in class_dict order), followed by every non-label column name.
action_list = [*class_dict, *(col for col in df.columns if col != 'label')]
len(action_list)

12

In [6]:
action_list

['No anemia',
 'Hemolytic anemia',
 'Aplastic anemia',
 'Iron deficiency anemia',
 'Vitamin B12/Folate deficiency anemia',
 'Anemia of chronic disease',
 'hemoglobin',
 'ferritin',
 'ret_count',
 'segmented_neutrophils',
 'tibc',
 'mcv']

#### The Environment

In [7]:
from envs import SyntheticPPOEnv

#### The Agent

In [8]:
from stable_baselines.common.env_checker import check_env
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
#from stable_baselines import DQN
from stable_baselines import bench, logger

  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."


In [9]:
def stable_ppo(total_timesteps=int(6.5e6),
               save_path='models/ppo/synthentic_with_hb_some_nans_stable_dqn65e6.pkl',
               log_interval=10000):
    """Train a stable-baselines PPO2 agent on the training split and save it.

    Reads the notebook globals ``X_train``, ``y_train`` and ``SEED``.

    Parameters
    ----------
    total_timesteps : int
        Number of environment steps to train for.
    save_path : str
        Where the trained model is written. The default keeps the historical
        file name (which says "dqn" although the model is PPO2) so that
        existing loaders keep working.
    log_interval : int
        Forwarded to ``PPO2.learn``.

    Returns
    -------
    The trained ``PPO2`` model.
    """
    training_env = SyntheticPPOEnv(X_train, y_train)
    env = DummyVecEnv([lambda: bench.Monitor(training_env, logger.get_dir())])
    try:
        # Train on the monitored, vectorised env. Previously this wrapper was
        # built but the raw env was handed to PPO2, so the Monitor never
        # recorded a single episode and env.close() closed an unused env.
        model = PPO2('MlpPolicy', env, verbose=1, seed=SEED, n_cpu_tf_sess=1)
        model.learn(total_timesteps=total_timesteps, log_interval=log_interval)

        # Make sure the target directory exists before saving.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        model.save(save_path)
    finally:
        # Release the env even if training or saving fails.
        env.close()
    return model

ppo_model = stable_ppo()

Wrapping the env in a DummyVecEnv.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
--------------------------------------
| approxkl           | 0.00014339288 |
| clipfrac           | 0.0           |
| explained_variance | -0.036        |
| fps                | 409           |
| n_updates          | 1             |
| policy_entropy     | 2.4847782     |
| policy_loss        | -0.0057206964 |
| serial_timesteps   | 128           |
| time_elapsed       | 0             |
| total_timesteps    | 128           |
| value_loss         | 0.6671917     |
--------------------------------------
-------------------------------------
| approxkl           | 0.0027202123 |
| clipfrac           | 0.025390625  |
| explained_variance | 0.3          |
| fps                | 2032         |
| n_updat

#### Performance Evaluation

In [10]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, auc, roc_curve

In [11]:
def test(ytest, ypred):
    acc = accuracy_score(ytest, ypred)
    f1 = f1_score(ytest, ypred, average ='macro', labels=np.unique(ytest))
    try:
        roc_auc = multiclass(ytest, ypred)
    except:
        roc_auc = None
    return acc, f1, roc_auc

In [12]:
def get_avg_length_reward(df):
    """Return ``(mean episode_length, mean reward)`` of an episode table.

    ``df`` must expose ``episode_length`` and ``reward`` attributes (for
    example DataFrame columns) holding numeric values.
    """
    return np.mean(df.episode_length), np.mean(df.reward)

In [13]:
def synthetic_ppo_eval(dqn_model):
    """Run a trained agent over the test split and collect per-episode info.

    Reads the notebook globals ``X_test`` and ``y_test``.

    Parameters
    ----------
    dqn_model : trained model exposing ``predict(obs, deterministic=True)``.
        The name is historical; the notebook passes a PPO2 model. The
        argument is now actually used -- before, it was ignored and the
        global ``ppo_model`` was queried instead.

    Returns
    -------
    pandas.DataFrame
        One row per finished episode, built from the ``info`` dict returned
        by the final ``env.step`` of that episode.
    """
    env = SyntheticPPOEnv(X_test, y_test, random=False)
    # Collect plain records and build the frame once at the end; appending to
    # a DataFrame inside the loop copies the whole frame on every episode.
    episode_infos = []
    count = 0

    try:
        while True:
            count += 1
            if count % 5000 == 0:
                print(f'Count: {count}')
            obs, done = env.reset(), False
            while not done:
                action, _states = dqn_model.predict(obs, deterministic=True)
                obs, rew, done, info = env.step(action)
                if done:
                    # Shallow copy so a reused info dict cannot alias rows.
                    episode_infos.append(dict(info))
    except StopIteration:
        # NOTE(review): presumably raised by the env once every test sample
        # has been served (random=False) -- confirm in envs.SyntheticPPOEnv.
        print('Testing done.....')
    return pd.DataFrame(episode_infos)

test_df = synthetic_ppo_eval(ppo_model)

Count: 5000
Count: 10000
Testing done.....


In [14]:
len(test_df), len(X_test)

(11515, 11515)

In [15]:
# Keep only the rows whose y_pred is not missing.
has_prediction = test_df['y_pred'].notna()
y_pred_df = test_df[has_prediction]

# Of those, the rows where the predicted class equals the actual class.
is_correct = y_pred_df['y_pred'] == y_pred_df['y_actual']
success_df = y_pred_df[is_correct]
len(success_df)

6985

In [16]:
y_pred_df.iloc[0]['trajectory']

['hemoglobin', 'ret_count', 'mcv', 'Hemolytic anemia']

In [17]:
y_pred_df.y_pred.unique()

array([1., 2., 5., 0., 3.])

In [18]:
# Share of correct predictions over ALL rows of test_df, including the rows
# without a prediction -- so this differs from the accuracy on y_pred_df.
success_fraction = len(success_df) / len(test_df)
success_rate = success_fraction * 100
success_rate

60.660008684324794

In [19]:
avg_length, avg_return = get_avg_length_reward(test_df)
avg_length, avg_return

(5.3501519756838904, 1.6772036474164134)

In [20]:
acc, f1, roc_auc = test(y_pred_df['y_actual'], y_pred_df['y_pred'])
acc, f1, roc_auc

(0.9012903225806451, 0.6033645983005789, None)

#### OLD CODE

In [None]:
# from stable_baselines.common.env_checker import check_env
# from stable_baselines.common.policies import MlpPolicy
# from stable_baselines.common.vec_env import DummyVecEnv
# from stable_baselines import PPO2
import time
import tensorflow as tf
from gym.wrappers.time_limit import TimeLimit

from baselines.ppo2 import ppo2
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

from baselines import bench
from baselines import logger
from baselines import deepq
from baselines.common.tf_util import make_session

#### PPO

In [None]:
def synthetic_ppo():
    """Train a baselines ``ppo2`` agent on the training split and save it.

    Logs to ``./logs/synthetic_ppo2`` (stdout + tensorboard), trains a
    2x64 MLP policy for 1.2e5 timesteps with seed 42 and writes the model to
    ``models/synthetic_ppo2.pkl``. Reads the globals ``X_train``/``y_train``.

    NOTE(review): ``SyntheticEnv`` is not imported in the visible part of
    this notebook -- confirm where it comes from before running.
    """
    logger.configure(dir='./logs/synthetic_ppo2', format_strs=['stdout', 'tensorboard'])

    def make_env():
        # Monitor wrapper writing to the directory configured just above.
        return bench.Monitor(SyntheticEnv(X_train, y_train), logger.get_dir())

    vec_env = DummyVecEnv([make_env])

    learn_kwargs = dict(
        network='mlp',
        num_layers=2,
        num_hidden=64,
        nsteps=7,
        nminibatches=7,
        total_timesteps=int(1.2e5),  # total number of samples to train on
        seed=42,
    )
    model = ppo2.learn(env=vec_env, **learn_kwargs)
    model.save('models/synthetic_ppo2.pkl')
    return model

start_time = time.time()
ppo_model = synthetic_ppo()
print('PPO Training Time:', time.time() - start_time)

In [None]:
X_train[204], y_train[204]

#### Performance Evaluation

In [None]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, auc, roc_curve

In [None]:
def multiclass(actual_class, pred_class, average = 'macro'):
    """Average one-vs-rest ROC AUC over the classes present in ``actual_class``.

    For each class, both label sequences are binarised (1 = this class,
    0 = anything else) and scored with ``roc_auc_score``; the per-class
    scores are then averaged with equal weight.

    Parameters
    ----------
    actual_class : iterable of true labels.
    pred_class : iterable of predicted labels, aligned with ``actual_class``.
    average : forwarded unchanged to ``roc_auc_score``.

    Returns
    -------
    float
        Mean of the per-class scores.

    Raises
    ------
    ValueError
        If ``actual_class`` is empty, or if ``roc_auc_score`` rejects the
        binarised input.
    """
    unique_class = set(actual_class)
    if not unique_class:
        raise ValueError('ROC AUC is undefined for an empty label set')

    roc_auc_dict = {}
    for per_class in unique_class:
        # Compare against the current class directly. The previous
        # "not in the other classes" test marked a predicted label that never
        # occurs in actual_class as positive for EVERY class.
        new_actual_class = [1 if x == per_class else 0 for x in actual_class]
        new_pred_class = [1 if x == per_class else 0 for x in pred_class]
        roc_auc_dict[per_class] = roc_auc_score(new_actual_class, new_pred_class, average = average)
    return sum(roc_auc_dict.values()) / len(roc_auc_dict)

In [None]:
def test(ytest, ypred):
    acc = accuracy_score(ytest, ypred)
    f1 = f1_score(ytest, ypred, average ='macro', labels=np.unique(ytest))
    try:
        roc_auc = multiclass(ytest, ypred)
    except:
        roc_auc = None
    return acc, f1, roc_auc

In [None]:
def get_avg_length_reward(df):
    """Return ``(mean episode_length, mean reward)`` of an episode table.

    ``df`` must expose ``episode_length`` and ``reward`` attributes (for
    example DataFrame columns) holding numeric values.
    """
    mean_steps = np.mean(df.episode_length)
    mean_return = np.mean(df.reward)
    return mean_steps, mean_return

In [None]:
def synthetic_ppo_eval(ppo_model):
    """Run a baselines PPO2 model over the test split and collect episode info.

    Reads the notebook globals ``X_test`` and ``y_test``.

    Parameters
    ----------
    ppo_model : trained model exposing ``step(obs)``, whose first return
        value is used as the action.

    Returns
    -------
    pandas.DataFrame
        One row per finished episode, built from ``info[0]`` of the step
        that ended it.
    """
    env = DummyVecEnv([lambda: SyntheticEnv(X_test, y_test, random=False)])
    # Gather records and build the frame once; DataFrame.append inside the
    # loop copies the whole frame on every episode.
    episode_infos = []

    try:
        while True:
            obs, done = env.reset(), [False]
            while not done[0]:
                # NOTE(review): the original comment says the vec env resets
                # itself inside step() when an episode ends. Together with the
                # explicit reset() above, test samples may be skipped -- confirm.
                obs, rew, done, info = env.step(ppo_model.step(obs[None])[0])
                # Single-env vec env: index 0 holds the only env's flag/info.
                if done[0]:
                    episode_infos.append(dict(info[0]))
    except StopIteration:
        print('Testing done.....')

    return pd.DataFrame(episode_infos)

test_df = synthetic_ppo_eval(ppo_model)

In [None]:
X_test[3]

In [None]:
len(X_test[:4]), len(test_df)

In [None]:
y_pred_df = test_df[test_df['y_pred'].notna()]
success_df = y_pred_df[y_pred_df['y_pred']== y_pred_df['y_actual']]
len(success_df)

In [None]:
success_rate = len(success_df)/len(test_df)*100
success_rate

In [None]:
# Average episode length and average return
avg_length, avg_return = get_avg_length_reward(test_df)
avg_length, avg_return

In [None]:
acc, f1, roc_auc = test(y_pred_df['y_actual'], y_pred_df['y_pred'])
acc, f1, roc_auc

In [None]:
# Look at misdiagnosed episodes

# Look at episodes that exceed max length