In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import random
import os
from datetime import datetime
# import torch
from stable_baselines import PPO2
from stable_baselines import bench, logger
import tensorflow
import sys
sys.path.append('..')
from modules import utils, constants
from modules.env import AnemiaEnv
from sklearn.model_selection import train_test_split
from stable_baselines.common.callbacks import CheckpointCallback
import matplotlib.pyplot as plt
%matplotlib inline

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."


In [2]:
# Single source of truth for every RNG seed used in this notebook.
# SEED = constants.SEED
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
# os.environ['PYTHONHASHSEED']=str(SEED)
# Seed TensorFlow with the same SEED as the Python and NumPy RNGs. It was
# previously seeded with constants.SEED, which is not guaranteed to match the
# local override above (nor the seed_{SEED} paths used for saved models).
tensorflow.compat.v1.set_random_seed(SEED)
SEED
# torch.manual_seed(SEED)
# torch.use_deterministic_algorithms(True)




42

#### The data

In [3]:
# Load the training CSV and preview its first rows.
train_df = pd.read_csv('../data/train_set_basic.csv')
train_df.head()

Unnamed: 0,hemoglobin,ferritin,ret_count,segmented_neutrophils,tibc,mcv,serum_iron,rbc,gender,creatinine,cholestrol,copper,ethanol,folate,glucose,hematocrit,tsat,label
0,14.728733,-1.0,3.170892,-1.0,-1.0,-1.0,-1.0,-1.0,1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,44.1862,-1.0,0
1,10.405752,9.634615,5.659537,-1.0,-1.0,77.413788,212.671838,4.032519,0,0.88713,96.311597,-1.0,43.218595,-1.0,83.207518,31.217256,-1.0,4
2,15.132737,358.914888,1.842252,3.797487,315.102272,80.500314,-1.0,5.639507,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,45.398211,-1.0,0
3,11.340169,-1.0,1.662209,2.441767,-1.0,97.033963,102.079062,3.506041,1,1.020527,127.281715,-1.0,20.847013,-1.0,62.210273,34.020508,-1.0,6
4,6.691485,-1.0,3.337971,-1.0,-1.0,99.838438,24.119564,2.010694,0,1.957666,34.633063,-1.0,34.612121,-1.0,112.411298,20.074456,-1.0,5


In [4]:
# Number of rows in the training frame.
len(train_df)

50400

In [5]:
# Features are every column except the last; the last column is the label.
# Both are converted to NumPy arrays.
X_train = np.array(train_df.iloc[:, :-1])
y_train = np.array(train_df.iloc[:, -1])
X_train.shape, y_train.shape

((50400, 17), (50400,))

#### The PPO Agent

In [6]:
# Log directory and log-name prefix handed to utils.stable_ppo for this seed.
log_path = f'../models/sb/ppo/seed_{SEED}'
log_prefix = f'ppo_seed_{SEED}'

# Call utils.stable_ppo once per step budget; each budget gets its own model file name.
# NOTE(review): the recorded output restarts at n_updates = 1 for the second budget,
# so each budget looks like a fresh run rather than a continuation — confirm in utils.stable_ppo.
for steps in [int(1e6), int(12e6), int(15e6)]:
# steps = int(500e6)
    model_name =f'ppo_basic_{SEED}_{steps}'
    # NOTE(review): this repeats the log_path directory literally; keep the two in sync.
    filename = f'../models/sb/ppo/seed_{SEED}/{model_name}'
    # NOTE(review): the meaning of the positional `True` flag is defined in utils.stable_ppo — confirm there.
    utils.stable_ppo(X_train, y_train, steps, log_path, log_prefix, True, filename)

Wrapping the env in a DummyVecEnv.




Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.





Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



-------------------------------------
| approxkl           | 0.0003090709 |
| clipfrac           | 0.0          |
| explained_variance | -1.14        |
| fps                | 721          |
| n_updates          | 1            |
| policy_entropy     | 3.2186642    |
| policy_loss        | -0.012368135 |
| serial_timesteps   | 128          |
| time_elapsed       | 1.41e-05     |
| total_timesteps    | 128          |
| value_loss         | 0.4942661    |
-------------------------------------
Wrapping the env in a DummyVecEnv.
-------------------------------------
| approxkl           | 0.0003090709 |
| clipfrac           | 0.0          |
| explained_variance | -1.14        |
| fps                | 782          |
| n_upda

#### Evaluating model

In [3]:
def synthetic_ppo_eval(ppo_model, env=None):
    """Run a trained policy through an evaluation environment, one row per episode.

    Parameters
    ----------
    ppo_model : object
        Anything exposing ``predict(obs, deterministic=True)`` that returns
        ``(action, state)``, e.g. a loaded ``PPO2`` model.
    env : object, optional
        Environment exposing ``reset()`` and ``step(action)``. It must raise
        ``StopIteration`` once it has no more episodes to serve; that is how
        the evaluation loop terminates. When omitted, an
        ``AnemiaEnv(X_test, y_test, random=False)`` is built from the
        notebook-level ``X_test`` / ``y_test`` at call time.

    Returns
    -------
    pandas.DataFrame
        One row per finished episode, built from the ``info`` dict the
        environment returned on the step where ``done`` became true. Empty
        if no episode finished.

    Notes
    -----
    Rows are collected in a list and turned into a frame once, instead of
    growing a DataFrame with ``DataFrame.append`` (quadratic, and removed in
    pandas 2.0). Values keep whatever dtype the environment reports.
    """
    if env is None:
        env = AnemiaEnv(X_test, y_test, random=False)

    episode_infos = []
    count = 0

    try:
        while True:
            count += 1
            if count % 5000 == 0:
                print(f'Count: {count}')
            obs, done = env.reset(), False
            while not done:
                # Deterministic actions make the evaluation repeatable.
                action, _states = ppo_model.predict(obs, deterministic=True)
                obs, rew, done, info = env.step(action)
                if done:
                    episode_infos.append(info)
    except StopIteration:
        # Raised by the environment when its data is exhausted.
        print('Testing done.....')
    return pd.DataFrame(episode_infos)


In [4]:
def get_avg_length_reward(df):
    """Return ``(mean episode_length, mean reward)`` for an evaluation results frame.

    ``df`` must expose ``episode_length`` and ``reward`` attributes (columns).
    """
    return tuple(
        np.mean(getattr(df, column)) for column in ("episode_length", "reward")
    )

In [5]:
def test(ytest, ypred):
    """Compute accuracy, macro F1 and (if available) ROC-AUC for predictions.

    Parameters
    ----------
    ytest : array-like
        True labels.
    ypred : array-like
        Predicted labels.

    Returns
    -------
    tuple
        ``(acc, f1, roc_auc)``. ``roc_auc`` is ``None`` when the ROC-AUC
        computation fails. F1 is macro-averaged over the labels present in
        ``ytest``.
    """
    # Imported locally so the function works on a fresh kernel: the
    # notebook's import cell does not bring these names into scope.
    from sklearn.metrics import accuracy_score, f1_score

    acc = accuracy_score(ytest, ypred)
    f1 = f1_score(ytest, ypred, average='macro', labels=np.unique(ytest))
    try:
        # NOTE(review): `multiclass` is not defined in the visible notebook;
        # presumably a multiclass ROC-AUC helper — confirm where it lives.
        roc_auc = multiclass(ytest, ypred)
    except Exception:
        # Best-effort metric. Catch Exception, not a bare except, so that
        # KeyboardInterrupt / SystemExit still propagate.
        roc_auc = None
    return acc, f1, roc_auc

In [6]:
# Load the test CSV and preview its first rows.
test_df = pd.read_csv('../data/test_set_constant.csv')
test_df.head()

Unnamed: 0,hemoglobin,ferritin,ret_count,segmented_neutrophils,tibc,mcv,serum_iron,rbc,gender,creatinine,cholestrol,copper,ethanol,folate,glucose,hematocrit,tsat,label
0,7.116363,-1.0,3.781573,2.738413,-1.0,95.904198,68.457895,2.226085,0,1.892912,39.80855,110.329197,64.40435,21.654404,73.787009,21.349089,-1.0,5
1,8.12532,92.230003,4.231419,1.188039,143.365567,104.057204,204.747831,2.342554,0,0.652614,13.478089,-1.0,32.705481,-1.0,43.520272,24.375961,142.815207,1
2,11.30945,38.324563,-1.0,-1.0,455.077909,76.402602,-1.0,4.440732,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,33.92835,-1.0,4
3,13.763858,253.513394,2.262606,0.551444,453.772884,82.781943,90.101466,4.987993,0,0.853521,104.005514,34.639227,0.963866,22.083012,88.891838,41.291574,19.856071,0
4,11.464002,-1.0,-1.0,-1.0,320.964653,104.287127,-1.0,3.297819,0,1.163516,121.616315,105.895897,-1.0,9.337462,-1.0,34.392007,-1.0,7


In [7]:
# Features are every column except the last; the last column is the label.
# Both are converted to NumPy arrays.
X_test = np.array(test_df.iloc[:, :-1])
y_test = np.array(test_df.iloc[:, -1])
X_test.shape, y_test.shape

((14000, 17), (14000,))

In [18]:
# NOTE(review): this loads a seed-63 checkpoint, while this notebook sets SEED = 42
# and writes its own models under seed_42/ — confirm this is the intended model.
model_name = '../models/sb/ppo/seed_63/ppo_basic_63_15000000.pkl'
ppo_model = PPO2.load(model_name)
# NOTE: rebinds test_df from the raw test split to the per-episode evaluation
# results; the cells below read the results from this name.
test_df = synthetic_ppo_eval(ppo_model)
test_df.head()

Loading a model without an environment, this model cannot be trained until it has a valid environment.
Count: 5000
Count: 10000
Testing done.....


Unnamed: 0,index,episode_length,reward,y_pred,y_actual,trajectory,terminated,is_success
0,0.0,1.0,1.0,5.0,5.0,[Hemolytic anemia],0.0,1.0
1,1.0,1.0,-1.0,5.0,1.0,[Hemolytic anemia],0.0,0.0
2,2.0,1.0,-1.0,5.0,4.0,[Hemolytic anemia],0.0,0.0
3,3.0,1.0,-1.0,5.0,0.0,[Hemolytic anemia],0.0,0.0
4,4.0,1.0,-1.0,5.0,7.0,[Hemolytic anemia],0.0,0.0


In [19]:
# Metrics computed by utils.get_metrics from the evaluation results frame.
# NOTE(review): the recorded values look like percentages rather than fractions —
# confirm the scale in utils.get_metrics.
acc, f1, roc_auc = utils.get_metrics(test_df)
acc, f1, roc_auc

(12.892857142857142, 2.8551091426763677, 50.0)

In [20]:
# Mean episode length only: the first element of the (length, reward) pair.
get_avg_length_reward(test_df)[0]

1.0