In [36]:
import pandas as pd
import numpy as np
import seaborn as sns
import random
import joblib
import os
import tensorflow
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

In [28]:
# Pin every random-number source used in this notebook to a single seed so the
# random draws made later (e.g. the np.random.uniform call in the data cell)
# are repeatable across runs.
SEED = 42
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting
# it here presumably only affects child processes — confirm this is intended.
os.environ['PYTHONHASHSEED'] = str(SEED)
for seed_rng in (random.seed, np.random.seed, tensorflow.set_random_seed):
    seed_rng(SEED)

#### The data

In [29]:
# Build the evaluation frame from the unspecified-anemia CSV in one pass:
#   1. drop the columns that are not used in this notebook,
#   2. tag every row with label 7,
#   3. set `hemoglobin` to uniform random draws from [3, 12),
#   4. put the columns in a fixed order with `label` last.
column_order = ['hemoglobin', 'ferritin', 'ret_count', 'segmented_neutrophils', 'tibc', 'mcv', 'label']
unspecified_df = (
    pd.read_csv('data/unspecified_anemia_dataset.csv')
    .drop(columns=['rbc', 'mentzer_index', 'iron'])
    .assign(
        label=7,
        hemoglobin=lambda frame: np.random.uniform(3, 12, len(frame)),
    )
    [column_order]
)
unspecified_df.head()

Unnamed: 0,hemoglobin,ferritin,ret_count,segmented_neutrophils,tibc,mcv,label
0,6.370861,0.0,0.0,0.0,0.0,102.54106,7
1,11.556429,0.0,0.0,0.0,0.0,102.449492,7
2,9.587945,0.0,0.0,0.0,0.0,102.357827,7
3,8.387926,0.0,0.0,0.0,0.0,100.862312,7
4,4.404168,0.0,0.0,0.0,0.0,100.814369,7


In [30]:
# Separate the model inputs (every column except `label`) from the target
# vector, and hand both over as plain NumPy arrays.
feature_frame = unspecified_df.drop(columns='label')
X_test, y_test = np.array(feature_frame), np.array(unspecified_df['label'])

In [31]:
# Sanity check on the feature matrix dimensions: (rows, feature columns).
X_test.shape

(1604, 6)

#### DQN

In [32]:
from envs import SyntheticComplexHbEnv
from stable_baselines import DQN

In [33]:
# Restore the saved DQN agent from disk. No environment is passed to `load`,
# so (as the message printed below says) the model cannot be trained further
# until one is attached; in this notebook it is only used for prediction.
dqn_model = DQN.load('models/synthentic_with_hb_some_nans_stable_dqn2e6.pkl')

Loading a model without an environment, this model cannot be trained until it has a valid environment.


In [34]:
# Run the trained DQN agent over the test samples and record the `info` dict
# returned by the environment on the final step of every episode.
#
# The per-episode records are collected in a plain list and converted to a
# DataFrame once at the end. Growing a DataFrame with `DataFrame.append`
# inside the loop copies the whole frame on every call (quadratic time), and
# that method was deprecated in pandas 1.4 and removed in pandas 2.0.
episode_records = []

env = SyntheticComplexHbEnv(X_test, y_test, random=False)
count = 0

try:
    while True:
        count += 1
        if count % 5000 == 0:
            print(f'Count: {count}')
        obs, done = env.reset(), False
        while not done:
            action, _states = dqn_model.predict(obs, deterministic=True)
            obs, rew, done, info = env.step(action)
            if done:
                # Shallow-copy so that a later in-place change to the
                # environment's `info` dict cannot alter a stored record.
                episode_records.append(dict(info))
except StopIteration:
    # The loop has no exit condition of its own; it ends when StopIteration
    # propagates out of it (presumably raised by the environment once all
    # samples have been served — verify against SyntheticComplexHbEnv).
    print('Testing done.....')

# Build the frame in one shot; sort the columns alphabetically so the layout
# matches the previously recorded output of this cell.
test_df = pd.DataFrame(episode_records).sort_index(axis=1)
test_df.head()

Testing done.....


Unnamed: 0,episode_length,index,is_success,reward,terminated,trajectory,y_actual,y_pred
0,8.0,0.0,0.0,-2.0,1.0,"[hemoglobin, mcv, segmented_neutrophils, segme...",7.0,
1,8.0,1.0,0.0,-2.0,1.0,"[hemoglobin, mcv, segmented_neutrophils, segme...",7.0,
2,8.0,2.0,0.0,-2.0,1.0,"[hemoglobin, mcv, segmented_neutrophils, segme...",7.0,
3,8.0,3.0,0.0,-2.0,1.0,"[hemoglobin, mcv, segmented_neutrophils, segme...",7.0,
4,8.0,4.0,0.0,-2.0,1.0,"[hemoglobin, mcv, segmented_neutrophils, segme...",7.0,


In [37]:
# Load the saved model used for comparison (presumably a fitted decision-tree
# baseline, judging by the file path — confirm).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts from a trusted source.
dt = joblib.load('models/baselines/decision_tree.joblib')

In [43]:
# Predict a label for every test row with the loaded `dt` model, store the
# predictions next to the inputs, and show how often each label was predicted.
dt_predictions = dt.predict(X_test)
unspecified_df['dt_prediction'] = dt_predictions
unspecified_df['dt_prediction'].value_counts()

2    1604
Name: dt_prediction, dtype: int64

In [None]:
unspecified_df['dt_prediction'].va