In [2]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
import tensorflow
import os
import utils
from envs import SyntheticComplexHbEnv
from stable_baselines.common.env_checker import check_env
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import DQN
from stable_baselines import bench, logger
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
SEED = 42

# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# assigning it here presumably only reaches child processes -- confirm whether
# the kernel itself has to be launched with it for hash-order reproducibility.
os.environ['PYTHONHASHSEED'] = str(SEED)

# Seed every random-number source used in this notebook with the same value:
# the stdlib `random` module, NumPy's global generator and TensorFlow's
# graph-level seed.
for _apply_seed in (random.seed, np.random.seed, tensorflow.set_random_seed):
    _apply_seed(SEED)

#### Functions

In [4]:
def sample_train_set(x, y, sample_num):
    """Draw a random training subset and return it as NumPy arrays.

    `sample_num` index labels are picked without replacement from `x.index`
    using the stdlib `random` module (so the draw follows `random.seed`), and
    the same labels are then looked up in both `x` and `y` so features and
    targets stay aligned row by row.

    Parameters
    ----------
    x : pandas object indexed by label (features).
    y : pandas object sharing `x`'s index labels (targets).
    sample_num : int
        Number of index labels to draw; `random.sample` raises `ValueError`
        when it exceeds the number of labels available.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The sampled features and the matching targets.
    """
    chosen_labels = random.sample(list(x.index), sample_num)
    features, targets = (np.array(frame.loc[chosen_labels]) for frame in (x, y))
    return features, targets

In [5]:
def stable_dqn(X_train, y_train, timesteps, model_file_name):
    """Train a DQN agent on the training data and save it to disk.

    Parameters
    ----------
    X_train, y_train :
        Features and labels handed to `SyntheticComplexHbEnv`.
    timesteps : int
        Total number of environment steps passed to `DQN.learn`.
    model_file_name : str
        Path (without extension) relative to `models/`; the model is written
        to `models/{model_file_name}.pkl`. May contain sub-directories.

    Returns
    -------
    The trained `DQN` model.
    """
    import os  # local import keeps this cell self-contained

    model_path = f'models/{model_file_name}.pkl'
    # Create the target directory up front so a missing folder is not only
    # discovered when saving, after the whole training run has finished.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    training_env = SyntheticComplexHbEnv(X_train, y_train)
    env = bench.Monitor(training_env, logger.get_dir())
    try:
        # Train on the Monitor-wrapped env; previously the raw env was passed,
        # so the monitor was created but never saw a single step.
        model = DQN('MlpPolicy', env, verbose=1, seed=SEED, n_cpu_tf_sess=1)
        model.learn(total_timesteps=timesteps, log_interval=10000)
        model.save(model_path)
    finally:
        # Release the environment even if training or saving fails.
        env.close()
    return model

In [6]:
def evaluate_dqn(dqn_model, X_test, y_test):
    """Run the trained agent over the test environment and collect episode infos.

    Episodes are played with deterministic actions on a
    `SyntheticComplexHbEnv` built with `random=False`. At the end of every
    episode the `info` dict returned by the final `step` call is recorded.
    The loop ends when a `StopIteration` propagates out of it -- presumably
    raised by the environment once the test data is exhausted (verify against
    `SyntheticComplexHbEnv`).

    Parameters
    ----------
    dqn_model :
        Trained model exposing `predict(obs, deterministic=True)`.
    X_test, y_test :
        Test features and labels handed to the environment.

    Returns
    -------
    pd.DataFrame
        One row per finished episode, built from the recorded `info` dicts
        (empty when no episode finished).
    """
    test_env = SyntheticComplexHbEnv(X_test, y_test, random=False)
    # Collect plain records and build the frame once at the end: appending to a
    # DataFrame per episode copies the whole frame each time, and
    # `DataFrame.append` no longer exists in pandas >= 2.0.
    episode_infos = []
    count = 0

    try:
        while True:
            count += 1
            if count % 5000 == 0:
                print(f'Count: {count}')
            obs, done = test_env.reset(), False
            while not done:
                action, _states = dqn_model.predict(obs, deterministic=True)
                obs, rew, done, info = test_env.step(action)
            # Shallow-copy so a dict reused by the environment between
            # episodes cannot overwrite an already recorded row.
            episode_infos.append(dict(info))
    except StopIteration:
        print('Testing done.....')
    return pd.DataFrame(episode_infos)

#### The Data

In [7]:
# Load the synthetic anemia dataset and fill every missing value with 0.
# NOTE(review): the original author was unsure whether this is the real
# dataset -- confirm the file name before relying on the results.
df = pd.read_csv('data/anemia_synth_dataset_hb_some_nans.csv').fillna(0)

# Encode each distinct label as an integer, numbered in order of first appearance.
classes = df.label.unique().tolist()
class_dict = {class_name: code for code, class_name in enumerate(classes)}
nums = list(class_dict.values())
print(class_dict)
df['label'] = df['label'].replace(class_dict)
print(df.label.value_counts())

# Every column but the last is a feature; the last column is the target
# (assumes `label` is the last column -- TODO confirm).
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Stratified 70/30 split; the test part is converted to NumPy arrays, while the
# training part stays in pandas form so it can be sub-sampled by index later.
full_X_train, X_test, full_y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=SEED
)
X_test = np.array(X_test)
y_test = np.array(y_test)
tuple(part.shape for part in (full_X_train, X_test, full_y_train, y_test))

{'No anemia': 0, 'Hemolytic anemia': 1, 'Aplastic anemia': 2, 'Iron deficiency anemia': 3, 'Vitamin B12/Folate deficiency anemia': 4, 'Anemia of chronic disease': 5}
1    14146
0    10000
2     9450
5     1869
4     1575
3     1343
Name: label, dtype: int64


((26868, 6), (11515, 6), (26868,), (11515,))

In [8]:
# Training-subset sizes (number of rows sampled from the full training split)
# that the sweep below trains and evaluates one model for.
train_sizes = [500, 1000, 3000, 5000, 10000]

#### 2M timesteps

In [9]:
# Training budget for this sweep. The tag is the single source of truth: the
# step count and every output file name are derived from it, so they can no
# longer drift apart (previously 1e6 steps were requested, the model was saved
# as "_2e6" and the result CSVs were tagged "_1e6").
timesteps_tag = '2e6'
timesteps = int(float(timesteps_tag))

# A sample is only usable when it contains every class of the training split.
n_classes = full_y_train.nunique()

results_dir = 'test_dfs/train_sizes'
os.makedirs(results_dir, exist_ok=True)

for train_size in train_sizes:
    print(f'Train size {train_size} starting ...')
    X_train, y_train = sample_train_set(full_X_train, full_y_train, train_size)
    unique, counts = np.unique(y_train, return_counts=True)
    if len(unique) != n_classes:
        print(f'Unique classes: {len(unique)}')
        print(f'Skipping {train_size} because only {len(unique)} classes are in the sample')
        continue

    counts_dict = dict(zip(unique, counts))
    print(counts_dict)

    dqn_model = stable_dqn(X_train, y_train, timesteps, f'train_sizes/{train_size}_{timesteps_tag}')
    test_df = evaluate_dqn(dqn_model, X_test, y_test)
    y_pred_df, success_df, success_rate = utils.get_success_rate(test_df)
    print(f'Success rate: {success_rate}')

    # Include the train size in every file name; without it each iteration
    # overwrote the results of the previous one and only the last survived.
    run_suffix = f'{train_size}_{timesteps_tag}'
    test_df.to_csv(f'{results_dir}/test_df_{run_suffix}.csv', index=False)
    y_pred_df.to_csv(f'{results_dir}/y_pred_df_{run_suffix}.csv', index=False)
    success_df.to_csv(f'{results_dir}/success_df_{run_suffix}.csv', index=False)

    avg_length, avg_return = utils.get_avg_length_reward(test_df)
    print(f'Average length: {avg_length}')
    print(f'Average return: {avg_return}')

    acc, f1, roc_auc = utils.test(y_pred_df['y_actual'], y_pred_df['y_pred'])
    print(f'Accuracy: {acc}')
    print(f'F1: {f1}')
    print(f'ROC-AUC: {roc_auc}')

        

Train size 500 starting ...
{0: 118, 1: 186, 2: 141, 3: 19, 4: 17, 5: 19}
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use tf.cast instead.
--------------------------------------
| % time spent exploring  | 89       |
| episodes                | 10000    |
| mean 100 episode reward | 0.1      |
| steps                   | 21069    |
| success rate            | 0.19     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 78       |
| episodes                | 20000    |
| mean 100 episode reward | 0.1      |
| steps                   | 44618    |
| success rate            | 0.16     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 64       |
| episodes                | 30000    |
| mean 100 episode reward | 0        |
| steps                   | 71491    |
| su

--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 280000   |
| mean 100 episode reward | 2.7      |
| steps                   | 1088386  |
| success rate            | 0.84     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 290000   |
| mean 100 episode reward | 2.6      |
| steps                   | 1129143  |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 300000   |
| mean 100 episode reward | 2.9      |
| steps                   | 1170609  |
| success rate            | 0.87     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 310000   |
| mean 100 episode reward | 2.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 70000    |
| mean 100 episode reward | 2.7      |
| steps                   | 233600   |
| success rate            | 0.8      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 80000    |
| mean 100 episode reward | 2.9      |
| steps                   | 273117   |
| success rate            | 0.87     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 90000    |
| mean 100 episode reward | 2.7      |
| steps                   | 312459   |
| success rate            | 0.83     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 100000   |
| mean 100 episode reward | 1.1      |
| steps                  

--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 380000   |
| mean 100 episode reward | 3.1      |
| steps                   | 1471444  |
| success rate            | 0.89     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 390000   |
| mean 100 episode reward | 3.2      |
| steps                   | 1510174  |
| success rate            | 0.97     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 400000   |
| mean 100 episode reward | 3.1      |
| steps                   | 1548087  |
| success rate            | 0.88     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 410000   |
| mean 100 episode reward | 3.1      |
| steps                  

--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 160000   |
| mean 100 episode reward | 2.7      |
| steps                   | 599382   |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 170000   |
| mean 100 episode reward | 2.4      |
| steps                   | 639656   |
| success rate            | 0.79     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 180000   |
| mean 100 episode reward | 2.4      |
| steps                   | 680448   |
| success rate            | 0.76     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 190000   |
| mean 100 episode reward | 2.9      |
| steps                  

--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 470000   |
| mean 100 episode reward | 2.4      |
| steps                   | 1846976  |
| success rate            | 0.77     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 480000   |
| mean 100 episode reward | 2.9      |
| steps                   | 1887874  |
| success rate            | 0.85     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 490000   |
| mean 100 episode reward | 2.6      |
| steps                   | 1928342  |
| success rate            | 0.81     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 500000   |
| mean 100 episode reward | 2.6      |
| steps                  

--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 260000   |
| mean 100 episode reward | 2.4      |
| steps                   | 989813   |
| success rate            | 0.82     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 270000   |
| mean 100 episode reward | 3        |
| steps                   | 1028211  |
| success rate            | 0.85     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 280000   |
| mean 100 episode reward | 2.8      |
| steps                   | 1066824  |
| success rate            | 0.86     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 290000   |
| mean 100 episode reward | 1.9      |
| steps                  

--------------------------------------
| % time spent exploring  | 64       |
| episodes                | 30000    |
| mean 100 episode reward | 0.1      |
| steps                   | 71491    |
| success rate            | 0.13     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 49       |
| episodes                | 40000    |
| mean 100 episode reward | -0.2     |
| steps                   | 103826   |
| success rate            | 0.11     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 28       |
| episodes                | 50000    |
| mean 100 episode reward | -0.1     |
| steps                   | 144938   |
| success rate            | 0.19     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 6        |
| episodes                | 60000    |
| mean 100 episode reward | 2.5      |
| steps                  

--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 340000   |
| mean 100 episode reward | 2.4      |
| steps                   | 1298354  |
| success rate            | 0.86     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 350000   |
| mean 100 episode reward | 2.7      |
| steps                   | 1337345  |
| success rate            | 0.85     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 360000   |
| mean 100 episode reward | 2.9      |
| steps                   | 1375583  |
| success rate            | 0.9      |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 370000   |
| mean 100 episode reward | 2.7      |
| steps                  