In [None]:
import os

import numpy as np
from keras.callbacks import EarlyStopping
from sklearn import metrics
from sklearn import metrics as sk_metrics
from tqdm.keras import TqdmCallback

from utils import Config, FeedForwardNetwork, Metric, Standardizer

In [None]:
# Input and output locations for the MIMIC-III (17 features, 24 h) experiment.
data_folder = './data/mimic3_17f_24h/'
results_folder = './results/mimic3_17f_24h/'

# Every input file lives directly inside `data_folder`.
data_filename, folds_filename, features_filename = (
    os.path.join(data_folder, basename)
    for basename in ('imputed-normed-ep_1_24.npz', '5-folds.npz', 'input.csv')
)

In [None]:
# Fail fast when the dataset directory is missing. An exception is raised instead
# of calling `exit(1)`: `exit` is the interactive helper installed by the `site`
# module and, inside a notebook, can shut the kernel down rather than report the
# problem. An uncaught exception stops "Run All" with a readable traceback (and
# still yields a non-zero exit status if this code is run as a script).
if not os.path.isdir(data_folder):
    raise FileNotFoundError(
        f'Wrong data_folder specified. This folder must exist: {data_folder!r}'
    )

# `exist_ok=True` replaces the check-then-create pattern, so the cell is safe to
# re-run and cannot race with another process creating the same folder.
os.makedirs(results_folder, exist_ok=True)

In [None]:
# Instantiate the experiment configuration (`Config` is imported from the local
# `utils` module); later cells read their hyper-parameters from this object.
config = Config()
# Bare expression on the last line of the cell so the notebook displays the
# configuration that will be used for this run.
config

In [None]:
# SECURITY NOTE: `allow_pickle=True` lets np.load unpickle object arrays, which can
# execute arbitrary code. Only load these archives from a trusted source.
#
# The archives are opened as context managers so the underlying .npz file handles
# are closed as soon as the needed arrays have been read into memory.
with np.load(folds_filename, allow_pickle=True) as folds_file:
    # Fold definitions for the label selected by `config.label_type`.
    folds = folds_file['folds_ep_mor'][config.label_type][0]

with np.load(data_filename, allow_pickle=True) as data_file:
    # Column of the admission-label matrix selected by `config.label_type`.
    y = data_file['adm_labels_all'][:, config.label_type]

# Binarize the labels: any strictly positive value becomes 1.0, everything else 0.0.
y = (y > 0).astype(float)

# Feature matrix read from a comma-separated text file.
# NOTE(review): rows of X are indexed with the same fold indices as `y` further
# down, so both are presumably aligned row-by-row - confirm against the data export.
X = np.genfromtxt(features_filename, delimiter=',')

In [None]:
# Labels identifying this experiment; they are combined into the results file
# name and written as the first line of that file.
TASK_NAME, CLF_NAME = 'Mortality', 'TraditionalFeedForwardNetwork'

In [None]:
# Evaluation metrics scored on every fold. Metrics created with `use_soft=False`
# are evaluated on the predicted classes, those with `use_soft=True` on the raw
# output of `predict()` (see the cross-validation loop).
#
# The scikit-learn module is referenced through the `sk_metrics` alias because
# this cell rebinds the name `metrics` to the list below. Referring to the module
# as `metrics` here would raise AttributeError the second time the cell is run.
metrics = [
    Metric('Accuracy', sk_metrics.accuracy_score, use_soft=False),
    Metric('Precision', sk_metrics.precision_score, use_soft=False),
    Metric('Recall', sk_metrics.recall_score, use_soft=False),
    Metric('F1 score', sk_metrics.f1_score, use_soft=False),
    Metric('ROC AUC', sk_metrics.roc_auc_score, use_soft=True),
    Metric('Average precision', sk_metrics.average_precision_score, use_soft=True),
]

In [None]:
def build_model(n_features):
    """Create and compile a fresh FeedForwardNetwork.

    Args:
        n_features: forwarded to FeedForwardNetwork as `n_features`.

    Returns:
        The compiled FeedForwardNetwork instance.

    All remaining hyper-parameters (final activation, depth, batch
    normalization, loss and learning rate) are read from the module-level
    `config` object.
    """
    architecture = {
        'n_features': n_features,
        'final_activation': config.final_activation,
        'ffn_depth': config.ffn_depth,
        'batch_normalization': config.batch_normalization,
    }
    network = FeedForwardNetwork(**architecture)

    compile_options = {
        'loss': config.loss_func,
        'learning_rate': config.learning_rate,
    }
    network.compile(**compile_options)
    return network

In [None]:
# Start from empty score lists so that re-running this cell does not append a
# second set of fold scores to the ones collected by a previous run (which would
# silently skew the mean/std reported at the end of the notebook).
for metric in metrics:
    metric.scores.clear()

# The number of input features is the same for every fold.
n_features = X.shape[1]

for train_idx, valid_idx, test_idx in folds:
    # The validation indices are merged into the training indices, so the model
    # is trained on both and no separate validation data is passed to fit().
    # NOTE(review): Keras' EarlyStopping monitors `val_loss` by default. Unless
    # FeedForwardNetwork.fit creates a validation split on its own, that quantity
    # is unavailable here and early stopping never triggers - confirm.
    train_idx = np.concatenate((train_idx, valid_idx))

    # The standardizer is fitted on the training rows only and then applied to
    # every row, so test rows never contribute to the fitted statistics.
    standardizer = Standardizer()
    standardizer.fit(X[train_idx])
    X_transformed = standardizer.transform(X)

    # A new model is built for every fold so no weights carry over between folds.
    clf = build_model(n_features=n_features)
    clf.fit(
        X_transformed[train_idx], y[train_idx],
        batch_size=config.batch_size, epochs=config.epochs,
        callbacks=[
            EarlyStopping(patience=config.early_stopping_patience, restore_best_weights=True),
            TqdmCallback(verbose=1)
        ])

    # Evaluate on the held-out test rows of this fold.
    X_test = X_transformed[test_idx]
    y_true = y[test_idx]
    y_soft = clf.predict(X_test)
    y_pred = clf.predict_classes(X_test)

    for metric in metrics:
        # Soft metrics receive the predict() output, the others the predicted classes.
        prediction = y_soft if metric.use_soft else y_pred
        metric.scores.append(metric.function(y_true, prediction))

In [None]:
# The results file is named after the task and the classifier and is placed in
# `results_folder`.
results_basename = f'{TASK_NAME}_{CLF_NAME}.txt'
results_filename = os.path.join(results_folder, results_basename)

In [None]:
# Write the run summary: a header with task and classifier name, the
# configuration, then one "name: mean ± std" line per metric computed over the
# collected fold scores. Each line is also printed to the notebook output.
#
# The encoding is set explicitly because the summary contains the non-ASCII
# character '±'. With the platform default encoding the write can fail with
# UnicodeEncodeError or produce bytes that differ between machines.
with open(results_filename, 'w', encoding='utf-8') as f:
    f.write(f'{TASK_NAME} {CLF_NAME}\n\n')
    f.write(f'{str(config)}\n\n')
    for metric in metrics:
        mean, std = np.mean(metric.scores), np.std(metric.scores)
        # Format once so the printed and the stored line cannot drift apart.
        summary = f'{metric.name}: {mean:.5f} ± {std:.5f}'
        print(summary)
        f.write(summary + '\n')