In [1]:
import os

import numpy as np
import tensorflow as tf
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from tensorflow import keras

from data_handling import load_user_data, split_features_labels, \
    user_train_test_split
from gmc_dense import GmcDense
from gmc_model import GmcModel, get_coexist_counts
from metrics import balanced_accuracy_score

In [2]:
def sensitivity(y_true, y_pred):
    """Keras metric: true-positive rate over the labelled entries.

    Entries whose ground truth is NaN are removed from both tensors before
    anything is counted, so missing labels neither help nor hurt the score.

    Args:
        y_true: float tensor of ground-truth labels; NaN marks a missing label
            (presumably the remaining values are 0/1 - confirm with caller).
        y_pred: tensor of predictions with the same shape as ``y_true``.

    Returns:
        Scalar tensor ``TP / (P + epsilon)``, where ``epsilon`` is the Keras
        backend fuzz factor guarding against division by zero.
    """
    K = keras.backend

    # keep only the positions that actually carry a label
    labelled = tf.logical_not(tf.math.is_nan(y_true))
    truth = tf.boolean_mask(y_true, labelled)
    preds = tf.boolean_mask(y_pred, labelled)

    # a hit is a labelled positive whose (clipped) prediction rounds to 1
    hits = tf.math.round(K.clip(truth * preds, 0, 1))
    positives = tf.math.round(K.clip(truth, 0, 1))
    return K.sum(hits) / (K.sum(positives) + K.epsilon())


def specificity(y_true, y_pred):
    """Keras metric: true-negative rate over the labelled entries.

    Entries whose ground truth is NaN are removed from both tensors before
    anything is counted, so missing labels neither help nor hurt the score.

    Args:
        y_true: float tensor of ground-truth labels; NaN marks a missing label
            (presumably the remaining values are 0/1 - confirm with caller).
        y_pred: tensor of predictions with the same shape as ``y_true``.

    Returns:
        Scalar tensor ``TN / (N + epsilon)``, where ``epsilon`` is the Keras
        backend fuzz factor guarding against division by zero.
    """
    K = keras.backend

    # keep only the positions that actually carry a label
    labelled = tf.logical_not(tf.math.is_nan(y_true))
    # work on the complements so that "negative" becomes the counted class
    neg_truth = 1 - tf.boolean_mask(y_true, labelled)
    neg_preds = 1 - tf.boolean_mask(y_pred, labelled)

    # a correct rejection is a labelled negative whose complemented
    # (clipped) prediction rounds to 1
    correct_rejections = tf.math.round(K.clip(neg_truth * neg_preds, 0, 1))
    negatives = tf.math.round(K.clip(neg_truth, 0, 1))
    return K.sum(correct_rejections) / (K.sum(negatives) + K.epsilon())

In [3]:
# Enable memory growth on every visible GPU so TensorFlow allocates device
# memory as needed instead of claiming it all up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
keras.backend.set_floatx('float32')

# Location of the user data. Set the GMC_DATA_DIR environment variable to run
# the notebook on another machine; the fallback is the original local path,
# so existing setups keep working unchanged.
DATA_DIR = os.environ.get(
    "GMC_DATA_DIR",
    "/home/joschi/Documents/Studium/SS19/mu_practical_work/data")

# load data and reset index
data = load_user_data(DATA_DIR)
data.reset_index(inplace=True)
X, y = split_features_labels(data)
# NOTE(review): `.index` yields row labels on a pandas object; if feature /
# label *names* are wanted here, `.columns` may be intended - confirm against
# what split_features_labels returns.
attrs = list(X.index)
labels = list(y.index)
# continue with plain arrays; labels are cast to float32 to match the Keras
# float type set above
X = X.values
y = y.values.astype(np.float32)

In [4]:
# Hold out 20 % for testing with the project's user-aware splitter, then carve
# a validation set (25 % of what remains) out of the training part with a
# plain shuffled split. Both splits are seeded for reproducibility.
# NOTE(review): the second split is not user-aware - confirm that mixing a
# user's samples across train and validation is intended.
X_train, X_test, y_train, y_test = user_train_test_split(
    X, y, test_size=0.2, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.25, random_state=42, shuffle=True)

In [5]:
def _drop_meta_columns(features):
    """Return `features` without its first three columns and its last column.

    The column to drop at the end is derived from the width of `features`
    itself, so the result does not depend on whether any other split has
    already been trimmed.
    """
    return np.delete(features, [0, 1, 2, features.shape[1] - 1], axis=1)


# drop uuid column, the timestamps, and the label source
# (previously the validation split was indexed with the already-trimmed
# training width, which removed the wrong trailing column)
X_train = _drop_meta_columns(X_train)
X_valid = _drop_meta_columns(X_valid)
X_test = _drop_meta_columns(X_test)

# Mean-impute missing values, then standardise each feature. The pipeline is
# fitted on the training split only; validation and test data are transformed
# with the statistics learned there.
preprocess_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy="mean")),
    ('std_scaler', StandardScaler()),
])
X_train = preprocess_pipeline.fit_transform(X_train)
X_valid, X_test = (
    preprocess_pipeline.transform(split) for split in (X_valid, X_test)
)

In [6]:
# Cast every feature split to float32 in one go.
X_train, X_valid, X_test = (
    split.astype(np.float32) for split in (X_train, X_valid, X_test)
)

In [None]:
# Feed-forward network: two SELU hidden layers followed by a GmcDense output
# layer with one sigmoid unit per label column of y_train.
# NOTE(review): `input` shadows the Python builtin; the name is kept because
# cells outside this one may refer to it.
input = keras.layers.Input(shape=(X_train.shape[1],))
# SELU layers are paired with the lecun_normal initializer, as the Keras
# documentation for the activation advises.
hidden1 = keras.layers.Dense(150, activation="selu",
                             kernel_initializer="lecun_normal")(input)
hidden2 = keras.layers.Dense(30, activation="selu",
                             kernel_initializer="lecun_normal")(hidden1)
# The layer object is kept in `embedded` (not just its output tensor) so its
# attributes stay reachable after the model is built.
embedded = GmcDense(y_train.shape[1], alpha=1., y=y_train,
                    activation="sigmoid",
                    kernel_initializer="glorot_uniform")
output = embedded(hidden2)

model = keras.models.Model(inputs=[input], outputs=[output])

# `learning_rate` is the supported argument name; `lr` is only a deprecated
# alias and is rejected by newer Keras releases.
optimizer = keras.optimizers.Nadam(learning_rate=1e-5)

In [8]:
# Train with the loss exposed by the GmcDense layer and track the two
# NaN-aware rate metrics during fitting.
model.compile(
    optimizer=optimizer,
    loss=embedded.gmc_loss,
    metrics=[specificity, sensitivity],
)

In [None]:
# Halve the learning rate after 5 epochs without improvement of the monitored
# quantity, and stop after 10 such epochs, restoring the best weights seen.
early_stopping_cb = keras.callbacks.EarlyStopping(
    patience=10, restore_best_weights=True)
lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)

history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[lr_scheduler, early_stopping_cb],
)
print(history)

In [None]:
# Round the model outputs to the nearest integer to get hard predictions for
# the test split and, for comparison, for the training split.
y_pred = np.around(model.predict(X_test), decimals=0)
y_pred_bias = np.around(model.predict(X_train), decimals=0)

In [None]:
# Score the test-split predictions, then the training-split predictions.
# Both arrays are transposed before being handed to the project metric
# (presumably it expects one row per label - confirm against metrics.py).
ba_score = balanced_accuracy_score(np.transpose(y_test),
                                   np.transpose(y_pred))
print("Balanced accuracy: {:.2f}".format(ba_score))

ba_bias_score = balanced_accuracy_score(np.transpose(y_train),
                                        np.transpose(y_pred_bias))
print("Balanced accuracy bias: {:.2f}".format(ba_bias_score))