In [None]:
import tensorflow as tf
import numpy as np

from tensorflow import keras
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.utils import class_weight

from gmc_loss import GmcLoss, get_coexist_counts
from data_handling import load_user_data, split_features_labels, \
    user_train_test_split

In [None]:
def sensitivity(y_true, y_pred):
    """Keras metric: true-positive rate over the labelled entries.

    Entries whose ground truth is NaN are removed from both tensors before
    anything is counted, so missing labels affect neither the numerator nor
    the denominator.

    Args:
        y_true: tensor of ground-truth labels; NaN marks a missing label.
        y_pred: tensor of predicted scores, same shape as ``y_true``.

    Returns:
        Scalar tensor ``TP / (P + epsilon)``; epsilon guards against a
        division by zero when no positive label is present.
    """
    K = keras.backend

    # Keep only the positions that actually carry a label.
    labelled = tf.logical_not(tf.math.is_nan(y_true))
    targets = tf.boolean_mask(y_true, labelled)
    scores = tf.boolean_mask(y_pred, labelled)

    # A position counts as a hit when target * score rounds to 1.
    hits = tf.math.round(K.clip(targets * scores, 0, 1))
    positives = tf.math.round(K.clip(targets, 0, 1))

    return K.sum(hits) / (K.sum(positives) + K.epsilon())


def specificity(y_true, y_pred):
    """Keras metric: true-negative rate over the labelled entries.

    Entries whose ground truth is NaN are removed from both tensors before
    anything is counted, so missing labels affect neither the numerator nor
    the denominator.

    Args:
        y_true: tensor of ground-truth labels; NaN marks a missing label.
        y_pred: tensor of predicted scores, same shape as ``y_true``.

    Returns:
        Scalar tensor ``TN / (N + epsilon)``; epsilon guards against a
        division by zero when no negative label is present.
    """
    K = keras.backend

    # Keep only the positions that actually carry a label.
    labelled = tf.logical_not(tf.math.is_nan(y_true))
    targets = tf.boolean_mask(y_true, labelled)
    scores = tf.boolean_mask(y_pred, labelled)

    # Work on the complements so that "negative" becomes the value 1.
    neg_targets = 1 - targets
    neg_scores = 1 - scores

    rejections = tf.math.round(K.clip(neg_targets * neg_scores, 0, 1))
    negatives = tf.math.round(K.clip(neg_targets, 0, 1))

    return K.sum(rejections) / (K.sum(negatives) + K.epsilon())

In [None]:
# Enable memory growth on every visible GPU and fix the Keras float type.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
keras.backend.set_floatx('float32')

# NOTE(review): absolute, machine-specific path -- the notebook only runs on
# a machine where this directory exists; consider making it configurable.
DATA_DIR = "/home/joschi/Documents/Studium/SS19/mu_practical_work/data"

# load data and reset index
data = load_user_data(DATA_DIR)
data.reset_index(inplace=True)

features, targets = split_features_labels(data)

# NOTE(review): names are read from `.index`; if split_features_labels
# returns DataFrames, `.columns` may have been intended -- confirm.
attrs = list(features.index)
labels = list(targets.index)

# Downstream cells work on plain arrays; labels are cast to float32.
X = features.values
y = targets.values.astype(np.float32)

In [None]:
# First split: hold out the test set (test_size=0.2).
X_train_full, X_test, y_train_full, y_test = user_train_test_split(
    X, y, test_size=0.2, random_state=42)

# Second split: carve the validation set out of the remaining data
# (test_size=0.25 of what is left after the first split).
# NOTE(review): presumably user_train_test_split keeps each user's rows in a
# single split and treats test_size as a fraction -- verify in data_handling.
X_train, X_valid, y_train, y_valid = user_train_test_split(
    X_train_full, y_train_full, test_size=0.25, random_state=42)

In [None]:
def drop_metadata_columns(features):
    """Return ``features`` without its first three columns and its last column.

    The index of the last column is taken from the array that is passed in,
    so the result never depends on the (possibly already reduced) width of a
    different split.

    Args:
        features: 2-D numpy array.

    Returns:
        A new array with columns 0, 1, 2 and the last column removed.
    """
    last_col = features.shape[1] - 1
    # A positive index is used on purpose: older numpy releases ignore
    # negative indices passed to np.delete in a list.
    return np.delete(features, [0, 1, 2, last_col], axis=1)


# drop uuid column, the timestamps, and the label source
# NOTE: this cell overwrites X_train / X_valid / X_test; re-running it without
# re-running the split cell first would drop four more columns.
X_train = drop_metadata_columns(X_train)
X_valid = drop_metadata_columns(X_valid)
X_test = drop_metadata_columns(X_test)

# Mean-impute missing values, then standardise. The pipeline is fitted on the
# training split only and merely applied to the validation and test splits.
preprocess_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="mean")),
    ('std_scaler', StandardScaler()),
])
X_train = preprocess_pipeline.fit_transform(X_train)
X_valid = preprocess_pipeline.transform(X_valid)
X_test = preprocess_pipeline.transform(X_test)

In [None]:
# Class weights are estimated from the labelled entries only: NaN marks a
# missing label and is excluded. Boolean indexing flattens the label matrix,
# so the weights are computed over all label columns together.
is_nan = np.isnan(y_train)
y_train_clean = y_train[~is_nan]

# `classes` and `y` are passed by keyword: recent scikit-learn releases make
# them keyword-only, and keywords also work on older releases.
class_weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_train_clean),
    y=y_train_clean,
)

In [None]:
# Cast every feature split to float32.
X_train, X_valid, X_test = (
    split.astype(np.float32) for split in (X_train, X_valid, X_test)
)

In [None]:
# Fully connected network: two SELU hidden layers (150 and 30 units) followed
# by one sigmoid unit per label column, so every label gets its own score.
model = keras.models.Sequential([
    keras.layers.Dense(150, activation="selu",
                       input_shape=X_train.shape[1:],
                       kernel_initializer="lecun_normal"),
    keras.layers.Dense(30, activation="selu",
                       kernel_initializer="lecun_normal"),
    keras.layers.Dense(y_train.shape[1], activation="sigmoid",
                       kernel_initializer="glorot_uniform")
])

# The loss object is built from the training labels.
# NOTE(review): the meaning of `alpha` is defined in gmc_loss.py -- confirm.
gmc_loss = GmcLoss(y_train, alpha=1e-8)

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
optimizer = keras.optimizers.Adam(learning_rate=0.001)

model.compile(loss=gmc_loss, optimizer=optimizer,
              metrics=[specificity, sensitivity])

In [None]:
# Halve the learning rate after 5 epochs without improvement of the monitored
# quantity (callback default).
lr_scheduler = keras.callbacks.ReduceLROnPlateau(
    factor=0.5,
    patience=5,
)

# Stop after 3 epochs without improvement and roll back to the best weights.
# NOTE(review): this patience (3) is shorter than the scheduler's (5), so
# training will usually stop before the learning rate is ever reduced --
# confirm this is intended.
early_stopping_cb = keras.callbacks.EarlyStopping(
    patience=3,
    restore_best_weights=True,
)

# NOTE(review): `class_weights` is the ndarray returned by
# compute_class_weight, while Keras documents `class_weight` as a dict mapping
# class index to weight; for a multi-label target it is unclear how it is
# applied -- verify against the installed TensorFlow version.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=100,
    batch_size=64,
    class_weight=class_weights,
    callbacks=[lr_scheduler, early_stopping_cb],
)

In [None]:
# Show the `coexist_counts` attribute held by the loss object.
# NOTE(review): presumably label co-occurrence counts derived from y_train
# (cf. get_coexist_counts in gmc_loss) -- confirm.
print(gmc_loss.coexist_counts)

In [None]:
# Raw sigmoid scores for the test split; print the first sample as a quick
# sanity check before thresholding.
y_pred_scores = model.predict(X_test)
print(y_pred_scores[0])

# Round the scores to hard 0/1 decisions.
y_pred = np.round(y_pred_scores)

# Same thresholding on the training split, used to score the fit on seen data.
y_pred_bias = np.round(model.predict(X_train))

In [None]:
from metrics import balanced_accuracy_score

# Both label matrices are transposed before scoring.
# NOTE(review): presumably so that each row handed to the metric is one
# label -- confirm against metrics.balanced_accuracy_score.

# Macro-averaged balanced accuracy on the held-out test split.
ba_score = balanced_accuracy_score(
    y_test.T,
    y_pred.T,
    average="macro",
)
print("Balanced accuracy: {:.2f}".format(ba_score))

# Same metric on the training split (score on data the model has seen).
ba_bias_score = balanced_accuracy_score(
    y_train.T,
    y_pred_bias.T,
    average="macro",
)
print("Balanced accuracy bias: {:.2f}".format(ba_bias_score))