In [17]:
# Pin the NumPy global seed (1) and the TensorFlow seed (2) for this session.
from numpy.random import seed
from tensorflow.compat.v1 import set_random_seed

seed(1)
set_random_seed(2)

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import robust_scale
# Start from a clean Keras backend state before any model is built in this kernel.
# NOTE(review): when the notebook is run top to bottom this executes AFTER the
# seed cell; in graph mode clear_session() may replace the default graph and drop
# a graph-level seed set earlier — confirm the intended order of the two cells.
tf.keras.backend.clear_session()

In [2]:
#From here: http://arseny.info/2017/f-beta-score-for-keras.html
from sklearn.metrics import fbeta_score
from tensorflow.keras import backend as K


def fbeta(y_true, y_pred, threshold_shift=0):
    """F-beta score (beta fixed at 2) built from Keras backend ops.

    Passed to ``model.compile(metrics=[...])`` in this notebook, where it is
    called as ``fbeta(y_true, y_pred)``.

    Args:
        y_true: ground-truth labels. The arithmetic assumes 0/1 values; this is
            not validated here.
        y_pred: predicted scores. They are clipped to [0, 1] before thresholding.
        threshold_shift: added to the clipped scores before rounding, so a
            positive shift lowers the effective decision threshold below 0.5.

    Returns:
        Scalar backend tensor with the F2 score over every element passed in.
    """
    beta = 2

    # Guard against output activations that can leave the [0, 1] range.
    y_pred = K.clip(y_pred, 0, 1)

    # Binarise once; every count below is derived from these hard decisions.
    y_pred_bin = K.round(y_pred + threshold_shift)

    # epsilon keeps both denominators strictly positive when there are no hits.
    tp = K.sum(K.round(y_true * y_pred_bin)) + K.epsilon()
    fp = K.sum(K.round(K.clip(y_pred_bin - y_true, 0, 1)))
    # Count misses from the binarised predictions, not the raw scores, so that
    # false negatives honour threshold_shift exactly like tp and fp do.
    fn = K.sum(K.round(K.clip(y_true - y_pred_bin, 0, 1)))

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    beta_squared = beta ** 2
    return (beta_squared + 1) * (precision * recall) / (beta_squared * precision + recall)

In [3]:
# Training table read from the interim data directory.
df = pd.read_csv("../../data/interim/train.csv")

# Features: every column except the label "y" and the "Participant" column.
X = df.drop(["y", "Participant"], axis=1)
y = df["y"]

In [4]:
# Validation table; columns X1..X178 become an array of shape (rows, 178, 1).
val = pd.read_csv("../../data/interim/validation.csv")
_val_feature_names = [f"X{n}" for n in range(1, 179)]
val_X = np.expand_dims(val[_val_feature_names].to_numpy(), axis=-1)
val_y = val["y"]

In [5]:
# 70/30 split of the training table with a fixed random_state, stratified
# jointly on the "Participant" and "y" columns.
strata = df[["Participant", "y"]]
train, test = train_test_split(df, test_size=0.3, stratify=strata, random_state=0)

In [12]:
# Columns X1..X178 of each split, with a trailing axis of length 1 appended so
# the arrays have shape (rows, 178, 1).
_feature_names = [f"X{n}" for n in range(1, 179)]
train_X, test_X = (
    np.expand_dims(part[_feature_names].to_numpy(), axis=-1) for part in (train, test)
)

# Labels for the same two splits.
train_y, test_y = train["y"], test["y"]

In [22]:
# Binary classifier: a TCN over each (178, 1) input followed by one sigmoid unit.
# NOTE(review): TCN is not imported in any visible cell — presumably
# `from tcn import TCN` (keras-tcn); confirm and add it to the imports cell so
# the notebook survives Restart & Run All.
model = keras.models.Sequential(
    [
        TCN(
            input_shape=[178, 1],
            kernel_size=2,
            activation="relu",
            # Two stacked passes over dilation rates 1, 2, 4, 8.
            dilations=[1, 2, 4, 8] * 2,
            return_sequences=False,
        ),
        # Reached through the already-imported `keras` module; a bare `Dense`
        # was never imported in the visible cells.
        keras.layers.Dense(1, activation='sigmoid'),
    ]
)

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=[fbeta],
             )

# Keras documents validation_data as a tuple (x_val, y_val); pass a tuple rather
# than a list so it is never mistaken for a multi-input `x`.
history = model.fit(train_X, train_y.to_numpy(),
                    epochs=40,
                    batch_size=32,
                    validation_data=(test_X, test_y.to_numpy()))

Train on 5578 samples, validate on 2391 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [23]:
# Model outputs on the validation set, rounded to hard labels, then scored
# with scikit-learn's F-beta at beta=2.
pred_val = model.predict(val_X.astype(float))
pred_val_rounded = np.round(pred_val)
score = fbeta_score(y_true=val_y, y_pred=pred_val_rounded, beta=2)

score

0.8323299217338953

In [24]:
%%timeit
# Times one full validation-set inference, including the astype(float) copy and
# the final rounding step.
pred_val = model.predict(val_X.astype(float)).round()

1.12 s ± 13.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
