In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np 

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold
from sklearn.metrics import roc_auc_score
from utility_funcs import get_train_labels_test, split_train_data, scale_and_as_array


In [None]:
# Load the training features, training labels and test features through the
# project helper from `utility_funcs`.
# NOTE(review): later cells treat `train`/`test` as DataFrames carrying
# "sequence", "subject" and "step" columns next to the sensor columns, and
# `labels` as a DataFrame with a "state" column — confirm against the helper.
train, labels, test = get_train_labels_test()

In [None]:
# Names of every column whose label contains the substring 'sensor'.
# This list is not referenced again in the cells visible here.
features = [column for column in train.columns if 'sensor' in column]

In [None]:
# Identifier columns that must not be fed to the network.
ID_COLUMNS = ["sequence", "subject", "step"]
# Number of rows (time steps) that make up one sequence.
STEPS_PER_SEQUENCE = 60

# Per-row sequence ids, captured before the id columns are dropped; the
# cross-validation helper later reduces them to one id per sequence.
groups = train["sequence"]
train = train.drop(ID_COLUMNS, axis=1).values
test = test.drop(ID_COLUMNS, axis=1).values
labels = labels["state"]

# Derive the feature count from the data instead of hard-coding it, so a change
# in the number of sensor columns cannot silently misalign the reshape below.
n_features = train.shape[1]
assert test.shape[1] == n_features, "train and test must have the same feature columns"
assert train.shape[0] % STEPS_PER_SEQUENCE == 0, "train rows are not a whole number of sequences"
assert test.shape[0] % STEPS_PER_SEQUENCE == 0, "test rows are not a whole number of sequences"

# Standardise each feature with statistics fitted on the training rows only,
# then apply the same transform to the test rows.
# NOTE(review): the scaler sees all training rows, including those that later
# serve as validation folds in cross-validation — confirm this is acceptable.
scaler = StandardScaler()
train = scaler.fit_transform(train)
test = scaler.transform(test)

# Fold the flat row table into (sequence, step, feature) blocks.
# NOTE(review): assumes rows are ordered by sequence and then by step, with
# exactly STEPS_PER_SEQUENCE consecutive rows per sequence — TODO confirm.
train = train.reshape(-1, STEPS_PER_SEQUENCE, n_features)
test = test.reshape(-1, STEPS_PER_SEQUENCE, n_features)

In [None]:
# Sanity check: after reshaping there must be exactly one label per sequence.
assert len(train) == len(labels)

### Helper functions 

In [None]:
def compile_model(model):
    """Configure ``model`` for binary classification and hand it back.

    The model is compiled in place with the Adam optimizer, binary
    cross-entropy loss and a Keras AUC metric.

    Parameters
    ----------
    model
        Object exposing a Keras-style ``compile`` method.

    Returns
    -------
    The same ``model`` object, now compiled.
    """
    settings = {
        "optimizer": "adam",
        "loss": "binary_crossentropy",
        "metrics": [keras.metrics.AUC()],
    }
    model.compile(**settings)
    return model


def train_model(model_in, n_splits=3):
    """Cross-validate a model architecture and return its mean validation AUC.

    For every fold a fresh clone of ``model_in`` is built (``clone_model``
    creates new layers, so no weights are shared between folds or with
    ``model_in``), compiled via ``compile_model``, fitted with early stopping
    on the validation loss, and scored with ``roc_auc_score`` on the held-out
    part of the data.

    Reads the notebook-level variables ``train``, ``labels`` and ``groups``.

    Parameters
    ----------
    model_in : keras.Model
        Architecture template; it is only cloned, never fitted itself.
    n_splits : int, default 3
        Number of ``GroupKFold`` folds.

    Returns
    -------
    float
        Arithmetic mean of the per-fold validation ROC AUC scores.
    """
    gkf = GroupKFold(n_splits)
    val_rocs = []

    # One group id per reshaped sample (one per sequence).
    # NOTE(review): because every sequence id is unique, each sample forms its
    # own group, so this split does not keep any larger unit (e.g. a subject)
    # together — confirm that sequence-level grouping is what is intended.
    fold_groups = groups.unique()

    for fold, (train_idx, val_idx) in enumerate(
        gkf.split(train, labels, fold_groups)
    ):
        print(f'Fitting fold {fold} for {model_in.name}')
        # Reuse the shared helper so the training configuration lives in one place.
        model = compile_model(keras.models.clone_model(model_in))

        X_train, X_val = train[train_idx], train[val_idx]
        y_train, y_val = labels.iloc[train_idx], labels.iloc[val_idx]

        model.fit(
            X_train,
            y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            verbose=0,
            batch_size=256,
            callbacks=[
                # Stop after 5 epochs without val_loss improvement and roll
                # back to the best weights seen.
                keras.callbacks.EarlyStopping(
                    patience=5, monitor="val_loss", restore_best_weights=True
                )
            ],
        )
        auc = roc_auc_score(y_val, model.predict(X_val).squeeze())
        print(f'The val auc for fold {fold}, {model_in.name} is {auc}')
        val_rocs.append(auc)

    # Average over the folds actually run rather than a hard-coded count.
    result = sum(val_rocs) / len(val_rocs)
    return result




### Test models

In [None]:
# Every candidate consumes one whole sequence. The per-sample shape is taken
# from the prepared training array so it cannot drift from the preprocessing.
input_shape = train.shape[1:]

# Baseline: a single hidden layer on the flattened sequence.
model_1 = keras.models.Sequential([
    keras.layers.Flatten(input_shape=input_shape),
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
], name='model_1')

# Deeper variant: three hidden layers of decreasing width.
model_2 = keras.models.Sequential([
    keras.layers.Flatten(input_shape=input_shape),
    keras.layers.Dense(200, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
], name='model_2')

models = [model_1, model_2]

# Map each model name to its mean cross-validated AUC.
model_dicts = {}
for model_in in models:
    model_dicts[model_in.name] = train_model(model_in)
    
