In [1]:
import tensorflow as tf
import pandas as pd
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold
from sklearn.metrics import roc_auc_score
from utility_funcs import get_train_labels_test, split_train_data, scale_and_as_array

import random 

random.seed(42)



In [3]:

train, labels, test = get_train_labels_test(is_py=False)

features = [f for f in train.columns if "sensor" in f]

groups = train["sequence"]
train = train.drop(["sequence", "subject", "step"], axis=1).values
test = test.drop(["sequence", "subject", "step"], axis=1).values
labels = labels["state"]

scaler = StandardScaler()
train = scaler.fit_transform(train)
test = scaler.transform(test)


train = train.reshape(-1, 60, 13)
test = test.reshape(-1, 60, 13)

assert train.shape[0] == labels.shape[0]



In [4]:


# helpers 
def train_model(model_in, test_pred_mode = False, n_folds=5):

    gkf = GroupKFold(n_folds)
    store = []

    model_in.summary()

    for fold, (train_idx, val_idx) in enumerate(
        gkf.split(train, labels, groups.unique())
    ):
        print(f"Fitting fold {fold} for {model_in.name}...")
        model = keras.models.clone_model(model_in)
        model.compile(
            optimizer="adam", loss="binary_crossentropy", metrics=[keras.metrics.AUC()]
        )

        X_train, X_val = train[train_idx], train[val_idx]
        y_train, y_val = labels.iloc[train_idx], labels.iloc[val_idx]

        history = model.fit(
            X_train,
            y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            verbose=1,
            batch_size=128,
            callbacks=[
                keras.callbacks.EarlyStopping(
                    patience=10, monitor="val_loss", restore_best_weights=True
                )
            ],
        )
        auc = roc_auc_score(y_val, model.predict(X_val).squeeze())
        print(f"The val auc for fold {fold}, {model_in.name} is {auc}")

        if test_pred_mode:
            store.append(model.predict(test).squeeze())
        else:
            store.append(auc)
            
    result = sum(store) / n_folds # if test mode we want the prediction
    return result


model = keras.models.Sequential([
    keras.layers.Input(shape=(60, train.shape[2])),
    keras.layers.Bidirectional(keras.layers.LSTM(512, return_sequences=True)), 
    keras.layers.Bidirectional(keras.layers.LSTM(256, return_sequences=True)), 
    keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True)), 
    keras.layers.Bidirectional(keras.layers.LSTM(64, return_sequences=True)),
    keras.layers.Bidirectional(keras.layers.GRU(32, return_sequences=True)),
    keras.layers.Flatten(),
    keras.layers.Dense(200, activation="swish"),
    keras.layers.Dense(150, activation="swish"),
    keras.layers.Dense(50, activation="swish"),
    keras.layers.Dense(1, activation="sigmoid"),
], name = 'RNN_model_6')


preds = train_model(model, test_pred_mode = True)
   


Model: "RNN_model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 60, 1024)         2154496   
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 60, 512)          2623488   
 nal)                                                            
                                                                 
 bidirectional_2 (Bidirectio  (None, 60, 256)          656384    
 nal)                                                            
                                                                 
 bidirectional_3 (Bidirectio  (None, 60, 128)          164352    
 nal)                                                            
                                                                 
 bidirectional_4 (Bidirectio  (None, 60, 64)           

In [12]:
sub = pd.read_csv('../Data/sample_submission.csv')
sub['state'] = preds.round()
sub.to_csv('../Submissions/rnn_sub.csv', index = False)

In [13]:
sub

Unnamed: 0,sequence,state
0,25968,1.0
1,25969,1.0
2,25970,0.0
3,25971,1.0
4,25972,1.0
...,...,...
12213,38181,0.0
12214,38182,1.0
12215,38183,0.0
12216,38184,0.0
