In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from IPython.display import display
from sklearn.model_selection import GroupKFold
from sklearn.metrics import roc_auc_score

In [None]:
# Read the four competition CSVs in a fixed order: training features, test
# features, the sample-submission template and the training labels.
train, test, submission, labels = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/tabular-playground-series-apr-2022/train.csv",
        "../input/tabular-playground-series-apr-2022/test.csv",
        "../input/tabular-playground-series-apr-2022/sample_submission.csv",
        "../input/tabular-playground-series-apr-2022/train_labels.csv",
    )
)

In [None]:
# Show each loaded table, in load order, using its rich notebook representation.
display(train, test, submission, labels)

In [None]:
# Keep the per-row sequence ids and the target column before the identifier
# columns are stripped from the feature tables.
groups = train["sequence"]
labels = labels["state"]

# Drop the identifier columns and regroup the flat rows into 3-D arrays of
# shape (rows // 60, 60, 13); the 60 and 13 are hard-coded by this reshape.
# NOTE(review): presumes rows are already ordered by sequence and then step,
# and that `labels` follows the same sequence order — confirm against the data.
id_columns = ["sequence", "subject", "step"]
train = train.drop(columns=id_columns).to_numpy()
test = test.drop(columns=id_columns).to_numpy()
train = train.reshape(len(train) // 60, 60, 13)
test = test.reshape(len(test) // 60, 60, 13)

In [None]:
def BuildNN():
    """Build and compile the sequence classifier inside the TPU strategy scope.

    The network accepts inputs of shape (60, 13) and stacks, in order:
    four LSTM layers (500, 400, 300, 200 units) that all return full
    sequences; three Conv1D layers (32x7, 64x3, 128x3) with max pooling after
    the first two and global max pooling after the third; two swish Dense
    layers (150 and 50 units); and a single sigmoid output unit.

    The model is compiled with the Adam optimizer, binary cross-entropy loss
    and an AUC metric. The module-level ``tpu_strategy`` must exist when this
    function is called.

    Returns:
        The compiled ``keras.models.Sequential`` model.
    """
    layers = keras.layers
    with tpu_strategy.scope():
        stack = [layers.Input(shape=(60, 13))]

        # Recurrent part: every LSTM keeps the time axis for the layer after it.
        stack += [
            layers.LSTM(units, return_sequences=True)
            for units in (500, 400, 300, 200)
        ]

        # Convolutional part, collapsed over time by the global pooling layer.
        stack += [
            layers.Conv1D(32, 7),
            layers.MaxPooling1D(),
            layers.Conv1D(64, 3),
            layers.MaxPooling1D(),
            layers.Conv1D(128, 3),
            layers.GlobalMaxPooling1D(),
        ]

        # Dense head ending in one sigmoid unit.
        stack += [
            layers.Dense(150, activation="swish"),
            layers.Dense(50, activation="swish"),
            layers.Dense(1, activation="sigmoid"),
        ]

        model = keras.models.Sequential(stack)
        model.compile(
            optimizer="adam",
            loss="binary_crossentropy",
            metrics=[keras.metrics.AUC()],
        )
    return model

In [None]:
# Connect to the TPU cluster and create the distribution strategy whose scope
# BuildNN enters when it constructs and compiles the model.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
# tf.distribute.experimental.TPUStrategy is deprecated; the stable
# tf.distribute.TPUStrategy accepts the same cluster-resolver argument.
tpu_strategy = tf.distribute.TPUStrategy(tpu)

In [None]:
# Cross-validated training: fit a fresh model per fold, accumulate the
# validation ROC AUC in `cv_score`, and collect each fold's test-set
# predictions in `test_preds` for later averaging.
cv_score = 0
test_preds = []
kf = GroupKFold(n_splits=5)
# NOTE(review): groups.unique() gives every sample a distinct group id, so the
# grouping constraint cannot keep any two samples together. If the intent is
# to hold related sequences in the same fold (e.g. by subject), the groups
# passed here need to change — confirm.
for fold_idx, (train_idx, valid_idx) in enumerate(kf.split(train, labels, groups.unique())):

    print("*"*15, f"Fold {fold_idx+1}", "*"*15)

    X_train, X_valid = train[train_idx], train[valid_idx]
    y_train, y_valid = labels.iloc[train_idx].values, labels.iloc[valid_idx].values

    model = BuildNN()
    # EarlyStopping is left on its default monitored quantity; the best
    # weights seen are restored once patience runs out.
    model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=100, batch_size=256,
              callbacks=[keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)])

    cv_score += roc_auc_score(y_valid, model.predict(X_valid).squeeze())

    test_preds.append(model.predict(test).squeeze())

# Divide by the number of folds actually run rather than a hard-coded 5, so
# the reported mean stays correct if n_splits is changed above.
print(cv_score/len(test_preds))

In [None]:
# Blend the folds by averaging their test predictions. Dividing by the number
# of collected prediction arrays (instead of a hard-coded 5) keeps the mean
# correct if the fold count changes.
submission["state"] = sum(test_preds)/len(test_preds)
submission.to_csv("submission.csv", index=False)
submission