# Imports
---

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler
from tensorflow import keras

# Load
---

In [None]:
# Read the competition's train, test, and train-label CSVs from the Kaggle input directory.
df_train = pd.read_csv('../input/tabular-playground-series-apr-2022/train.csv')
df_test = pd.read_csv('../input/tabular-playground-series-apr-2022/test.csv')
df_labels = pd.read_csv('../input/tabular-playground-series-apr-2022/train_labels.csv')

In [None]:
# Preview the first rows of the training frame.
df_train.head()

In [None]:
# Number of rows per sequence id, alongside the shape of the labels frame.
df_train.sequence.value_counts(), df_labels.shape

In [None]:
# Display the labels frame (pandas truncates long frames in the rendered output).
df_labels

In [None]:
# Keep the sequence ids for the CV splitter before the id columns are dropped,
# and reduce the labels frame to its `state` column.
groups = df_train['sequence']
df_labels = df_labels.state

# Strip the id/bookkeeping columns; from here on train/test are plain NumPy arrays.
df_train = df_train.drop(columns=['sequence', 'subject', 'step']).values
df_test = df_test.drop(columns=['sequence', 'subject', 'step']).values

# Standardise the features using statistics fitted on the training rows only.
sc = StandardScaler()
df_train = sc.fit_transform(df_train)
df_test = sc.transform(df_test)

# Fold the flat rows into (n_sequences, 60, 13) blocks.
# NOTE(review): assumes rows are ordered by sequence then step, with exactly
# 60 rows per sequence and 13 feature columns — confirm against the raw data.
df_train = df_train.reshape(len(df_train) // 60, 60, 13)
df_test = df_test.reshape(len(df_test) // 60, 60, 13)

# Train
---

In [None]:
def BuildNN():
    """Build and compile the sequence classifier inside the TPU strategy scope.

    The network takes inputs of shape (60, 13), passes them through four stacked
    LSTM layers that all return full sequences, then three Conv1D stages
    (the first two followed by max pooling, the last by global max pooling),
    two swish-activated Dense layers, and a single sigmoid output unit.
    It is compiled with the Adam optimizer, binary cross-entropy loss, and a
    Keras AUC metric.

    Relies on the module-level ``tpu_strategy`` being defined at call time.

    Returns:
        The compiled ``keras.models.Sequential`` model.
    """
    L = keras.layers
    with tpu_strategy.scope():
        # Input followed by the recurrent stack (widest layer first).
        stack = [L.Input(shape=(60, 13))]
        stack += [L.LSTM(units, return_sequences=True) for units in (500, 400, 300, 200)]

        # Convolutional feature extractor over the LSTM outputs.
        stack += [
            L.Conv1D(32, 7),
            L.MaxPooling1D(),
            L.Conv1D(64, 3),
            L.MaxPooling1D(),
            L.Conv1D(128, 3),
            L.GlobalMaxPooling1D(),
        ]

        # Dense head ending in a single probability.
        stack += [
            L.Dense(150, activation="swish"),
            L.Dense(50, activation="swish"),
            L.Dense(1, activation="sigmoid"),
        ]

        model = keras.models.Sequential(stack)
        model.compile(
            optimizer="adam",
            loss="binary_crossentropy",
            metrics=[keras.metrics.AUC()],
        )
    return model

In [None]:
# Connect to the TPU cluster and create the distribution strategy whose scope
# BuildNN() uses. tf.distribute.TPUStrategy is the stable replacement for the
# deprecated tf.distribute.experimental.TPUStrategy and takes the same resolver.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
tpu_strategy = tf.distribute.TPUStrategy(tpu)

In [None]:
# K-fold cross-validation: train a fresh model per fold, score it on the held-out
# fold with ROC AUC, and keep each fold's test predictions for averaging later.
cv_score = 0      # running sum of per-fold validation AUC
test_preds = []   # one test-set prediction vector per fold
n_split = 5
kf = GroupKFold(n_splits=n_split)

# NOTE(review): groups.unique() supplies one distinct id per sequence, so each
# sample ends up in its own group and GroupKFold enforces no real grouping here.
# Confirm whether grouping by `subject` was intended.
for fold_idx, (train_idx, valid_idx) in enumerate(kf.split(df_train, df_labels, groups.unique())):

    print("*"*15, f"Fold {fold_idx+1}", "*"*15)

    X_train, X_valid = df_train[train_idx], df_train[valid_idx]
    y_train, y_valid = df_labels.iloc[train_idx].values, df_labels.iloc[valid_idx].values

    model = BuildNN()
    # EarlyStopping uses its default monitor and restores the best weights seen.
    model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=100, batch_size=256,
              callbacks=[keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)])

    # roc_auc_score is sklearn.metrics.roc_auc_score; it must be imported at the
    # top of the notebook or this line raises NameError after training finishes.
    fold_auc = roc_auc_score(y_valid, model.predict(X_valid).squeeze())
    print(f"Fold {fold_idx+1} AUC: {fold_auc:.5f}")
    cv_score += fold_auc

    test_preds.append(model.predict(df_test).squeeze())

# Mean validation AUC across all folds.
print(cv_score/n_split)

# Test Submission
---

In [None]:
# Start from the sample submission so the output has the expected columns and row order.
submission = pd.read_csv('../input/tabular-playground-series-apr-2022/sample_submission.csv')

# Average the per-fold test predictions element-wise and write the submission file.
submission["state"] = sum(test_preds)/n_split
submission.to_csv("submission.csv", index=False)
submission