*Disclaimer*. This is my first Kaggle competition, entered for learning purposes. Some of the code is taken from other users' notebooks, which are listed in the references at the end of this notebook. 

## Imports

In [None]:
import os
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold
from sklearn.metrics import roc_auc_score

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# 1. Data

In [None]:
# All three competition files sit in the same Kaggle input directory.
DATA_DIR = '/kaggle/input/tabular-playground-series-apr-2022'

train_file = f'{DATA_DIR}/train.csv'
labels_file = f'{DATA_DIR}/train_labels.csv'
test_file = f'{DATA_DIR}/test.csv'

In [None]:
# Load the three raw tables (training rows, training labels, test rows)
# in one pass over their paths.
train_df, labels_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (train_file, labels_file, test_file)
)

In [None]:
# Quick overview of the training frame: column names, dtypes and non-null counts.
train_df.info()

In [None]:
# Dataset-wide constants.
# NOTE(review): NUM_SENSORS is not referenced by the other visible cells —
# presumably the number of sensor columns; confirm against the data.
NUM_SENSORS = 13
# Number of distinct `step` values, used below as the time steps per sequence.
SEQ_LENGTH = train_df["step"].nunique()

In [None]:
# One group id per sequence, in order of first appearance, so the array lines
# up row-for-row with the per-sequence tensor built from `train` later on.
# The group is the sequence's *subject*: using the sequence id itself would put
# every sample in its own group and reduce GroupKFold to a plain K-fold split,
# letting the same subject appear in both the training and validation folds.
groups = train_df.drop_duplicates("sequence")["subject"].to_numpy()

# Identifier columns are bookkeeping, not model inputs; keep only the features.
id_columns = ['sequence', 'subject', 'step']
train = train_df.drop(columns=id_columns).values
test = test_df.drop(columns=id_columns).values

# NOTE(review): assumes labels_df rows follow the same sequence order as
# train_df — confirm against the data.
labels = labels_df['state']

In [None]:
# Standardise each feature column with statistics learned from the training
# rows only, then apply that same transform to both matrices.
scaler = StandardScaler().fit(train)
train, test = scaler.transform(train), scaler.transform(test)

In [None]:
# Fold the flat (rows, features) matrices into (sequences, steps, features).
# The step count comes from SEQ_LENGTH rather than a hard-coded literal, and
# -1 lets NumPy infer the number of sequences (raising if the row count is not
# a multiple of SEQ_LENGTH). Re-running this cell is a harmless no-op.
n_features = train.shape[-1]
train = train.reshape(-1, SEQ_LENGTH, n_features)
test = test.reshape(-1, SEQ_LENGTH, n_features)

In [None]:
# Sanity check of the reshaped training tensor.
# Expected layout: (number of series, steps per series, sensor channels)
train.shape

# 2. Model

In [None]:
class FCNBlock(layers.Layer):
    """Conv1D -> BatchNormalization -> ReLU building block.

    Args:
        filters: Number of output filters of the 1-D convolution.
        size: Kernel size of the 1-D convolution.
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``dtype``),
            forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, filters, size, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can rebuild the layer.
        self.filters = filters
        self.size = size
        self.conv = layers.Conv1D(filters, size)
        self.bn = layers.BatchNormalization()

    def call(self, x, training=None):
        """Apply convolution, batch normalisation and ReLU to ``x``.

        Args:
            x: Input tensor accepted by ``Conv1D``.
            training: Keras training flag; passed to batch normalisation so it
                uses batch statistics while training and moving averages at
                inference.

        Returns:
            The activated output tensor.
        """
        x = self.conv(x)
        x = self.bn(x, training=training)
        return tf.keras.activations.relu(x)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({"filters": self.filters, "size": self.size})
        return config
    
def build_fcnn_model(input_shape=None):
    """Build and compile the fully convolutional binary classifier.

    The network stacks three ``FCNBlock`` layers (128/8, 256/5, 128/3 as
    filters/kernel size), global average pooling over the time axis and a
    single sigmoid output unit.

    Args:
        input_shape: Optional ``(steps, features)`` shape of one sample.
            Defaults to the shape of the global ``train`` array without its
            leading axis.

    Returns:
        A compiled ``tf.keras.Model`` using binary cross-entropy, Adam and an
        AUC metric that is always reported as ``auc`` / ``val_auc``.
    """
    if input_shape is None:
        input_shape = train.shape[1:]

    input_x = layers.Input(tuple(input_shape))
    x = FCNBlock(128, 8)(input_x)
    x = FCNBlock(256, 5)(x)
    x = FCNBlock(128, 3)(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(1, activation='sigmoid')(x)
    model = Model(input_x, x, name='fully_convolutional')

    # Name the metric explicitly: a metric created from the string 'AUC' gets
    # an auto-generated unique name (auc, auc_1, auc_2, ...) each time a model
    # is built, so callbacks monitoring "val_auc" would only find it for the
    # first model of the session.
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=[tf.keras.metrics.AUC(name='auc')])
    return model

In [None]:
# Choose the distribution strategy and the matching global batch size.
# A ValueError from the TPU setup is treated as "no TPU here" and falls back
# to TensorFlow's current default strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
    # 64 samples per replica.
    BATCH_SIZE = 64 * tpu_strategy.num_replicas_in_sync
    print("Running on TPU:", tpu.master())
except ValueError:
    tpu_strategy = tf.distribute.get_strategy()
    BATCH_SIZE = 256
    print(f"Running on {tpu_strategy.num_replicas_in_sync} replicas")

print(f"Batch Size: {BATCH_SIZE}")

In [None]:
# Number of cross-validation folds.
SPLITS = 5

with tpu_strategy.scope():
    # Per-fold test predictions and per-fold validation ROC AUC scores.
    predictions, cv_scores = [], []
    kf = GroupKFold(n_splits=SPLITS)

    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(train, labels, groups)):
        print()
        print("="*15, f"Fold{fold_idx+1}", "="*15)

        # Data for this fold.
        X_train, X_val = train[train_idx], train[val_idx]
        y_train, y_val = labels.iloc[train_idx].values, labels.iloc[val_idx].values

        # A fresh model per fold so no weights carry over between folds.
        model = build_fcnn_model()

        # mode="max" is required here: in its default "auto" mode
        # ReduceLROnPlateau only maximises monitors whose name contains "acc",
        # so "val_auc" would be minimised and the learning rate would be cut
        # while the AUC is still improving.
        lr = ReduceLROnPlateau(monitor="val_auc", mode="max", factor=0.6,
                               patience=4)
        es = EarlyStopping(monitor="val_auc", patience=20, mode="max",
                           restore_best_weights=True)

        model.fit(X_train, y_train,
                  validation_data=(X_val, y_val),
                  epochs=100,
                  batch_size=BATCH_SIZE,
                  callbacks=[lr, es])

        # Score the fold on its held-out part, then predict the test set.
        val_pred = model.predict(X_val).squeeze()
        cv_scores.append(roc_auc_score(y_val, val_pred))
        predictions.append(model.predict(test).squeeze())

In [None]:
# cv_scores holds one roc_auc_score per fold, so report the mean as ROC AUC
# (the previous wording called it "accuracy").
print(f'Mean ROC AUC on {kf.n_splits} folds: {np.mean(cv_scores)}')

In [None]:
# Read the submission template from the same directory as the other
# competition files instead of a path relative to the working directory.
sample_submission_file = os.path.join(os.path.dirname(train_file),
                                      "sample_submission.csv")
sub = pd.read_csv(sample_submission_file)

# Blend the folds with an element-wise mean over the predictions actually
# collected, rather than dividing by a separately maintained fold count.
sub['state'] = np.mean(predictions, axis=0)
sub.to_csv("fcn_submission.csv", index=False)

# 3. References 
1. [LSTM Baseline](https://www.kaggle.com/code/ryanbarretto/lstm-baseline)
2. [Tps April Tensorflow Bi-LSTM](https://www.kaggle.com/code/hamzaghanmi/tps-april-tensorflow-bi-lstm)