In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.style as style
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from IPython.display import display
from sklearn.model_selection import GroupKFold
from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Training data.
#
# Columns:
#   sequence             - unique id of each sequence
#   subject              - unique id of the subject in the experiment
#   step                 - time step of the recording, in one-second intervals
#   sensor_00..sensor_12 - value of each of the thirteen sensors at that time step
train_csv_path = '../input/tabular-playground-series-apr-2022/train.csv'
train = pd.read_csv(train_csv_path)
train

In [None]:
# Summary statistics, transposed so that each column of `train` becomes a row.
train.describe().transpose()

In [None]:
# Number of distinct sequences, subjects and steps, in that order.
tuple(train[column].nunique() for column in ("sequence", "subject", "step"))

In [None]:
# Count of missing values per column.
train.isnull().sum()

In [None]:
# Number of rows that exactly repeat an earlier row (the first occurrence is not counted).
train.duplicated(keep="first").sum()

In [None]:
# Annotated correlation heatmap over every column from the fourth onward
# (i.e. everything after sequence / subject / step).
sensor_corr = train.iloc[:, 3:].corr()
palette = sns.diverging_palette(20, 220, as_cmap=True)

plt.figure(figsize=(12, 12))
g = sns.heatmap(
    sensor_corr,
    cmap=palette,
    annot=True,
    fmt='.2f',
    annot_kws={'size': 8},
)

In [None]:
# Test data, read from the competition input directory.
test_csv_path = '../input/tabular-playground-series-apr-2022/test.csv'
test = pd.read_csv(test_csv_path)
test

In [None]:
# Overlay the train (blue) and test (red) distribution of every column from the
# fourth onward, one panel per column.
features = train.iloc[:,3:]

# Create the grid once. The previous version also called plt.figure() first,
# which opened an extra, empty figure alongside the real one.
fig, ax = plt.subplots(3, 5, figsize=(20, 20))
panels = ax.ravel()

for panel, feature in zip(panels, features):
    # histplot(kde=True, stat="density") replaces the deprecated sns.distplot;
    # alpha keeps both overlaid histograms readable.
    sns.histplot(train[feature], color="blue", kde=True, stat="density",
                 bins=30, alpha=0.4, label='train', ax=panel)
    sns.histplot(test[feature], color="red", kde=True, stat="density",
                 bins=30, alpha=0.4, label='test', ax=panel)
    panel.set_ylabel("")
    panel.set_xlabel(feature, fontsize=9)
    panel.legend()

# The grid has more panels than there are columns to plot; hide the leftovers
# instead of rendering empty axes.
for panel in panels[len(features.columns):]:
    panel.set_visible(False)

plt.show()

In [None]:
# A subject appears in many rows: display every row recorded for one randomly
# chosen subject.
import random  # kept in this cell: the following cells use `random` without importing it

# Draw from the subject ids that actually occur. The previous
# randint(0, nunique-1) silently assumed the ids are exactly 0..nunique-1 and
# would show an empty frame for any gap in the numbering.
sample = random.choice(train.subject.unique().tolist())
train_mask = train['subject'] == sample
train[train_mask]

In [None]:
# Histogram of the subject column (300 bins), shown together with the largest subject id.
subject_ids = train["subject"]
subject_ids.hist(bins=300), subject_ids.unique().max()

In [None]:
# Pick a random row, then keep every row that shares its subject and sequence.
sample = random.randint(0, len(train)-1)
sample1 = [train.iloc[sample].subject]
sample2 = [train.iloc[sample].sequence]
# NOTE: despite its name, `train_mask` holds the filtered rows, not a boolean mask.
train_mask = train[train.subject.isin(sample1) & train.sequence.isin(sample2)]
features = train_mask.iloc[:,3:]

# Create the grid once. The previous version also called plt.figure() first,
# which opened an extra, empty figure alongside the real one.
fig, ax = plt.subplots(3, 5, figsize=(20, 20))
panels = ax.ravel()

for panel, feature in zip(panels, features):
    # histplot(kde=True, stat="density") replaces the deprecated sns.distplot.
    sns.histplot(train_mask[feature], color="blue", kde=True, stat="density",
                 bins=30, alpha=0.4, label='train', ax=panel)
    panel.set_ylabel("")
    panel.set_xlabel(feature, fontsize=9)
    panel.legend()

# Hide the grid panels that have no column to show.
for panel in panels[len(features.columns):]:
    panel.set_visible(False)

plt.show()

In [None]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
output_notebook()

# Pick a random row, then keep every row that shares its subject and sequence.
sample=random.randint(0, len(train)-1)
sample1=[train.iloc[sample].subject]
sample2=[train.iloc[sample].sequence]
data=train[train.subject.isin(sample1)&train.sequence.isin(sample2)]
features = data.iloc[:,3:]

# `step` is a plain step counter, not a timestamp, so the default linear x axis
# is used. A "datetime" axis would read the step numbers as milliseconds since
# the epoch and label the ticks as times.
plot = figure(title="Step VS Sensor", sizing_mode="stretch_width")
plot.grid.grid_line_alpha=0.6
plot.xaxis.axis_label = 'step'
plot.yaxis.axis_label = 'sensor'

# One line per sensor; the mapping fixes both the drawing order and the colour.
sensor_colors = {
    'sensor_00': '#A6CEE3',
    'sensor_01': '#404387',
    'sensor_02': '#410967',
    'sensor_03': '#deebf7',
    'sensor_04': '#FC2A99',
    'sensor_05': '#FDE724',
    'sensor_06': '#33A02C',
    'sensor_07': '#B2DF8A',
    'sensor_08': '#FB2A99',
    'sensor_09': '#35B778',
    'sensor_10': '#DC5039',
    'sensor_11': '#FB9A99',
    'sensor_12': '#30678D',
}
for sensor, color in sensor_colors.items():
    plot.line(data['step'], data[sensor], color=color, legend_label=sensor)

plot.legend.location = "center_left"
plot.legend.background_fill_alpha = 0.6

show(plot)

In [None]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
output_notebook()

# Draw another random row and keep every row that shares its subject and sequence.
sample=random.randint(0, len(train)-1)
sample1=[train.iloc[sample].subject]
sample2=[train.iloc[sample].sequence]
data=train[train.subject.isin(sample1)&train.sequence.isin(sample2)]
features = data.iloc[:,3:]

# `step` is a plain step counter, not a timestamp, so the default linear x axis
# is used. A "datetime" axis would read the step numbers as milliseconds since
# the epoch and label the ticks as times.
plot = figure(title="Step VS Sensor", sizing_mode="stretch_width")
plot.grid.grid_line_alpha=0.6
plot.xaxis.axis_label = 'step'
plot.yaxis.axis_label = 'sensor'

# One line per sensor; the mapping fixes both the drawing order and the colour.
sensor_colors = {
    'sensor_00': '#A6CEE3',
    'sensor_01': '#404387',
    'sensor_02': '#410967',
    'sensor_03': '#deebf7',
    'sensor_04': '#FC2A99',
    'sensor_05': '#FDE724',
    'sensor_06': '#33A02C',
    'sensor_07': '#B2DF8A',
    'sensor_08': '#FB2A99',
    'sensor_09': '#35B778',
    'sensor_10': '#DC5039',
    'sensor_11': '#FB9A99',
    'sensor_12': '#30678D',
}
for sensor, color in sensor_colors.items():
    plot.line(data['step'], data[sensor], color=color, legend_label=sensor)

plot.legend.location = "center_left"
plot.legend.background_fill_alpha = 0.6

show(plot)

In [None]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
output_notebook()

# Draw one more random row and keep every row that shares its subject and sequence.
sample=random.randint(0, len(train)-1)
sample1=[train.iloc[sample].subject]
sample2=[train.iloc[sample].sequence]
data=train[train.subject.isin(sample1)&train.sequence.isin(sample2)]
features = data.iloc[:,3:]

# `step` is a plain step counter, not a timestamp, so the default linear x axis
# is used. A "datetime" axis would read the step numbers as milliseconds since
# the epoch and label the ticks as times.
plot = figure(title="Step VS Sensor", sizing_mode="stretch_width")
plot.grid.grid_line_alpha=0.6
plot.xaxis.axis_label = 'step'
plot.yaxis.axis_label = 'sensor'

# One line per sensor; the mapping fixes both the drawing order and the colour.
sensor_colors = {
    'sensor_00': '#A6CEE3',
    'sensor_01': '#404387',
    'sensor_02': '#410967',
    'sensor_03': '#deebf7',
    'sensor_04': '#FC2A99',
    'sensor_05': '#FDE724',
    'sensor_06': '#33A02C',
    'sensor_07': '#B2DF8A',
    'sensor_08': '#FB2A99',
    'sensor_09': '#35B778',
    'sensor_10': '#DC5039',
    'sensor_11': '#FB9A99',
    'sensor_12': '#30678D',
}
for sensor, color in sensor_colors.items():
    plot.line(data['step'], data[sensor], color=color, legend_label=sensor)

plot.legend.location = "center_left"
plot.legend.background_fill_alpha = 0.6

show(plot)

In [None]:
# Training labels, read from the competition input directory.
labels_csv_path = '../input/tabular-playground-series-apr-2022/train_labels.csv'
labels = pd.read_csv(labels_csv_path)
labels

In [None]:
# Histogram of the `state` label column.
labels["state"].hist()

In [None]:
# Convert the frames into the arrays consumed by the model.
# NOTE: this cell rebinds `train`, `test` and `labels` to different types
# (ndarray / Series), so it cannot be re-run without reloading the CSVs first.
id_columns = ["sequence", "subject", "step"]

# Grab the columns still needed before the frames are replaced.
groups = train["sequence"]
labels = labels["state"]

# Keep only the remaining (non-id) columns, as plain NumPy arrays.
train = train.drop(columns=id_columns).to_numpy()
test = test.drop(columns=id_columns).to_numpy()

# Regroup every 60 consecutive rows of 13 values into one sample.
# assumes rows are stored contiguously per sequence and ordered by step — TODO confirm
train = train.reshape(len(train) // 60, 60, 13)
test = test.reshape(len(test) // 60, 60, 13)

In [None]:
def BuildNN():
    """Build and compile the LSTM + Conv1D binary classifier.

    The network takes inputs of shape (60, 13), passes them through four
    stacked LSTM layers that all return full sequences, then three Conv1D
    layers with pooling in between, and finally a dense head ending in a
    single sigmoid unit. It is compiled with Adam, binary cross-entropy and
    an AUC metric.

    The model is created inside ``tpu_strategy.scope()``; the global
    ``tpu_strategy`` must therefore exist by the time this function is called.

    Returns:
        The compiled ``keras`` Sequential model.
    """
    layers = keras.layers
    with tpu_strategy.scope():
        # Layers are instantiated in the same order in which they are stacked.
        recurrent_stack = [
            layers.LSTM(units, return_sequences=True)
            for units in (720, 512, 256, 128)
        ]
        conv_stack = [
            layers.Conv1D(32, 7),
            layers.MaxPooling1D(),
            layers.Conv1D(64, 3),
            layers.MaxPooling1D(),
            layers.Conv1D(128, 3),
            layers.GlobalMaxPooling1D(),
        ]
        dense_head = [
            layers.Dense(150, activation="swish"),
            layers.Dense(50, activation="swish"),
            layers.Dense(1, activation="sigmoid"),
        ]

        model = keras.models.Sequential(
            [layers.Input(shape=(60, 13)), *recurrent_stack, *conv_stack, *dense_head]
        )
        model.compile(
            optimizer="adam",
            loss="binary_crossentropy",
            metrics=[keras.metrics.AUC()],
        )
    return model

In [None]:
# Connect to the TPU cluster and create the distribution strategy that
# BuildNN() uses for its model scope.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
# tf.distribute.experimental.TPUStrategy is a deprecated endpoint;
# tf.distribute.TPUStrategy is its documented replacement.
tpu_strategy = tf.distribute.TPUStrategy(tpu)

In [None]:
# Cross-validated training: fit a fresh model per fold, accumulate the
# validation ROC AUC and collect each fold's predictions on the test set.
cv_score = 0
test_preds = []
kf = GroupKFold(n_splits=7)
# Take the fold count from the splitter so the average below cannot drift out
# of sync with n_splits (it used to be a second hard-coded 7).
n_folds = kf.get_n_splits()

# NOTE(review): `groups.unique()` contains only distinct values, so no two
# samples share a group and GroupKFold has nothing to keep together. If the
# intent is to keep a subject's sequences in the same fold, per-sample subject
# ids would have to be passed here instead — confirm the intended scheme.
for fold_idx, (train_idx, valid_idx) in enumerate(kf.split(train, labels, groups.unique())):

    print("*"*15, f"Fold {fold_idx+1}", "*"*15)

    X_train, X_valid = train[train_idx], train[valid_idx]
    y_train, y_valid = labels.iloc[train_idx].values, labels.iloc[valid_idx].values

    model = BuildNN()
    # Stop once the monitored validation quantity has not improved for 10
    # epochs and roll back to the best weights seen.
    model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=100, batch_size=256,
              callbacks=[keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)])

    cv_score += roc_auc_score(y_valid, model.predict(X_valid).squeeze())

    test_preds.append(model.predict(test).squeeze())

# Mean validation ROC AUC over all folds.
print(cv_score/n_folds)

In [None]:
# Average the per-fold test predictions and write the submission file.
submission = pd.read_csv("../input/tabular-playground-series-apr-2022/sample_submission.csv")
# Divide by the number of prediction arrays actually collected rather than a
# hard-coded 7, so the average stays correct if the fold count changes.
submission["state"] = sum(test_preds)/len(test_preds)
submission.to_csv("submission.csv", index=False)
submission