In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import warnings
# Install a global "ignore" filter so no warning of any category is printed
# by the cells below (presumably to keep notebook output compact).
# NOTE(review): this also hides deprecation and numerical warnings from
# pandas/keras — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

## **1️⃣ Load Data**

In [None]:
import os

# Walk the Kaggle input mount and print the full path of every file found,
# so the dataset file names used below can be checked against what is mounted.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

In [None]:
# Read the three competition tables in one pass: the training sensor readings,
# the training labels, and the test sensor readings.
train_features, train_labels, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/tabular-playground-series-apr-2022/train.csv',
        '/kaggle/input/tabular-playground-series-apr-2022/train_labels.csv',
        '/kaggle/input/tabular-playground-series-apr-2022/test.csv',
    )
)

In [None]:
# Preview the first rows of the training sensor table.
train_features.head()

In [None]:
# Preview the first rows of the training label table.
train_labels.head()

## **2️⃣ EDA**

#### **1. Null Data**

In [None]:
# Per-column count of missing values in the training sensor table.
train_features.isna().sum()

#### **2. Distribution of sensors for sequence 0**

In [None]:
# Keep only the rows of sequence 0 and overlay all 13 sensor traces on one
# axis to compare their ranges at a glance.
train_features_0 = train_features.loc[train_features['sequence'] == 0]

all_sensor_columns = [f'sensor_{idx:02d}' for idx in range(13)]
for column in all_sensor_columns:
    plt.plot(train_features_0['step'], train_features_0[column])
plt.show()

In [None]:
# Sequence 0, sensors 00-03: same traces as the overview, now with a legend.
steps = train_features_0['step']
for column in (f'sensor_{idx:02d}' for idx in range(0, 4)):
    plt.plot(steps, train_features_0[column], label=column)
plt.legend(fontsize=8)
plt.show()

In [None]:
# Sequence 0, sensors 04-07.
steps = train_features_0['step']
for column in (f'sensor_{idx:02d}' for idx in range(4, 8)):
    plt.plot(steps, train_features_0[column], label=column)
plt.legend(fontsize=8)
plt.show()

In [None]:
# Sequence 0, sensors 08-12.
steps = train_features_0['step']
for column in (f'sensor_{idx:02d}' for idx in range(8, 13)):
    plt.plot(steps, train_features_0[column], label=column)
plt.legend(fontsize=8)
plt.show()

#### **3. Distribution of sensors for sequence 1**

In [None]:
# Keep only the rows of sequence 1 and overlay all 13 sensor traces on one
# axis, mirroring the sequence 0 overview.
train_features_1 = train_features.loc[train_features['sequence'] == 1]

all_sensor_columns = [f'sensor_{idx:02d}' for idx in range(13)]
for column in all_sensor_columns:
    plt.plot(train_features_1['step'], train_features_1[column])
plt.show()

In [None]:
# Sequence 1, sensors 00-03, with a legend.
steps = train_features_1['step']
for column in (f'sensor_{idx:02d}' for idx in range(0, 4)):
    plt.plot(steps, train_features_1[column], label=column)
plt.legend(fontsize=8)
plt.show()

In [None]:
# Sequence 1, sensors 04-07.
steps = train_features_1['step']
for column in (f'sensor_{idx:02d}' for idx in range(4, 8)):
    plt.plot(steps, train_features_1[column], label=column)
plt.legend(fontsize=8)
plt.show()

In [None]:
# Sequence 1, sensors 08-12.
steps = train_features_1['step']
for column in (f'sensor_{idx:02d}' for idx in range(8, 13)):
    plt.plot(steps, train_features_1[column], label=column)
plt.legend(fontsize=8)
plt.show()

#### **4. Distribution of train labels**

In [None]:
# Count how many rows carry each distinct `state` value to check class balance.
train_labels['state'].value_counts()

## **3️⃣ Data Preprocessing**

In [None]:
# Remove the identifier columns so only the sensor channels remain as model
# inputs and only `state` remains as the target.
#
# The frames are re-bound instead of mutated with `inplace=True`, and
# `errors='ignore'` skips columns that are already gone. The cell can
# therefore be re-run without raising KeyError on the second execution.
id_columns = ['sequence', 'subject', 'step']
train_features = train_features.drop(columns=id_columns, errors='ignore')
test_df = test_df.drop(columns=id_columns, errors='ignore')
train_labels = train_labels.drop(columns='sequence', errors='ignore')

#### **1. Standard Scaler**

In [None]:
# Optional standardisation step, currently disabled (kept for reference).
# As written it would fit the scaler on the training rows only and then apply
# the same fitted transform to the test rows.
# from sklearn.preprocessing import StandardScaler
# 
# scaler = StandardScaler()
# train_features = scaler.fit_transform(train_features)
# test_df = scaler.transform(test_df)

#### **2. Reshape**

In [None]:
# LSTM input shape : (batch_size, input_length, input_dim)
#
# Fold the flat (n_rows, 13) tables into (n_sequences, 60, 13): 60 consecutive
# rows per sequence, 13 sensor channels per row.
# Assumes rows are ordered by sequence and then by step — TODO confirm.
#
# `np.asarray` accepts both a DataFrame and an already-reshaped ndarray, and
# `-1` lets NumPy infer the sequence count, so this cell is safe to re-run.
# A row count that is not a multiple of 60 still raises ValueError.
train_features = np.asarray(train_features).reshape(-1, 60, 13)
test_df = np.asarray(test_df).reshape(-1, 60, 13)

In [None]:
# Sanity check: expected to be (n_sequences, 60, 13) after the reshape.
train_features.shape

In [None]:
# Sanity check: the row count must equal the first dimension of
# train_features, since the two are split together below.
train_labels.shape

## **4️⃣ LSTM**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a validation set using scikit-learn's default split proportion.
# A fixed `random_state` makes the shuffle deterministic, so the split and
# the validation curves below can be reproduced on a fresh kernel.
X_train, X_val, y_train, y_val = train_test_split(
    train_features, train_labels, random_state=42
)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional, Input
from keras.layers import Dropout,MaxPooling1D, GlobalMaxPooling1D

# First architecture: three stacked LSTMs that each return the full sequence
# and are each followed by temporal max pooling. A global max pool then
# collapses the remaining timesteps, feeding a small dense head with dropout
# and a single sigmoid output unit.
model = Sequential([
    Input(shape=(60, 13)),
    LSTM(128, return_sequences=True),
    MaxPooling1D(),
    LSTM(512, return_sequences=True),
    MaxPooling1D(),
    LSTM(256, return_sequences=True),
    MaxPooling1D(),
    GlobalMaxPooling1D(),
    Dropout(0.5),
    Dense(50, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
# Binary cross-entropy pairs with the model's single sigmoid output unit.
# `metrics` is passed as a list: Keras 3 raises ValueError for a bare string
# here, while the list form works on older versions as well.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(X_train, y_train, batch_size=500, epochs=10, validation_data=(X_val, y_val))

In [None]:
# Training vs. validation loss per epoch for the model fitted above.
for curve_key in ('loss', 'val_loss'):
    plt.plot(history.history[curve_key])
plt.title('model train vs validation loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

In [None]:
# Second architecture: the same three LSTM + max-pooling stages, followed by
# one more LSTM(256) whose per-timestep outputs go straight into the global
# max pool. The dense head is unchanged. This rebinds `model`, replacing the
# previously trained network.
model = Sequential([
    Input(shape=(60, 13)),
    LSTM(128, return_sequences=True),
    MaxPooling1D(),
    LSTM(512, return_sequences=True),
    MaxPooling1D(),
    LSTM(256, return_sequences=True),
    MaxPooling1D(),
    LSTM(256, return_sequences=True),
    GlobalMaxPooling1D(),
    Dropout(0.5),
    Dense(50, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
# Binary cross-entropy pairs with the model's single sigmoid output unit.
# `metrics` is passed as a list: Keras 3 raises ValueError for a bare string
# here, while the list form works on older versions as well.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(X_train, y_train, batch_size=500, epochs=15, validation_data=(X_val, y_val))

In [None]:
# Training vs. validation loss per epoch for the most recently fitted model.
for curve_key in ('loss', 'val_loss'):
    plt.plot(history.history[curve_key])
plt.title('model train vs validation loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

## **5️⃣ Submission**

In [None]:
# Start from the sample submission so the index and column layout match what
# the competition expects.
submission = pd.read_csv('/kaggle/input/tabular-playground-series-apr-2022/sample_submission.csv', index_col=0)
# The network ends in Dense(1), so predict() yields one column per input
# sequence. Flatten it to 1-D explicitly rather than relying on pandas to
# squeeze an (n, 1) array during column assignment.
submission['state'] = model.predict(test_df).ravel()
# Preview only the first rows instead of rendering the whole frame.
submission.head()

In [None]:
# Write the predictions to disk; the index is written as the first column
# (pandas' default behaviour).
submission.to_csv('submission.csv')