# Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used in this notebook — consider narrowing the filter while debugging.
warnings.filterwarnings('ignore')

In [None]:
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix, log_loss
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Shared random seed; passed as `random_state` to `train_test_split`.
SEED: int = 2021

# Import Data

In [None]:
# Read the competition tables: labelled training rows, unlabelled test rows
# and the submission template, in that order.
train, test, sample_submission = map(
    pd.read_csv,
    (
        '../input/tabular-playground-series-may-2021/train.csv',
        '../input/tabular-playground-series-may-2021/test.csv',
        '../input/tabular-playground-series-may-2021/sample_submission.csv',
    ),
)

# Preprocessing Data

In [None]:
# The row identifier is not a feature; remove it from both frames in place.
train.drop(columns='id', inplace=True)
test.drop(columns='id', inplace=True)

In [None]:
# Last column of `train` is the target; everything before it is a feature.
XT, YT = train.iloc[:, :-1], train.iloc[:, -1]
# The test frame has no target column, so it is used as-is.
XV = test

In [None]:
# Learn the min/max range on the training features only, then apply the
# same scaling to the test features.
scaler = MinMaxScaler()
X = scaler.fit_transform(XT)
Z = scaler.transform(XV)

In [None]:
# Map each class label to its rank in sorted order, then one-hot encode.
y = to_categorical(
    YT.map({label: code for code, label in enumerate(sorted(YT.unique()))}).values
)

In [None]:
# Hold out 10% of the labelled rows, stratified on the encoded target.
Xt, Xv, yt, yv = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=SEED,
    stratify=y,
)

# Multi-Layer Perceptron

## Define Stopping Criteria

In [None]:
# Halt training once validation loss has failed to improve for 2 epochs.
stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=2,
)

## Define MLP Model

In [None]:
def mlp_model(lr, input_dim=50, n_classes=4):
    """Build and compile the three-hidden-layer MLP classifier.

    Args:
        lr: Learning rate handed to the Adam optimizer.
        input_dim: Number of input features. Defaults to 50, the value that
            was previously hard-coded.
        n_classes: Number of softmax output units. Defaults to 4, the value
            that was previously hard-coded.

    Returns:
        A compiled ``Sequential`` model that uses categorical cross-entropy
        as its loss and reports accuracy.
    """
    model = Sequential([
        # Explicit input layer instead of the legacy `input_dim` keyword.
        Input(shape=(input_dim,)),

        Dense(256, activation='relu', name='dense1'),
        BatchNormalization(),
        Dropout(0.1),

        Dense(192, activation='relu', name='dense2'),
        BatchNormalization(),
        Dropout(0.2),

        Dense(128, activation='relu', name='dense3'),
        BatchNormalization(),
        Dropout(0.4),

        Dense(n_classes, activation='softmax', name='fully')
    ])

    # `lr` is a deprecated alias that recent Keras releases no longer accept;
    # `learning_rate` is the supported keyword.
    model.compile(
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=Adam(learning_rate=lr),
    )
    return model

In [None]:
# Instantiate the network with a learning rate of 2.5e-4 and print its layers.
model = mlp_model(lr=2.5e-4)
model.summary()

## Model Training

In [None]:
# Train on the 90% split; Keras carves a further 10% off it for validation,
# which is what the early-stopping callback monitors.
history = model.fit(
    Xt,
    yt,
    batch_size=512,
    epochs=128,
    validation_split=0.1,
    callbacks=[stop],
)

## Model Validation

In [None]:
# Score the trained model on the held-out split (loss, then accuracy).
model.evaluate(x=Xv, y=yv)

## Plotting Learning Curve

In [None]:
# Accuracy learning curve: training vs. validation accuracy per epoch.
fig, ax = plt.subplots(figsize=(20,8))
plt.plot(history.epoch, history.history['accuracy'])
plt.plot(history.epoch, history.history['val_accuracy'])
ax.set_title('Learning Curve (Accuracy)')
# The plotted series are accuracies, so the axis must not be labelled 'Loss'.
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend(['train', 'val'], loc='best')
plt.show()

In [None]:
# Loss learning curve: training vs. validation loss per epoch.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(history.epoch, history.history['loss'], label='train')
ax.plot(history.epoch, history.history['val_loss'], label='val')
ax.set(title='Learning Curve (Loss)', xlabel='Epoch', ylabel='Loss')
ax.legend(loc='best')
plt.show()

# Cross Validation

In [None]:
def cv(fold, lr, EPOCHS, BATCH_SIZE):
    """Run stratified k-fold cross-validation of the MLP and print the scores.

    Reads the module-level ``X`` (scaled features), ``y`` (one-hot targets)
    and ``SEED``. For each fold a new model is built with ``mlp_model(lr)``,
    trained on the fold's training rows and evaluated on its validation rows.
    Per-fold loss/accuracy and their means are printed.

    Args:
        fold: Number of folds.
        lr: Learning rate passed to ``mlp_model``.
        EPOCHS: Maximum number of epochs per fold.
        BATCH_SIZE: Mini-batch size used for training.

    Returns:
        None.
    """
    # Seed the shuffle so the fold assignment is reproducible between runs.
    skf = StratifiedKFold(n_splits=fold, shuffle=True, random_state=SEED)
    # StratifiedKFold needs class indices rather than one-hot rows.
    labels = y.argmax(1)
    losses = []
    acc = []
    for i, (train_idx, val_idx) in enumerate(skf.split(X, labels)):
        print(f'Fold {i+1}')
        X_fit, X_val = X[train_idx], X[val_idx]
        y_fit, y_val = y[train_idx], y[val_idx]

        # Build a fresh model for every fold. Reusing one model across folds
        # means each later fold is scored on rows the weights have already
        # been trained on, which inflates the reported metrics.
        model = mlp_model(lr)
        stop = EarlyStopping(monitor='val_loss', patience=2, mode='min')

        model.fit(X_fit, y_fit, validation_split=0.1, epochs=EPOCHS,
                  batch_size=BATCH_SIZE, verbose=0, callbacks=[stop])
        score = model.evaluate(X_val, y_val, verbose=0)
        losses.append(score[0])
        acc.append(score[1])
        print(f'Loss ={score[0]} Accuracy = {score[1]}')

    print('CV Mean')
    print(f'Loss ={np.mean(losses)} Accuracy ={np.mean(acc)}')

In [None]:
# 10-fold cross-validation with the same hyper-parameters as the single run.
cv(fold=10, lr=25e-5, EPOCHS=128, BATCH_SIZE=512)

# Submission File

In [None]:
# Write the predicted class probabilities for the test rows into the
# submission template's four class columns.
sample_submission[
    ['Class_1', 'Class_2', 'Class_3', 'Class_4']
] = model.predict(x=Z)

In [None]:
# Persist the filled-in template without the DataFrame index column.
sample_submission.to_csv(
    path_or_buf='submission_.csv',
    index=False,
)