Inspired by: https://www.kaggle.com/subinium/tps-may-deeplearning-pipeline-for-beginner

In [None]:
# Library
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# CSV
# Load the training set, the test set and the submission template in one pass.
train, test, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-jun-2021/train.csv',
        '../input/tabular-playground-series-jun-2021/test.csv',
        '../input/tabular-playground-series-jun-2021/sample_submission.csv',
    )
)

In [None]:
# Show the freshly loaded training frame as the cell output for a quick look.
train

In [None]:
# Remove the 'id' column from both frames before any further processing.
train, test = (frame.drop(columns='id') for frame in (train, test))

### Normalization

In [None]:
# Standardize every feature column (subtract the mean, divide by the standard
# deviation). The statistics come from the training set only and are reused
# for the test set so both frames end up on the same scale.
# The columns are discovered from the frame itself instead of a fixed count:
# the previous `range(50)` loop skipped any feature column past `feature_49`.
feature_cols = [col for col in train.columns if col.startswith('feature_')]
feature_mean = train[feature_cols].mean()
# A constant column has std == 0; divide by 1 instead so it maps to 0.0 rather
# than NaN/inf.
feature_std = train[feature_cols].std().replace(0, 1)
# Column-wise arithmetic aligns on column names and avoids a Python-level
# lambda call per cell.
train[feature_cols] = (train[feature_cols] - feature_mean) / feature_std
test[feature_cols] = (test[feature_cols] - feature_mean) / feature_std

In [None]:
# Map each distinct target label to an integer id (ids follow the sorted label
# order), pull the encoded labels out as `target`, and leave `train` holding
# only the feature columns.
class_names = sorted(train['target'].unique())
label_dict = dict(zip(class_names, range(len(class_names))))

target = train['target'].map(label_dict)
train = train.drop(columns='target')

In [None]:
# Swap the pandas objects for plain NumPy arrays and expand the integer class
# ids into one-hot rows.
train, target = train.values, to_categorical(target.values)

In [None]:
# Show `train` again; by now it is the NumPy array produced by `.values`, not a DataFrame.
train

### Split

In [None]:
# Hold out 20% of the rows for validation. Stratifying on the one-hot targets
# is meant to keep class proportions alike in both parts; the fixed seed makes
# the split repeatable.
split_options = dict(test_size=0.2, random_state=2, stratify=target)
X_train, X_val, y_train, y_val = train_test_split(train, target, **split_options)

In [None]:
# Derive the network's input and output widths from the prepared arrays rather
# than hard-coding them, so the model always matches the data it is trained on.
num_features = X_train.shape[1]  # one input unit per feature column
num_classes = y_train.shape[1]   # one softmax unit per one-hot target column

### Model

In [None]:
# Fully connected classifier: six hidden stages of decreasing width, each made
# of Dense(relu) -> BatchNormalization -> Dropout(0.7), followed by a softmax
# layer with one unit per class.
hidden_widths = (512, 512, 256, 256, 128, 64)

model = Sequential()
for depth, width in enumerate(hidden_widths):
    if depth == 0:
        # Only the first layer declares the input size.
        model.add(Dense(width, input_dim=num_features, activation='relu'))
    else:
        model.add(Dense(width, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.7))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

### Compile

In [None]:
# Configure training with categorical cross-entropy, which pairs with the
# one-hot targets and the softmax output layer. `metrics` is passed as a list,
# the form the Keras `compile` API documents; newer Keras releases may reject
# a bare string here.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

### Train

In [None]:
# Train for 20 epochs in mini-batches of 50, scoring the validation split after
# every epoch. The returned History object is kept for the learning-curve plot.
fit_options = {
    'batch_size': 50,
    'epochs': 20,
    'validation_data': (X_val, y_val),
}
history = model.fit(X_train, y_train, **fit_options)

In [None]:
# Score the trained model on the held-out validation split (the printed labels
# say "Test", but the data is `X_val` / `y_val`). With the single accuracy
# metric set at compile time, `evaluate` returns [loss, accuracy].
score = model.evaluate(X_val, y_val, verbose = 0)
loss_value, accuracy_pct = score[0], score[1] * 100
# The loss is a raw cross-entropy value, not a percentage, so it is printed
# without the '%' suffix the previous message appended.
print('Test loss: {}'.format(loss_value))
print('Test score: {}%'.format(accuracy_pct))
print("MLP Error: %.2f%%" % (100 - accuracy_pct))

In [None]:
# Draw the per-epoch training and validation loss on one wide figure.
fig, ax = plt.subplots(figsize=(20, 8))
for loss_key in ('loss', 'val_loss'):
    sns.lineplot(x=history.epoch, y=history.history[loss_key], ax=ax)
ax.set(title='Learning Curve (Loss)', xlabel='Epoch', ylabel='Loss')
# Legend entries follow the plotting order above: training first, validation second.
ax.legend(['train', 'test'], loc='best')
plt.show()

### Prediction

In [None]:
# Predict class probabilities for the test rows, write them into the class
# columns of the submission template, and save the result as a CSV.
class_columns = [
    'Class_1', 'Class_2', 'Class_3',
    'Class_4', 'Class_5', 'Class_6',
    'Class_7', 'Class_8', 'Class_9',
]
class_probabilities = model.predict(test)
sample_submission[class_columns] = class_probabilities
sample_submission.to_csv('./prediction1.csv', index=False)