In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print each file's full path.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Suppress every Python warning from this point on (deprecation warnings
# included), so library warnings will not show up in later cell outputs.
warnings.filterwarnings('ignore')

In [None]:
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix, log_loss
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Random seed; used as `random_state` when splitting the data.
# NOTE(review): model weight initialisation and dropout are not seeded in the
# cells visible here — confirm whether fully reproducible training is wanted.
SEED = 2021


In [None]:
# Directory holding the competition CSVs; defined once so the three reads
# below cannot drift apart and the location is easy to change.
DATA_DIR = '../input/tabular-playground-series-may-2021'

train = pd.read_csv(f'{DATA_DIR}/train.csv')
test = pd.read_csv(f'{DATA_DIR}/test.csv')
sample_submission = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [None]:
# Remove the 'id' column (presumably a row identifier, not a feature) from
# both frames. Reassigning with errors='ignore' instead of mutating in place
# keeps this cell idempotent: re-running it after 'id' is already gone no
# longer raises KeyError.
train = train.drop(columns='id', errors='ignore')
test = test.drop(columns='id', errors='ignore')

In [None]:
# Features are every column except the last; the target is the last column.
# The test frame is used unchanged as the prediction input.
XT, YT = train.iloc[:, :-1], train.iloc[:, -1]
XV = test

In [None]:
# Learn the min-max scaling from the training features only, then apply the
# same fitted scaler to the training features and to the test features.
scaler = MinMaxScaler()
scaler.fit(XT)
Xt = scaler.transform(XT)
Z = scaler.transform(XV)

In [None]:
# Map each distinct target label to its position in sorted label order, then
# one-hot encode the resulting integer codes.
class_index = {label: pos for pos, label in enumerate(sorted(YT.unique()))}
y = to_categorical(YT.map(class_index).values)

In [None]:
# Hold out 10% of the training rows for evaluation.
# The split input is a fresh transform of XT rather than the existing `Xt`:
# the original overwrote `Xt` with its own split, so a second run of this cell
# fed the already-split array back in and failed on a length mismatch with `y`.
Xt, Xv, yt, yv = train_test_split(
    scaler.transform(XT), y, test_size=.1, random_state=SEED, stratify=y
)

In [None]:
# Early-stopping callback: watches validation loss (lower is better) with a
# patience of two epochs.
stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=2,
)


In [None]:
def mlp_model(lr, n_features=50, n_classes=4):
    """Build and compile a multi-layer perceptron classifier.

    The network has three hidden Dense/ReLU layers (128, 64 and 64 units),
    each followed by batch normalisation and dropout (rates 0.1, 0.2, 0.4),
    and a softmax output layer. It is compiled with categorical
    cross-entropy loss, the Adam optimizer and an accuracy metric.

    Parameters
    ----------
    lr : float
        Learning rate passed to the Adam optimizer.
    n_features : int, optional
        Width of the input vector. Defaults to 50, the value that was
        previously hard-coded.
    n_classes : int, optional
        Number of softmax output units. Defaults to 4, the value that was
        previously hard-coded.

    Returns
    -------
    A compiled Keras ``Sequential`` model.
    """
    model = Sequential([
        # Explicit Input object instead of `input_dim=` on the first Dense
        # layer, which newer Keras versions discourage.
        Input(shape=(n_features,)),

        Dense(128, activation='relu', name='dense1'),
        BatchNormalization(),
        Dropout(0.1),

        Dense(64, activation='relu', name='dense2'),
        BatchNormalization(),
        Dropout(0.2),

        Dense(64, activation='relu', name='dense3'),
        BatchNormalization(),
        Dropout(0.4),

        Dense(n_classes, activation='softmax', name='fully'),
    ])

    model.compile(
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        # `lr=` is a deprecated alias that newer Keras releases reject;
        # `learning_rate=` is the supported keyword.
        optimizer=Adam(learning_rate=lr),
    )
    return model


In [None]:
# Instantiate the network with the chosen learning rate and print its layer table.
model = mlp_model(lr=0.00078)
model.summary()

In [None]:
# Train on the training split; Keras sets aside 21% of the supplied rows as a
# validation set, and the `stop` callback can end training before 175 epochs.
history = model.fit(
    Xt,
    yt,
    batch_size=545,
    epochs=175,
    validation_split=0.21,
    callbacks=[stop],
)

In [None]:
# Score the trained model on the hold-out split (Xv, yv); the returned values
# follow the compile() configuration: loss first, then accuracy.
model.evaluate(Xv, yv)


In [None]:
# Training vs. validation accuracy per epoch.
# Drawn through the explicit Axes API; the y-axis label was previously 'Loss',
# which mislabelled this accuracy figure.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(history.epoch, history.history['accuracy'], label='train')
ax.plot(history.epoch, history.history['val_accuracy'], label='val')
ax.set_title('Learning Curve (Accuracy)')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend(loc='best')
plt.show()


In [None]:
# Training vs. validation loss per epoch.
fig, ax = plt.subplots(figsize=(20, 8))
for key in ('loss', 'val_loss'):
    ax.plot(history.epoch, history.history[key])
ax.set(title='Learning Curve (Loss)', ylabel='Loss', xlabel='Epoch')
ax.legend(['train', 'val'], loc='best')
plt.show()

In [None]:
def cv(fold, lr, EPOCHS, BATCH_SIZE, features=None, labels=None):
    """Run stratified k-fold cross-validation of the MLP and print the scores.

    A new model is built and trained for every fold, so weights learned on
    one fold cannot leak into the evaluation of another. Per-fold loss and
    accuracy are printed, followed by their means.

    Parameters
    ----------
    fold : int
        Number of stratified folds.
    lr : float
        Learning rate forwarded to ``mlp_model``.
    EPOCHS : int
        Maximum number of training epochs per fold.
    BATCH_SIZE : int
        Mini-batch size used for training.
    features : array-like, optional
        Feature matrix, one row per sample. Defaults to the full scaled
        training set, ``scaler.transform(XT)``. Its width must match the
        input size ``mlp_model`` builds for.
    labels : array-like, optional
        One-hot encoded targets aligned with ``features``. Defaults to the
        notebook-level ``y``.

    Returns
    -------
    None
    """
    # The original referenced an undefined global `X` and indexed it with
    # `.iloc`; resolve the inputs explicitly and index them as NumPy arrays.
    features = np.asarray(scaler.transform(XT) if features is None else features)
    labels = np.asarray(y if labels is None else labels)

    # Seeded so the fold assignment is reproducible between runs.
    skf = StratifiedKFold(n_splits=fold, shuffle=True, random_state=SEED)
    losses = []
    acc = []
    # Stratify on the integer class recovered from the one-hot rows.
    for i, (train_idx, val_idx) in enumerate(skf.split(features, labels.argmax(1))):
        print(f'Fold {i+1}')
        # Fresh model and callback per fold: reusing one model would carry
        # weights trained on earlier folds into later evaluations.
        fold_model = mlp_model(lr)
        early_stop = EarlyStopping(monitor='val_loss', patience=2, mode='min')
        fold_model.fit(
            features[train_idx],
            labels[train_idx],
            validation_split=0.1,
            epochs=EPOCHS,
            batch_size=BATCH_SIZE,
            verbose=0,
            callbacks=[early_stop],
        )
        score = fold_model.evaluate(features[val_idx], labels[val_idx], verbose=0)
        losses.append(score[0])
        acc.append(score[1])
        print(f'Loss ={score[0]} Accuracy = {score[1]}')

    print('CV Mean')
    print(f'Loss ={np.mean(losses)} Accuracy ={np.mean(acc)}')

In [None]:
# Write the model's predicted class probabilities for the scaled test set (Z)
# into the four class columns of the submission frame.
# NOTE(review): assumes the sorted label order used to encode `y` lines up
# with the Class_1..Class_4 column order — confirm.
sample_submission[['Class_1', 'Class_2', 'Class_3', 'Class_4']] = model.predict(Z)

In [None]:
# Save the submission to the working directory, omitting the DataFrame index.
sample_submission.to_csv("./submission_1508.csv", index=False)

