# Tabular Playground June 2021 Submission
We'll start with the basic imports. For the submission, I plan on using a deep learning model with TensorFlow.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file
# found, so the available data files show up in the cell output.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Setting up the Data
Now, we'll load and take a look at the data.

In [None]:
from sklearn.preprocessing import OneHotEncoder
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output`, and 1.4
# removed the old name. Try the new keyword first and fall back to the old one
# so the notebook runs on both old and new installations.
try:
    OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
except TypeError:
    OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)

train_data = pd.read_csv('../input/tabular-playground-series-jun-2021/train.csv', index_col='id')
# The first 75 columns are used as the model's input features.
X = train_data.iloc[:, :75]
# One-hot encode the labels. The column names are taken from the fitted
# encoder's categories so each column is guaranteed to carry the label it
# actually encodes, and the frame shares the 'id' index with X.
y = pd.DataFrame(
    OH_encoder.fit_transform(train_data[['target']]),
    index=train_data.index,
    columns=OH_encoder.categories_[0],
)
train_data.head()

In [None]:
# Load the competition test set, using its 'id' column as the row index.
test_csv_path = '../input/tabular-playground-series-jun-2021/test.csv'
test_data = pd.read_csv(test_csv_path, index_col='id')
# Every column of the test set is a model input (there is no target column to drop).
X_test = test_data.iloc[:, :]
# Preview the first few rows.
test_data.head()

## The Model
Here, we'll use a keras deep learning model with the following architecture:
1. InputLayer
2. BatchNormalization
3. Dense(16, activation='relu')
4. BatchNormalization
5. Dense(32, activation='relu')
6. BatchNormalization
7. Dense(64, activation='relu')
8. BatchNormalization
9. Dense(128, activation='relu')
10. BatchNormalization
11. Dense(9, activation='softmax')

We'll then compile the model with the Adam optimizer, the categorical_crossentropy loss, and the accuracy metric.

In [None]:
def my_model(n_features=75, n_classes=9, hidden_units=(16, 32, 64, 128)):
    """Build and compile the feed-forward classifier.

    The network is an input layer followed by one
    ``BatchNormalization -> Dense(units, relu)`` pair per entry of
    ``hidden_units``, then a final ``BatchNormalization`` and a softmax
    output layer. With the default arguments this is exactly the
    16-32-64-128 architecture with 75 inputs and 9 outputs.

    Args:
        n_features: Number of input features per sample.
        n_classes: Number of output classes (width of the softmax layer).
        hidden_units: Widths of the hidden Dense layers, in order.

    Returns:
        A ``keras.Sequential`` model compiled with the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    stack = [layers.InputLayer([n_features])]
    for units in hidden_units:
        # Normalize the activations feeding each hidden layer.
        stack.append(layers.BatchNormalization())
        stack.append(layers.Dense(units, activation='relu'))
    stack.append(layers.BatchNormalization())
    stack.append(layers.Dense(n_classes, activation='softmax'))

    model = keras.Sequential(stack)
    model.compile(
        optimizer='adam',
        # The targets are one-hot encoded, hence the non-sparse loss.
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

## Training and Validation
Now, we'll set up the training and validation data.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    train_size=0.7,
    test_size=0.3,
    random_state=0,
)

Then, we'll train the model on the training data and evaluate it on the validation data after each epoch. We'll also use an early-stopping callback, so we can allow many epochs and let training stop on its own once the model stops improving.

In [None]:
# Fresh model for the validation run.
model_val = my_model()

# Stop after 8 epochs without an improvement of at least 0.001 in the
# monitored metric, and roll the model back to its best weights.
early_stopping = keras.callbacks.EarlyStopping(
    min_delta=0.001,
    patience=8,
    verbose=1,
    restore_best_weights=True,
)

# Train on the 70% split and evaluate on the held-out 30% after every epoch.
history = model_val.fit(
    x=X_train,
    y=y_train,
    batch_size=100,
    epochs=50,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

## Final Training and Submission

In [None]:
# The final fit uses all of the training data and therefore has no validation
# set. EarlyStopping monitors 'val_loss' by default, which would never be
# reported here, so the callback would silently do nothing. Monitor the
# training loss instead so that early stopping and best-weight restoration
# actually take effect.
early_stopping_final = keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=8,
    min_delta=0.001,
    restore_best_weights=True,
    verbose=1
)

# Train a fresh model on the full training set for the submission.
model = my_model()
history = model.fit(
    X, y,
    batch_size=100,
    epochs=50,
    callbacks=[early_stopping_final]
)

In [None]:
# Predict the class probabilities for every row of the test set.
predictions = model.predict(X_test)

# X_test still carries the 'id' index it was loaded with, so reuse it for the
# submission rather than reading test.csv from disk a second time.
output = pd.DataFrame(
    predictions,
    index=X_test.index,
    columns=[f'Class_{i+1}' for i in range(predictions.shape[1])],
)

# Write the ids as the first column, under an explicit 'id' header.
output.to_csv('submission.csv', index_label='id')
print("Your submission was successfully saved!")