# Tabular Playground Series Prediction

## Import Packages

In [None]:
import numpy as np 
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

## Load datasets

In [None]:
# Read the competition's training and test tables (in that order) into
# DataFrames using pandas' default CSV parsing.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/tabular-playground-series-aug-2021/train.csv",
        "/kaggle/input/tabular-playground-series-aug-2021/test.csv",
    )
)

## EDA

In [None]:
# Preview the first five rows of the training table.
train.head(n=5)

In [None]:
# Show the training table's dimensions as a (rows, columns) tuple.
train.shape

In [None]:
# Summary statistics for every column, transposed so that each column of
# `train` becomes one row of the report.
train.describe().T

There isn't an obvious correlation between features and target values.

In [None]:
# Pairwise Pearson correlation between all columns of the training table.
corr_score = train.corr(method="pearson")

In [None]:
# Correlation of every column with the "loss" target, strongest positive
# correlation first.
corr_score.loc[:, "loss"].sort_values(ascending=False)

## Data Preprocessing

### Drop id column

In [None]:
# Remove the "id" column from both tables so it is not fed to the model.
# The test ids are kept because the submission file needs them; the
# training ids are simply discarded.
test_ids = test.pop("id")
del train["id"]

In [None]:
# Per-column mean and standard deviation of the training table (the "loss"
# target column is still present here); the feature entries are used later
# to standardise the data.
# NOTE(review): these statistics are computed before the train/validation
# split, so the validation rows contribute to them.
train_mean, train_std = train.mean(), train.std()

In [None]:
# Move the "loss" entries out of the statistics so that `train_mean` and
# `train_std` describe the feature columns only.
train_targets_mean, train_targets_std = (
    train_mean.pop("loss"),
    train_std.pop("loss"),
)

### Train Validation Split

In [None]:
# Fraction of the training rows held out for validation (passed to
# train_test_split as `test_size`).
validation_split = 0.2

In [None]:
# Hold out `validation_split` of the rows for validation. The shuffle is
# seeded so the same rows are held out on every run, which keeps the
# validation metrics and the checkpoint selection reproducible.
train_features, validation_features = train_test_split(
    train, test_size=validation_split, random_state=42
)

In [None]:
# Separate the "loss" target from each split; after these two statements the
# *_features tables contain input columns only.
train_targets = train_features.pop("loss")
validation_targets = validation_features.pop("loss")

### Data Scaling

In [None]:
# Standardise every feature table with the training statistics (subtract the
# mean, divide by the standard deviation), or pass the data through
# unchanged when scaling is switched off.
should_scale = True
if should_scale:
    # A constant column has a standard deviation of 0; dividing by it would
    # yield NaN/inf, so such columns are divided by 1 instead.
    feature_std = train_std.replace(0, 1)
    train_features = (train_features - train_mean) / feature_std
    validation_features = (validation_features - train_mean) / feature_std
    test_features = (test - train_mean) / feature_std
else:
    # The prediction step reads `test_features`, so it must exist even when
    # no scaling is applied.
    test_features = test

# Quick visual sanity check of the three feature tables.
print(test_features.head())
print(train_features.head())
print(validation_features.head())

## Model Development
### Using DNN

In [None]:
# Stop training once the validation loss has failed to improve for 5
# consecutive epochs ("val_loss" is the callback's default monitor; it is
# spelled out here for clarity).
early_stop = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5)

# Keep only the epoch with the lowest validation RMSE on disk.
checkpoint_path = "model.h5"
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path, monitor="val_rmse", mode="min", save_best_only=True
)

# Named "rmse" so that it appears as "rmse" / "val_rmse" in the training
# history, which is the name the checkpoint callback monitors.
rmse = tf.keras.metrics.RootMeanSquaredError(name="rmse")

# Three hidden ELU layers of 32 units, each followed by 20% dropout, and a
# single-unit output for the regression target.
# NOTE(review): the output layer also uses ELU, whose output is bounded
# below (about -1 with the default alpha) - confirm this suits the target.
model = tf.keras.Sequential([
    # `shape` must be a tuple: `(n)` is just the integer n, so the trailing
    # comma is required to describe one feature vector of length n.
    tf.keras.layers.Input(shape=(train_features.shape[-1],)),
    tf.keras.layers.Dense(32, activation="elu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation="elu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation="elu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation="elu"),
])
model.compile(loss="mse", optimizer="adam", metrics=["mae", rmse])

# Train for at most 100 epochs; early stopping normally ends the run sooner.
history = model.fit(
    train_features,
    train_targets,
    epochs=100,
    batch_size=1000,
    validation_data=(validation_features, validation_targets),
    callbacks=[early_stop, checkpoint],
)

### Model Evaluation

#### Loss and error metrics (RMSE, MAE) over time

In [None]:
# One figure per tracked quantity, each showing the training curve next to
# its validation counterpart across epochs.
for metric_name in ("loss", "rmse", "mae"):
    curve_columns = [metric_name, "val_" + metric_name]
    curves = pd.DataFrame(history.history, columns=curve_columns)
    curves.plot()
    plt.show()

### Submission

In [None]:
# Restore the weights stored at `checkpoint_path` by the checkpoint callback,
# predict the loss for every test row and print the first ten predictions.
model.load_weights(checkpoint_path)
loss_pred = model.predict(test_features)
preview_rows = loss_pred[:10]
print(preview_rows)

In [None]:
# Build the two-column submission table (id, predicted loss) and write it to
# disk without the DataFrame index.
flat_predictions = loss_pred.reshape(-1)
submission = pd.DataFrame({"id": test_ids, "loss": flat_predictions})
submission.to_csv("submission.csv", index=False)