# TPS-22-03 with TabTransformer

In [None]:
import numpy as np 
import tensorflow as tf
import pandas as pd
import math
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers
import tensorflow_addons as tfa

## Common Parameters

In [None]:
# Training batch size and width of every categorical embedding.
batch_size = 1024
embedding_dims = 16

# Every "<x>_<y>_<direction>" roadway identifier, grouped here by (x, y) cell.
# The position of an identifier in this list is its integer roadway index.
roadways = [
    '0_0_EB', '0_0_NB', '0_0_SB',
    '0_1_EB', '0_1_NB', '0_1_SB', '0_1_WB',
    '0_2_EB', '0_2_NB', '0_2_SB', '0_2_WB',
    '0_3_EB', '0_3_NB', '0_3_NE', '0_3_SB', '0_3_SW', '0_3_WB',
    '1_0_EB', '1_0_NB', '1_0_NE', '1_0_SB', '1_0_SW', '1_0_WB',
    '1_1_EB', '1_1_NB', '1_1_SB', '1_1_WB',
    '1_2_EB', '1_2_NB', '1_2_NE', '1_2_SB', '1_2_SW', '1_2_WB',
    '1_3_EB', '1_3_NB', '1_3_NE', '1_3_SB', '1_3_SW', '1_3_WB',
    '2_0_EB', '2_0_NB', '2_0_SB', '2_0_WB',
    '2_1_EB', '2_1_NB', '2_1_NE', '2_1_NW', '2_1_SB', '2_1_SE', '2_1_SW', '2_1_WB',
    '2_2_EB', '2_2_NB', '2_2_NE', '2_2_NW', '2_2_SB', '2_2_SE', '2_2_SW', '2_2_WB',
    '2_3_EB', '2_3_NB', '2_3_NE', '2_3_SB', '2_3_SW', '2_3_WB',
]
# Reverse lookup: roadway identifier -> integer index.
roadways_map = {name: position for position, name in enumerate(roadways)}

# Columns fed to the model, in the order the dataset pipeline emits them.
categorical_columns = ["x", "y", "direction", "roadway_index", "hour", "month", "dayofweek"]
# True: fit the model in this run; False: load previously saved weights.
is_training = True
# Hidden widths of the final MLP, as multiples of the flattened feature width.
mlp_hidden_units_factors = [2, 1]

## Import dataset

In [None]:
# Raw training data of the competition.
train = pd.read_csv("../input/tabular-playground-series-mar-2022/train.csv")
# Integer code for each direction label, numbered in order of first
# appearance in the training data.
direction_map = {
    label: code for code, label in enumerate(train.direction.unique())
}

## Feature Engineering

In [None]:
 def feature_engineering(data):
    """Derive the categorical model features from a raw train/test frame.

    Reads the columns ``x``, ``y``, ``direction`` and ``time`` and returns a
    new frame in which:

    * ``roadway`` is the string ``"<x>_<y>_<direction>"``,
    * ``roadway_index`` is that string's integer index in ``roadways_map``,
    * ``direction`` is replaced by its integer code from ``direction_map``,
    * ``month``, ``dayofweek`` and ``hour`` are extracted from ``time``,
    * ``time`` itself is dropped.

    The input frame is not modified: the work is done on a copy, so the
    function can be called on a slice or reused frame without side effects.

    Args:
        data: DataFrame holding at least ``x``, ``y``, ``direction``, ``time``.

    Returns:
        A new DataFrame with the columns described above.

    Raises:
        KeyError: if a roadway or direction value is missing from
            ``roadways_map`` / ``direction_map``.
    """
    # Copy first: the column assignments below would otherwise leak into the
    # caller's frame even though a different frame is returned.
    data = data.copy()
    data["roadway"] = (
        data["x"].astype(str) + "_" + data["y"].astype(str) + "_" + data["direction"]
    )
    # Look up through __getitem__ (not by passing the dict itself) so that an
    # unknown value raises KeyError instead of silently becoming NaN.
    data["roadway_index"] = data["roadway"].map(roadways_map.__getitem__)
    data["direction"] = data["direction"].map(direction_map.__getitem__)
    timestamps = pd.to_datetime(data["time"])
    data["month"] = timestamps.dt.month
    data["dayofweek"] = timestamps.dt.dayofweek
    data["hour"] = timestamps.dt.hour
    return data.drop(columns=["time"])

In [None]:
# Rebind `train` to the frame returned by feature_engineering.
train = feature_engineering(train)
# Last expression of the cell: preview the first rows of the result.
train.head()

## Make TensorFlow Dataset

In [None]:
def preprocess(x, y):
    """Split one feature row into a 7-tuple of per-column values.

    Args:
        x: indexable row; positions 0 through 6 are read.
        y: target value, passed through unchanged.

    Returns:
        ``((x[0], ..., x[6]), y)``.
    """
    columns = [x[position] for position in range(7)]
    return tuple(columns), y
def make_dataset(df, sequence_length=32, mode="train", shuffle_buffer_size=None):
    """Build a batched ``tf.data`` pipeline of (features, congestion) pairs.

    Each row of ``df[categorical_columns]`` is split by ``preprocess`` into a
    tuple of per-column values and paired with the row's ``congestion``.

    Args:
        df: DataFrame containing ``categorical_columns`` and ``congestion``.
        sequence_length: unused; kept so existing calls remain valid.
        mode: ``"train"`` enables shuffling; any other value keeps row order.
        shuffle_buffer_size: size of the shuffle buffer in training mode.
            ``None`` (default) uses the number of rows in ``df``, i.e. a full
            shuffle.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``batch_size`` elements.
    """
    dataset = tf.data.Dataset.from_tensor_slices((df[categorical_columns], df["congestion"]))
    dataset = dataset.map(preprocess)
    # Cache BEFORE shuffling. Caching after shuffle() stores the first
    # epoch's shuffled batches and replays exactly those every epoch, which
    # defeats per-epoch reshuffling.
    dataset = dataset.cache()
    if mode == "train":
        if shuffle_buffer_size is None:
            # Rows arrive in file order, so a small buffer only mixes
            # neighbouring rows; default to shuffling the whole frame.
            # shuffle() requires a positive buffer size, hence the floor of 1.
            shuffle_buffer_size = max(len(df), 1)
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [None]:
# Hold out the trailing 10% of the rows (in frame order) for validation.
split_fraction = 0.9
split_index = int(split_fraction * len(train))
train_data, val_data = train[:split_index], train[split_index:]
train_data.shape, val_data.shape
# Only the training pipeline is built in "train" mode.
valid_ds = make_dataset(val_data, mode="valid")
train_ds = make_dataset(train_data)

## Model Development

### Create Lookup layers

In [None]:
%%time
# One IntegerLookup layer per categorical column, whose vocabulary is the set
# of distinct values that column takes in `train`.
lookupLayersMap = {
    column: tf.keras.layers.IntegerLookup(vocabulary=list(train[column].unique()))
    for column in categorical_columns
}

In [None]:
def create_mlp(hidden_units, dropout_rate, activation, normalization_layer, name=None):
    """Build a Sequential MLP of (normalization, Dense, Dropout) stages.

    One stage is created per entry of ``hidden_units``. Every stage gets its
    own normalization layer: a single layer instance has one name and one set
    of weights, so it cannot act as an independent normalization at several
    positions of the same model.

    Args:
        hidden_units: iterable of Dense widths, one per stage.
        dropout_rate: rate of the Dropout layer that closes each stage.
        activation: activation passed to each Dense layer.
        normalization_layer: either a Keras layer instance (used as-is for
            the first stage and as a template for later stages) or a
            zero-argument callable returning a new layer (called per stage).
        name: optional name of the returned Sequential model.

    Returns:
        A ``keras.Sequential`` model containing the stages in order.

    NOTE(review): for more than one hidden unit this yields one normalization
    layer per stage. Weights saved from a model built with a single shared
    instance may not match this layout -- confirm before loading old
    checkpoints.
    """

    def _normalization_for(stage):
        """Return the normalization layer to use for stage number `stage`."""
        if not isinstance(normalization_layer, layers.Layer):
            # A class or factory was passed: build a new layer each time.
            return normalization_layer()
        if stage == 0:
            # Single-stage MLPs keep using exactly the layer that was passed.
            return normalization_layer
        # Clone the template; drop its name so the clone gets a unique one.
        config = normalization_layer.get_config()
        config.pop("name", None)
        return type(normalization_layer).from_config(config)

    mlp_layers = []
    for stage, units in enumerate(hidden_units):
        mlp_layers.append(_normalization_for(stage))
        mlp_layers.append(layers.Dense(units, activation=activation))
        mlp_layers.append(layers.Dropout(dropout_rate))

    return keras.Sequential(mlp_layers, name=name)
def get_model():
    """Build and compile the transformer-based regression model.

    For every name in ``categorical_columns`` the model takes one input of
    shape ``(1,)``, maps it through that column's layer in
    ``lookupLayersMap`` and embeds it into ``embedding_dims`` dimensions.
    The per-column embeddings are stacked, a learned per-column embedding is
    added, and the result goes through three blocks of multi-head
    self-attention plus a feedforward MLP (each followed by a skip connection
    and layer normalization). The flattened output feeds a final MLP and a
    single-unit Dense output.

    Reads the module-level names ``categorical_columns``, ``lookupLayersMap``,
    ``embedding_dims`` and ``mlp_hidden_units_factors``.

    Returns:
        A ``keras.Model`` compiled with MSE loss, an MAE metric and the AdamW
        optimizer.
    """
    categorical_inputs = []
    categorical_vectors = []
    for column in categorical_columns:
        # One input per column; the model's inputs follow the order of
        # categorical_columns.
        categorical_input = keras.Input(shape=(1, ), name=f"{column}_dense_input")
        lookup = lookupLayersMap[column]
        # Raw column value -> vocabulary index.
        categorical_vector = lookup(categorical_input)
        # Vocabulary index -> embedding vector of width embedding_dims.
        categorical_vector = keras.layers.Embedding(len(lookup.get_vocabulary()), embedding_dims, input_length=1)(categorical_vector)
        # Collapse the per-sample dimensions into one flat vector.
        categorical_vector = keras.layers.Reshape((-1, ))(categorical_vector)
        categorical_vectors.append(categorical_vector)
        categorical_inputs.append(categorical_input)
    # Stack the per-column vectors along a new axis 1 (one slot per column).
    encoded_categorical_features = tf.stack(categorical_vectors, axis=1)
    
    # Learned embedding indexed by column position, added to every sample so
    # that each slot carries information about which column it came from.
    num_columns = encoded_categorical_features.shape[1]
    column_embedding = layers.Embedding(
        input_dim=num_columns, output_dim=embedding_dims
    )
    column_indices = tf.range(start=0, limit=num_columns, delta=1)

    encoded_categorical_features = encoded_categorical_features + column_embedding(
        column_indices
    )
    # Three transformer blocks; each one's output is the next one's input.
    for block_idx in range(3):
        # Create a multi-head attention layer (self-attention: the same
        # tensor is passed as query and value).
        attention_output = layers.MultiHeadAttention(
            num_heads=8,
            key_dim=embedding_dims,
            dropout=0.2,
            name=f"multihead_attention_{block_idx}",
        )(encoded_categorical_features, encoded_categorical_features)
        # Skip connection 1.
        x = layers.Add(name=f"skip_connection1_{block_idx}")(
            [attention_output, encoded_categorical_features]
        )
        # Layer normalization 1.
        x = layers.LayerNormalization(name=f"layer_norm1_{block_idx}", epsilon=1e-6)(x)
        # Feedforward: a single-stage MLP that keeps the embedding width.
        feedforward_output = create_mlp(
            hidden_units=[embedding_dims],
            dropout_rate=0.2,
            activation=keras.activations.gelu,
            normalization_layer=layers.LayerNormalization(epsilon=1e-6),
            name=f"feedforward_{block_idx}",
        )(x)
        # Skip connection 2.
        x = layers.Add(name=f"skip_connection2_{block_idx}")([feedforward_output, x])
        # Layer normalization 2.
        encoded_categorical_features = layers.LayerNormalization(
            name=f"layer_norm2_{block_idx}", epsilon=1e-6
        )(x)
        
    # Flatten the "contextualized" embeddings of the categorical features.
    features = layers.Flatten()(encoded_categorical_features)
    # Compute MLP hidden_units as multiples of the flattened feature width.
    mlp_hidden_units = [
        factor * features.shape[-1] for factor in mlp_hidden_units_factors
    ]
    
    # Create final MLP.
    features = create_mlp(
        hidden_units=mlp_hidden_units,
        dropout_rate=0.2,
        activation=keras.activations.selu,
        normalization_layer=layers.BatchNormalization(),
        name="MLP",
    )(features)

    # Single linear output unit: the predicted target value.
    output = keras.layers.Dense(1)(features)
    model = keras.Model(inputs=categorical_inputs, outputs=output)
    adam = tfa.optimizers.AdamW(
        learning_rate=1e-3, weight_decay=0.0001
    )
    model.compile(loss="mse", optimizer=adam, metrics=["mae"])
    return model

## Create Model

In [None]:
# Build the compiled model, print its layer summary, and draw its graph
# with tensor shapes (the plot is the last expression of the cell).
model = get_model()
model.summary()
keras.utils.plot_model(model, show_shapes=True)

## Model Training

In [None]:
# Checkpoint: write weights only, and only when val_mae improves.
cp = keras.callbacks.ModelCheckpoint(
    "model.tf",
    monitor="val_mae",
    save_best_only=True,
    save_weights_only=True,
)
# Early stopping with a patience of 10 epochs (default monitored quantity).
es = keras.callbacks.EarlyStopping(patience=10)
if not is_training:
    # Inference-only run: reuse weights produced by an earlier run.
    model.load_weights("../input/tps2203-dnn-output/model.tf")
else:
    history = model.fit(
        train_ds,
        validation_data=valid_ds,
        epochs=50,
        callbacks=[es, cp],
    )
    # Continue with the checkpointed weights rather than the last epoch's.
    model.load_weights("model.tf")
    pd.DataFrame(history.history).plot()
    plt.show()

## Submission

In [None]:
def preprocess_test(x):
    """Split one feature row into a 7-tuple and pair it with a dummy target.

    Args:
        x: indexable row; positions 0 through 6 are read.

    Returns:
        ``((x[0], ..., x[6]), 0)`` -- the constant 0 stands in for the target.
    """
    columns = [x[position] for position in range(7)]
    return tuple(columns), 0
def make_test_dataset(data):
    """Build the batched, unshuffled prediction pipeline.

    Args:
        data: feature table whose rows are split by ``preprocess_test``.

    Returns:
        A ``tf.data.Dataset`` of ``batch_size``-element batches, cached and
        prefetched one batch ahead.
    """
    rows = tf.data.Dataset.from_tensor_slices(data)
    batches = rows.map(preprocess_test).batch(batch_size)
    return batches.cache().prefetch(1)

In [None]:
%%time
import time
# Timestamp taken at the start of the cell; it is not read again in this cell.
begin = time.time()
# Load the test set and give it the same engineered features as the
# training data.
test = pd.read_csv("../input/tabular-playground-series-mar-2022/test.csv")
test = feature_engineering(test)
# Only the model's input columns go into the prediction pipeline.
test_ds = make_test_dataset(test[categorical_columns])
y_pred = model.predict(test_ds)
# Fill the sample submission's congestion column with the predictions
# rounded to the nearest integer value, then write it out.
submission = pd.read_csv("../input/tabular-playground-series-mar-2022/sample_submission.csv")
submission["congestion"] = np.round(y_pred)
submission.to_csv("submission.csv", index=False)
# Last expression of the cell: preview the first rows of the submission.
submission.head()