---
# [Tabular Playground Series - Mar 2022][1]
---
**Acknowledgements**: This notebook builds on the following excellent notebooks.

1. [[TPS JAN 22] Base XGB & LGB][2]
2. [TabNet in Tensorflow 2.0][3]

---
[1]: https://www.kaggle.com/c/tabular-playground-series-mar-2022
[2]: https://www.kaggle.com/ranjeetshrivastav/tps-jan-22-base-xgb-lgb
[3]: https://www.kaggle.com/marcusgawronsky/tabnet-in-tensorflow-2-0

# 0. Settings

In [None]:
# Import dependencies 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
%matplotlib inline

import os
import pathlib
import gc
import sys
import re
import math 
import random
import time 
from tqdm import tqdm 
from pprint import pprint
from typing import Optional, Union, Tuple

import warnings
warnings.filterwarnings('ignore')

import sklearn
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_hub as hub
from tensorflow.keras.layers.experimental import preprocessing
AUTOTUNE = tf.data.experimental.AUTOTUNE

import tensorflow_addons as tfa

print('import done!')

In [None]:
# global config
config = {}

AUTOTUNE = tf.data.experimental.AUTOTUNE


# For reproducible results
def seed_all(s):
    """Seed the Python, NumPy and TensorFlow random generators with ``s``.

    Also sets the ``TF_CUDNN_DETERMINISTIC`` and ``PYTHONHASHSEED``
    environment variables, then prints a confirmation message.
    """
    # Environment flags first, then the three generators in a fixed order.
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    os.environ['PYTHONHASHSEED'] = str(s)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(s)
    print('Seeds setted!')


global_seed = 42
seed_all(global_seed)

# 1. Data Preprocessing

## 1.1 Data Check

In [None]:
# Locations of the competition CSV files.
data_config = {
    'train_csv_path': '../input/tabular-playground-series-mar-2022/train.csv',
    'test_csv_path': '../input/tabular-playground-series-mar-2022/test.csv',
    'sample_submission_path': '../input/tabular-playground-series-mar-2022/sample_submission.csv',
}

# Read the three tables in the order train / test / sample submission.
train_df, test_df, submission_df = (
    pd.read_csv(data_config[path_key])
    for path_key in ('train_csv_path', 'test_csv_path', 'sample_submission_path')
)

print(train_df.shape, test_df.shape, submission_df.shape)
train_df.head()

In [None]:
def print_unique_category(df, column):
    """Print the number of distinct values in ``df[column]`` and the values themselves.

    The output is two labelled lines followed by one blank line.
    """
    values = df[column]
    report = (
        f'unique_category_number: {values.nunique()}',
        f'cagetories: {values.unique()}',
        '',
    )
    # The trailing empty entry plus print's own newline yields the blank line.
    print('\n'.join(report))

# Summarise the distinct values of three training-set columns.
for _column in ('direction', 'x', 'y'):
    print_unique_category(train_df, _column)

In [None]:
# Null Value Check
def null_val_check(df):
    """Verify that ``df`` contains no null values.

    Prints ``'No Null values.'`` when every column is complete.

    Raises:
        AssertionError: for the first column (in column order) that has at
            least one null value; the message names the column and the count.
    """
    # Raise explicitly instead of using an `assert` statement so the check
    # still runs when Python is started with -O (which strips asserts).
    for column, missing in df.isnull().sum().items():
        if missing != 0:
            raise AssertionError(f'{column} has {missing} null values.')
    print('No Null values.')
    
# Both tables must be free of nulls before any preprocessing.
for _frame in (train_df, test_df):
    null_val_check(_frame)

In [None]:
# Display the dtype of every column in the training table.
train_df.dtypes

In [None]:
# Same distinct-value summary as for the training set, now on the test set.
for _column in ('direction', 'x', 'y'):
    print_unique_category(test_df, _column)
test_df.head()

In [None]:
# Preview the first rows of the sample submission table.
submission_df.head()

## 1.2 Normalization

In [None]:
# Keep a handle on the raw target column and show its summary statistics
# before any scaling is applied.
congestion = train_df['congestion']
congestion.describe()

In [None]:
# Alternative (disabled): standardization to zero mean / unit variance.
#congestion_mean = congestion.mean()
#congestion_std = congestion.std()
#train_df['congestion'] = (congestion - congestion_mean) / congestion_std

# Min-max scaling: c_min and c_max are kept so predictions can be mapped
# back to the original scale when the submission is written.
c_min, c_max = congestion.min(), congestion.max()
c_range = c_max - c_min
train_df['congestion'] = (congestion - c_min) / c_range

train_df['congestion'].describe()

## 1.3 Feature Engineering

In [None]:
# Remove the `row_id` column from both tables.
train_df, test_df = (
    frame.drop(columns=['row_id']) for frame in (train_df, test_df)
)

train_df.head()

In [None]:
def make_date_features(df, column='time', drop_column=True):
    """Return a copy of ``df`` with calendar features derived from ``column``.

    The values of ``df[column]`` are parsed with ``pd.to_datetime`` and the
    columns ``year``, ``month``, ``day``, ``dayofweek``, ``hour`` and
    ``minute`` are added (in that order).

    Args:
        df: Input table; it is left unmodified.
        column: Name of the column holding the timestamps.
        drop_column: When true, ``column`` is removed from the result.

    Returns:
        A new DataFrame with the added date features.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    out = df.copy()
    timestamps = pd.to_datetime(out[column])
    for part in ('year', 'month', 'day', 'dayofweek', 'hour', 'minute'):
        out[part] = getattr(timestamps.dt, part)
    if drop_column:
        out = out.drop(columns=[column])
    return out

# Expand the timestamp into calendar features for both tables. `year` is
# removed again because the 'year' column only contains '1991'.
train_df = make_date_features(train_df).drop(columns=['year'])
test_df = make_date_features(test_df).drop(columns=['year'])

train_df.head()

In [None]:
# Keep a copy of the full training table before it is split.
all_df = train_df.copy()

# Rows from month 9 form the validation set; all other rows stay in training.
is_september = train_df['month'] == 9
valid_df = train_df[is_september].reset_index(drop=True)
train_df = train_df[~is_september].reset_index(drop=True)

print(f'all_df length: {len(all_df)}')
print(f'train_df length: {len(train_df)}')
print(f'valid_df length: {len(valid_df)}')
valid_df.head()

In [None]:
def _split_features_target(frame, target='congestion'):
    """Return ``(features, target_column)`` for ``frame``."""
    return frame.drop(columns=[target]), frame[target]


X_all, y_all = _split_features_target(all_df)
X_train, y_train = _split_features_target(train_df)
X_valid, y_valid = _split_features_target(valid_df)

print(X_all.shape, y_all.shape)
print(X_train.shape, y_train.shape)
print(X_valid.shape, y_valid.shape)
print(test_df.shape)

In [None]:
# One-hot encode the categorical columns; every other column is passed
# through unchanged and ends up to the right of the one-hot columns.
categorical_features = ['direction', 'dayofweek']
ct = ColumnTransformer([('one_hot', OneHotEncoder(), categorical_features)], remainder="passthrough")
ct.fit(X_train)

encoded_X_train = ct.transform(X_train)
print(encoded_X_train.shape)

# `get_feature_names` was deprecated and later removed from scikit-learn;
# prefer `get_feature_names_out` and fall back only on old releases.
one_hot_encoder = ct.transformers_[0][1]
if hasattr(one_hot_encoder, 'get_feature_names_out'):
    feature_columns = one_hot_encoder.get_feature_names_out(categorical_features)
else:
    feature_columns = one_hot_encoder.get_feature_names(categorical_features)
print(feature_columns)

# Column labels for the encoded matrix: one-hot names first, then the
# remaining (passthrough) columns in their original order.
passthrough_columns = [name for name in X_train.columns if name not in categorical_features]
columns = list(feature_columns) + passthrough_columns

encoded_X_train_df = pd.DataFrame(encoded_X_train, columns=columns)
encoded_X_train_df.head()

In [None]:
def _as_encoded_frame(encoded):
    """Wrap an encoded feature matrix in a DataFrame labelled with ``columns``."""
    return pd.DataFrame(encoded, columns=columns)


# Apply the transformer that was fitted on the training split to the others.
encoded_X_all = ct.transform(X_all)
encoded_X_valid = ct.transform(X_valid)
encoded_X_test = ct.transform(test_df)

encoded_X_all_df = _as_encoded_frame(encoded_X_all)
encoded_X_valid_df = _as_encoded_frame(encoded_X_valid)
encoded_X_test_df = _as_encoded_frame(encoded_X_test)

print(encoded_X_all_df.shape, encoded_X_valid_df.shape, encoded_X_test_df.shape)

## 1.4 Datasets

In [None]:
config = {
    'lr': 1e-3,
    'epochs': 1,
    'batch_size': 256,
    'virtual_batch_size': 128,
}

# Unbatched (features, target) datasets; their lengths are row counts.
train_ds = tf.data.Dataset.from_tensor_slices((encoded_X_train, y_train))
valid_ds = tf.data.Dataset.from_tensor_slices((encoded_X_valid, y_valid))
print(len(train_ds), len(valid_ds))

# Only full batches are kept: the trailing partial batch is dropped.
train_ds = train_ds.batch(config['batch_size'], drop_remainder=True)
valid_ds = valid_ds.batch(config['batch_size'], drop_remainder=True)

# 2. Model Training

## 2.1 TabNet

![tabnet](https://github.com/titu1994/tf-TabNet/raw/master/images/TabNet.png?raw=true)

If you would like to know more about TabNet, please check out the [previous notebook][1].

[1]: https://www.kaggle.com/marcusgawronsky/tabnet-in-tensorflow-2-0

In [None]:
class GLUBlock(tf.keras.layers.Layer):
    """Gated linear unit built from two dense + batch-norm branches.

    One branch produces the output values, the other produces a gate; the
    result is ``output * sigmoid(gate)``.

    Args:
        units: Width of both branches. When ``None`` the width of the input's
            last axis is used.
        virtual_batch_size: Forwarded to both ``BatchNormalization`` layers.
        momentum: Forwarded to both ``BatchNormalization`` layers.
    """

    def __init__(self,
                 units: Optional[int]=None,
                 virtual_batch_size: Optional[int]=128,
                 momentum: Optional[float]=0.02):
        super().__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum

    def _new_batch_norm(self):
        """Create a batch-norm layer with this block's settings."""
        return tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size,
                                                  momentum=self.momentum)

    def build(self, input_shape: tf.TensorShape):
        # Fall back to a width-preserving block when no size was given.
        if self.units is None:
            self.units = input_shape[-1]

        # Output branch first, then gate branch (bias-free dense layers).
        self.fc_output = tf.keras.layers.Dense(self.units, use_bias=False)
        self.bn_output = self._new_batch_norm()
        self.fc_gate = tf.keras.layers.Dense(self.units, use_bias=False)
        self.bn_gate = self._new_batch_norm()

    def call(self, inputs: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None):
        values = self.bn_output(self.fc_output(inputs), training=training)
        gate_logits = self.bn_gate(self.fc_gate(inputs), training=training)
        return values * tf.keras.activations.sigmoid(gate_logits)

In [None]:
class FeatureTransformerBlock(tf.keras.layers.Layer):
    """Two stacked ``GLUBlock`` layers joined by a scaled residual connection.

    Args:
        units: Width of both GLU blocks. When ``None`` the width of the
            input's last axis is used.
        virtual_batch_size: Forwarded to both GLU blocks.
        momentum: Forwarded to both GLU blocks.
        skip: When truthy, the block input is added to the first GLU output
            before the second GLU is applied. The addition requires the input
            width to match ``units``.
    """

    def __init__(self,
                 units: Optional[int]=None,
                 virtual_batch_size: Optional[int]=128,
                 momentum: Optional[float]=0.02,
                 skip=False):
        super().__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum
        self.skip = skip

    def build(self, input_shape: tf.TensorShape):
        if self.units is None:
            self.units = input_shape[-1]

        self.initial = GLUBlock(units=self.units,
                                virtual_batch_size=self.virtual_batch_size,
                                momentum=self.momentum)
        self.residual = GLUBlock(units=self.units,
                                 virtual_batch_size=self.virtual_batch_size,
                                 momentum=self.momentum)

    def call(self, inputs: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None):
        initial = self.initial(inputs, training=training)

        # Plain truthiness test instead of comparing against True.
        if self.skip:
            initial += inputs

        residual = self.residual(initial, training=training)

        # Sum of the two branches, scaled by sqrt(0.5).
        return (initial + residual) * np.sqrt(0.5)

In [None]:
class AttentiveTransformer(tf.keras.layers.Layer):
    """Dense + batch-norm projection turned into a sparsemax attention mask.

    Args:
        units: Width of the mask. When ``None`` the width of the input's last
            axis is used.
        virtual_batch_size: Forwarded to the ``BatchNormalization`` layer.
        momentum: Forwarded to the ``BatchNormalization`` layer.
    """

    def __init__(self,
                 units: Optional[int]=None,
                 virtual_batch_size: Optional[int]=128,
                 momentum: Optional[float]=0.02):
        super().__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum

    def build(self, input_shape: tf.TensorShape):
        if self.units is None:
            self.units = input_shape[-1]

        self.fc = tf.keras.layers.Dense(self.units, use_bias=False)
        self.bn = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size,
                                                     momentum=self.momentum)

    def call(self, inputs: Union[tf.Tensor, np.ndarray],
             priors: Optional[Union[tf.Tensor, np.ndarray]]=None,
             training: Optional[bool]=None) -> tf.Tensor:
        logits = self.bn(self.fc(inputs), training=training)
        # The prior, when given, rescales the logits before sparsemax.
        if priors is not None:
            logits = logits * priors
        return tfa.activations.sparsemax(logits)

In [None]:
class TabNetStep(tf.keras.layers.Layer):
    """One decision step: transform the shared features, attend, mask the input.

    Args:
        units: Width of the step's feature transformer. When ``None`` the
            width of the ``inputs`` argument's last axis is used.
        virtual_batch_size: Forwarded to the sub-blocks.
        momentum: Forwarded to the sub-blocks.
    """

    def __init__(self,
                 units: Optional[int]=None,
                 virtual_batch_size: Optional[int]=128,
                 momentum: Optional[float]=0.02):
        super().__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum

    def build(self, input_shape: tf.TensorShape):
        if self.units is None:
            self.units = input_shape[-1]

        block_settings = dict(virtual_batch_size=self.virtual_batch_size,
                              momentum=self.momentum)
        # Step-specific transformer (with skip connection).
        self.unique = FeatureTransformerBlock(units=self.units, skip=True, **block_settings)
        # The mask is as wide as the layer input so it can be multiplied with it.
        self.attention = AttentiveTransformer(units=input_shape[-1], **block_settings)

    def call(self, inputs, shared, priors, training=None) -> Tuple[tf.Tensor]:
        """Return ``(split, masked, keys)`` for this step.

        ``split`` is the transformed ``shared`` tensor, ``keys`` is the
        attention mask computed from ``split`` and ``priors``, and ``masked``
        is ``keys * inputs``.
        """
        transformed = self.unique(shared, training=training)
        mask = self.attention(transformed, priors, training=training)
        return transformed, mask * inputs, mask

In [None]:
class TabNetEncoder(tf.keras.layers.Layer):
    """Multi-step encoder that masks the input features with attention at every step.

    Args:
        units: Width of the final bias-free ``Dense`` projection.
        n_steps: Number of ``TabNetStep`` layers run in the loop, in addition
            to one initial step.
        n_features: Width of the shared feature transformer and of every step.
        outputs: Accepted for signature compatibility; not stored or used here.
        gamma: Constant used in the prior update ``prior *= gamma - mean(keys)``.
        epsilon: Added to the mask values inside ``log`` in the entropy term.
        sparsity: Coefficient applied to the entropy loss registered via ``add_loss``.
        virtual_batch_size: Forwarded to batch-norm and all sub-blocks.
        momentum: Forwarded to batch-norm and all sub-blocks.
    """
    def __init__(self,
                 units: int=1,
                 n_steps: int=3,
                 n_features: int=8,
                 outputs: int=1,
                 gamma: float=1.3,
                 epsilon: float=1e-8,
                 sparsity: float=1e-5,
                 virtual_batch_size: Optional[int]=128,
                 momentum: Optional[float]=0.02):
        
        super().__init__()
        self.units = units 
        self.n_steps = n_steps 
        self.n_features = n_features 
        self.virtual_batch_size = virtual_batch_size 
        self.gamma = gamma 
        self.epsilon = epsilon 
        self.momentum = momentum 
        self.sparsity = sparsity 

    def build(self, input_shape: tf.TensorShape):
        # Input normalisation applied once, before any step.
        self.bn = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size,
                                                     momentum=self.momentum)
        # A single feature transformer instance reused by every step.
        self.shared_block = FeatureTransformerBlock(units=self.n_features,
                                                     virtual_batch_size=self.virtual_batch_size,
                                                     momentum=self.momentum)
        # The initial step only provides the first mask; its `split` output is discarded in call().
        self.initial_step = TabNetStep(units=self.n_features,
                                       virtual_batch_size=self.virtual_batch_size,
                                       momentum=self.momentum)
        self.steps = [TabNetStep(units=self.n_features,
                                 virtual_batch_size=self.virtual_batch_size,
                                 momentum=self.momentum) for _ in range(self.n_steps)]
        self.final = tf.keras.layers.Dense(units=self.units, use_bias=False)

    def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None) -> Tuple[tf.Tensor]:
        """Run all steps and return ``(prediction, encoded, importance)``.

        ``prediction`` is the final dense projection of the summed ReLU step
        outputs, ``encoded`` is the sum of the raw step outputs and
        ``importance`` is the sum of the attention masks that entered the loop.
        Also registers ``sparsity * entropy_loss`` through ``add_loss``.
        """
        entropy_loss = 0. 
        encoded = 0. 
        output = 0. 
        importance = 0. 
        # Batch-mean of an all-ones tensor: one prior value of 1 per input feature.
        prior = tf.reduce_mean(tf.ones_like(X), axis=0)
        
        B = prior * self.bn(X, training=training)
        shared = self.shared_block(B, training=training)
        _, masked, keys = self.initial_step(B, shared, prior, training=training)

        for step in self.steps:
            # Entropy of the mask produced by the previous step, averaged over
            # the batch and divided by the number of steps.
            entropy_loss += tf.reduce_mean(tf.reduce_sum(-keys * tf.math.log(keys + self.epsilon), axis=-1)) / tf.cast(self.n_steps, tf.float32)
            # The prior is updated with the batch mean of the mask (axis=0),
            # so it depends on which rows share a batch.
            prior *= (self.gamma - tf.reduce_mean(keys, axis=0))
            importance += keys 

            # Each step re-applies the shared block to the previously masked
            # features but always masks the same normalised input B.
            shared = self.shared_block(masked, training=training)
            split, masked, keys = step(B, shared, prior, training=training)
            features = tf.keras.activations.relu(split)

            output += features 
            encoded += split 

        # NOTE(review): the mask from the last loop iteration is not added to
        # `importance` or to the entropy term — confirm this is intended.
        self.add_loss(self.sparsity * entropy_loss)
        prediction = self.final(output)
        return prediction, encoded, importance 

## 2.2 Supervised Learning

In [None]:
#@tf.function
#def identity(x):
#    return x

class TabNetRegressor(tf.keras.Model):
    """Keras model wrapping a ``TabNetEncoder`` with a sigmoid output.

    The sigmoid bounds predictions to (0, 1); in this notebook the target is
    min-max scaled into [0, 1] before training.

    Args:
        outputs: Number of output units of the encoder's final projection.
        n_steps: Number of encoder steps (ignored when ``pretrained_encoder``
            is given).
        n_features: Width of the encoder's feature transformers (ignored when
            ``pretrained_encoder`` is given).
        gamma: Forwarded to a newly created encoder.
        epsilon: Forwarded to a newly created encoder.
        sparsity: Forwarded to a newly created encoder.
        pretrained_encoder: Existing encoder layer to reuse instead of
            building a new ``TabNetEncoder``.
        virtual_batch_size: Forwarded to a newly created encoder.
        momentum: Forwarded to a newly created encoder.
    """

    def __init__(self,
                 outputs: int=1,
                 n_steps: int=3,
                 n_features: int=3,
                 gamma: float=1.3,
                 epsilon: float=1e-8,
                 sparsity: float=1e-5,
                 pretrained_encoder: Optional[tf.keras.layers.Layer]=None,
                 virtual_batch_size: Optional[int]=128,
                 momentum: Optional[float]=0.02):

        super().__init__()
        self.outputs = outputs
        self.n_steps = n_steps
        self.n_features = n_features
        self.pretrained_encoder = pretrained_encoder
        self.virtual_batch_size = virtual_batch_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.momentum = momentum
        self.sparsity = sparsity

        if pretrained_encoder is None:
            self.encoder = TabNetEncoder(units=outputs,
                                         n_steps=n_steps,
                                         n_features=n_features,
                                         outputs=outputs,
                                         gamma=gamma,
                                         epsilon=epsilon,
                                         sparsity=sparsity,
                                         virtual_batch_size=self.virtual_batch_size,
                                         momentum=momentum)
        else:
            self.encoder = pretrained_encoder

    def forward(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None) -> Tuple[tf.Tensor]:
        """Return ``(prediction, encoded, importance)`` for ``X``.

        ``training`` is forwarded to the encoder; previously it was accepted
        but silently ignored.
        """
        output, encoded, importance = self.encoder(X, training=training)
        prediction = tf.keras.activations.sigmoid(output)
        return prediction, encoded, importance

    def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None) -> tf.Tensor:
        """Return the sigmoid prediction for ``X``."""
        prediction, _, _ = self.forward(X, training=training)
        return prediction

    def transform(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None) -> tf.Tensor:
        """Return the encoder's ``encoded`` representation of ``X``."""
        _, encoded, _ = self.forward(X, training=training)
        return encoded

    def explain(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None) -> tf.Tensor:
        """Return the encoder's accumulated attention masks for ``X``."""
        _, _, importance = self.forward(X, training=training)
        return importance

In [None]:
config = {
    'regressor_lr': 1e-3,
    'epochs': 1,
    'batch_size': 256,
    'virtual_batch_size': 128,
}

# Baseline regressor with a freshly initialised encoder.
m = TabNetRegressor(
    outputs=1,
    n_steps=3,
    n_features=2,
    virtual_batch_size=config['virtual_batch_size'],
)

m.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=config['regressor_lr']),
    loss=tf.keras.losses.mean_squared_error,
)

In [None]:
# Train the baseline regressor on the batched training set, passing the
# batched validation set as validation data.
m.fit(train_ds, validation_data=valid_ds, epochs=config['epochs'])

In [None]:
# Print the layer / parameter summary of the trained baseline regressor.
m.summary()

In [None]:
# Shuffled, fully-batched view of the training features used for explanations.
importance_ds = tf.data.Dataset.from_tensor_slices(encoded_X_train)
importance_ds = importance_ds.shuffle(buffer_size=len(importance_ds))
importance_ds = importance_ds.batch(batch_size=config['batch_size'], drop_remainder=True)

# Collect the masks of up to 100 batches and concatenate once. Seeding the
# accumulator with a batch of zeros (as before) pulled the mean towards zero.
importance_batches = [m.explain(data).numpy() for data in importance_ds.take(100)]
important_scores = np.concatenate(importance_batches)
# Keep `importance` bound to the last batch, as later cells read it.
importance = importance_batches[-1]

pd.Series(important_scores.mean(axis=0), index=encoded_X_train_df.columns).plot.bar(title='Global Importances')

## 2.3 Unsupervised Pretraining

In [None]:
class TabNetDecoder(tf.keras.layers.Layer):
    """Decoder that sums the dense projections of several feature-transformer steps.

    Args:
        units: Width of each step's ``Dense`` projection (the decoder output).
        n_steps: Number of (feature transformer, dense) pairs.
        n_features: Width of the shared and per-step feature transformers.
        outputs, gamma, epsilon, sparsity: Accepted for signature
            compatibility; not stored or used here.
        virtual_batch_size: Forwarded to the feature transformers.
        momentum: Forwarded to the feature transformers.
    """

    def __init__(self,
                 units=1,
                 n_steps=3,
                 n_features=8,
                 outputs=1,
                 gamma=1.3,
                 epsilon=1e-8,
                 sparsity=1e-5,
                 virtual_batch_size=128,
                 momentum=0.02):
        super().__init__()
        self.units = units
        self.n_steps = n_steps
        self.n_features = n_features
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum

    def _new_transformer(self):
        """Create a feature transformer with this decoder's settings."""
        return FeatureTransformerBlock(units=self.n_features,
                                       virtual_batch_size=self.virtual_batch_size,
                                       momentum=self.momentum)

    def build(self, input_shape: tf.TensorShape):
        self.shared_block = self._new_transformer()
        self.steps = [self._new_transformer() for _ in range(self.n_steps)]
        self.fc = [tf.keras.layers.Dense(units=self.units) for _ in range(self.n_steps)]

    def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None) -> tf.Tensor:
        decoded = 0.

        for step_block, projection in zip(self.steps, self.fc):
            # The shared block is applied to X again in every iteration.
            hidden = step_block(self.shared_block(X, training=training), training=training)
            decoded += projection(hidden)
        return decoded

In [None]:
class TabNetAutoencoder(tf.keras.Model):
    """Masked-feature autoencoder used to pretrain a ``TabNetEncoder``.

    The input is batch-normalised, a random subset of its entries is zeroed
    with dropout, the encoder sees the corrupted input and the decoder is
    trained to reconstruct the original values at the zeroed positions.

    Args:
        outputs: Number of output units of the encoder's final projection.
        inputs: Number of input features; the decoder emits this many values.
        n_steps: Number of encoder / decoder steps.
        n_features: Width of the feature transformers.
        gamma: Forwarded to the encoder.
        epsilon: Forwarded to the encoder.
        sparsity: Forwarded to the encoder.
        virtual_batch_size: Forwarded to batch-norm, encoder and decoder.
        momentum: Forwarded to batch-norm, encoder and decoder.
    """

    def __init__(self,
                 outputs: int=1,
                 inputs: int=12,
                 n_steps: int=3,
                 n_features: int=8,
                 gamma: float=1.3,
                 epsilon: float=1e-8,
                 sparsity: float=1e-5,
                 virtual_batch_size: Optional[int]=128,
                 momentum: Optional[float]=0.02):

        super().__init__()
        self.outputs = outputs
        self.inputs = inputs
        self.n_steps = n_steps
        self.n_features = n_features
        self.virtual_batch_size = virtual_batch_size
        self.gamma = gamma
        self.epsilon = epsilon
        # Misspelled attribute name kept as an alias for backward compatibility.
        self.esplison = epsilon
        self.momentum = momentum
        self.sparsity = sparsity

        self.encoder = TabNetEncoder(units=outputs,
                                     n_steps=n_steps,
                                     n_features=n_features,
                                     outputs=outputs,
                                     gamma=gamma,
                                     epsilon=epsilon,
                                     sparsity=sparsity,
                                     virtual_batch_size=self.virtual_batch_size,
                                     momentum=momentum)
        self.decoder = TabNetDecoder(units=inputs,
                                     n_steps=n_steps,
                                     n_features=n_features,
                                     virtual_batch_size=self.virtual_batch_size,
                                     momentum=momentum)
        self.bn = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size,
                                                     momentum=momentum)
        self.do = tf.keras.layers.Dropout(0.25)

    def forward(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None) -> Tuple[tf.Tensor]:
        """Return ``(prediction, encoded, importance, X, M)``.

        ``X`` is the batch-normalised input and ``M`` is the dropout mask:
        dropout applied to an all-ones tensor, so dropped entries are exactly 0.
        ``training`` is forwarded to every sub-layer.
        """
        X = self.bn(X, training=training)

        # training mask
        M = self.do(tf.ones_like(X), training=training)
        D = X * M

        # encoder
        output, encoded, importance = self.encoder(D, training=training)
        prediction = tf.keras.activations.sigmoid(output)

        return prediction, encoded, importance, X, M

    def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None) -> tf.Tensor:
        """Return the prediction and register the reconstruction loss."""
        # encode
        prediction, encoded, _, X, M = self.forward(X, training=training)

        # decode
        reconstruction = self.decoder(encoded, training=training)

        # loss: only the entries hidden from the encoder (M == 0) are scored,
        # against the uncorrupted normalised input. Kept entries of a Keras
        # dropout mask are scaled by 1 / (1 - rate), so `X * (1 - M)` is not a
        # valid target for them and they are excluded.
        hidden = tf.equal(M, 0.)
        squared_error = tf.square(X - reconstruction)
        loss = tf.reduce_mean(tf.where(hidden, squared_error, tf.zeros_like(squared_error)))

        self.add_loss(loss)
        return prediction

    def transform(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None) -> tf.Tensor:
        """Return the encoder's ``encoded`` representation of ``X``."""
        _, encoded, _, _, _ = self.forward(X, training=training)
        return encoded

    def explain(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool]=None) -> tf.Tensor:
        """Return the encoder's accumulated attention masks for ``X``."""
        _, _, importance, _, _ = self.forward(X, training=training)
        return importance

In [None]:
@tf.function 
def dummy_loss(y, t):
    # Placeholder loss handed to `compile`: it ignores both arguments and
    # always returns 0. The autoencoder registers its reconstruction loss
    # itself through `add_loss`.
    return 0.

In [None]:
config = {'regressor_lr': 1e-3,
          'ae_lr': 5e-4,
          'epochs': 3,
          'batch_size': 256,
          'virtual_batch_size': 128,
          }

# The decoder has to emit one value per encoded input feature, so take the
# width from the encoded training matrix instead of hard-coding it.
ae = TabNetAutoencoder(outputs=1,
                       inputs=encoded_X_train.shape[1],
                       n_steps=3,
                       n_features=2,
                       virtual_batch_size=config['virtual_batch_size'])

# The compiled loss is a constant placeholder; the model adds its own
# reconstruction loss.
ae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=config['ae_lr']),
           loss=dummy_loss)

In [None]:
# Pretrain the autoencoder on the batched training set (no validation data).
ae.fit(train_ds, epochs=config['epochs'])

In [None]:
# Print the layer / parameter summary of the pretrained autoencoder.
ae.summary()

In [None]:
# Collect the masks of up to 100 batches and concatenate once. Seeding the
# accumulator with a batch of zeros (as before) pulled the mean towards zero.
importance_batches = [ae.explain(data).numpy() for data in importance_ds.take(100)]
important_scores = np.concatenate(importance_batches)
# Keep `importance` bound to the last batch, as later cells read it.
importance = importance_batches[-1]

pd.Series(important_scores.mean(axis=0), index=encoded_X_train_df.columns).plot.bar(title='Global Importances')

## 2.4 Supervised Fine-tuning

In [None]:
config = {'regressor_lr': 1e-3,
          'ae_lr': 5e-4,
          'ft_lr': 2e-4,
          'epochs': 3,
          'batch_size': 256,
          'virtual_batch_size': 128,
          }

# Reuse the encoder trained inside the autoencoder. It is referenced by
# attribute rather than by its position in `ae.layers`, which depends on the
# order in which the sub-layers were assigned. `n_steps` and `n_features` are
# not applied to an encoder that is passed in ready-made.
pm = TabNetRegressor(outputs=1,
                      n_steps=3,
                      n_features=2,
                      pretrained_encoder=ae.encoder,
                      virtual_batch_size=config['virtual_batch_size'])

pm.compile(tf.keras.optimizers.Adam(learning_rate=config['ft_lr']),
           tf.keras.losses.mean_squared_error)

pm.fit(train_ds, validation_data=valid_ds, epochs=config['epochs'])

In [None]:
# Collect the masks of up to 100 batches and concatenate once. Seeding the
# accumulator with a batch of zeros (as before) pulled the mean towards zero.
importance_batches = [pm.explain(data).numpy() for data in importance_ds.take(100)]
important_scores = np.concatenate(importance_batches)
importance = importance_batches[-1]

pd.Series(important_scores.mean(axis=0), index=encoded_X_train_df.columns).plot.bar(title='Global Importances')

# 3. Prediction & Submit

In [None]:
batch_size = config['batch_size']

# Predict on full batches only; the trailing partial batch is dropped here
# and handled separately below.
test_ds = tf.data.Dataset.from_tensor_slices(encoded_X_test_df).batch(
    batch_size=batch_size, drop_remainder=True)

pred = pm.predict(test_ds)

if len(pred) != len(submission_df):
    # Rows lost to drop_remainder: predict the last `batch_size` test rows as
    # one full batch and keep only the predictions for the missing tail.
    test_reminder_df = encoded_X_test_df[-batch_size:]
    test_reminder_ds = tf.data.Dataset.from_tensor_slices(test_reminder_df).batch(batch_size)

    reminder_num = len(encoded_X_test_df) - len(test_ds) * batch_size
    reminder_pred = pm.predict(test_reminder_ds)[-reminder_num:]

    pred = np.concatenate([pred, reminder_pred], axis=0)


# Undo the target scaling that was applied before training.
#submission_df['congestion'] = (pred * congestion_std) + congestion_mean #Standardization
submission_df['congestion'] = pred * (c_max - c_min) + c_min #min-max scaling

submission_df.to_csv('submission.csv', index=False)
submission_df.tail()