In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
%%time
import datatable as dt
# Read both competition CSVs with datatable and convert them to pandas frames.
train, test = (
    dt.fread(csv_path).to_pandas()
    for csv_path in (
        "/kaggle/input/tabular-playground-series-mar-2022/train.csv",
        "/kaggle/input/tabular-playground-series-mar-2022/test.csv",
    )
)
# Report (rows, columns) for train, then test.
for frame in (train, test):
    print(frame.shape)

In [None]:
# Preview the first rows of the training frame as loaded.
train.head()

In [None]:
# Summary statistics of the training frame's columns.
train.describe()

In [None]:
# Summary statistics of the test frame's columns.
test.describe()

In [None]:
%%time
# Integer code assigned to each label of the `direction` column.
dir_mapper = {'EB': 0,
              'NE': 1,
              'NB': 2,
              'NW': 3,
              'WB': 4,
              'SW': 5,
              'SB': 6,
              'SE': 7}


def feature_engineering(data):
    """Return a new frame with calendar features added and ``time`` removed.

    The input frame is left untouched, so the function can be called again
    on the same raw frame and gives the same result.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``time`` column parseable by ``pd.to_datetime`` and a
        ``direction`` column whose values are keys of ``dir_mapper``.

    Returns
    -------
    pandas.DataFrame
        ``data`` without ``time``, with ``direction`` replaced by its int8
        code and these columns appended: ``month``, ``weekday``, ``hour``,
        ``minute``, ``is_month_start``, ``is_month_end``,
        ``minute_in_a_day`` (int16), ``is_weekend``, ``is_afternoon``.

    Raises
    ------
    KeyError
        If ``direction`` holds a value that is not a key of ``dir_mapper``.
    """
    tt = pd.to_datetime(data['time'])

    # Vectorised dict lookup; labels missing from the dict come back as NaN.
    direction_codes = data['direction'].map(dir_mapper)
    unknown = direction_codes.isna().to_numpy()
    if unknown.any():
        # Raise the same exception type as a plain dict lookup would.
        raise KeyError(data['direction'].to_numpy()[unknown][0])

    hour = tt.dt.hour
    minute = tt.dt.minute
    # assign() works on a copy, so the caller's frame is never mutated.
    features = data.assign(
        month=tt.dt.month.astype(np.int8),
        weekday=tt.dt.weekday.astype(np.int8),
        hour=hour.astype(np.int8),
        minute=minute.astype(np.int8),
        is_month_start=tt.dt.is_month_start.astype(np.int8),
        is_month_end=tt.dt.is_month_end.astype(np.int8),
        # Max value is 23*60+59 = 1439, which does not fit in int8.
        minute_in_a_day=(hour * 60 + minute).astype(np.int16),
        # dayofweek: Monday=0 ... Sunday=6, so >4 means Saturday or Sunday.
        is_weekend=(tt.dt.dayofweek > 4).astype(np.int8),
        # NOTE(review): strict '>' means 12:00-12:59 is not flagged; confirm intended.
        is_afternoon=(hour > 12).astype(np.int8),
        direction=direction_codes.astype(np.int8),
    )
    return features.drop(columns=['time'])
train = feature_engineering(train)
# Scale the target down by 100; the prediction cell multiplies by 100 to undo this.
# `np.float` was removed in NumPy 1.24 (it was only an alias of the builtin float),
# so cast to the explicit 64-bit float type instead.
train['congestion'] = (train['congestion'] / 100).astype(np.float64)

In [None]:
# Inspect the last rows of the training frame after feature engineering and target scaling.
train.tail()

In [None]:
%%time
from sklearn.model_selection import train_test_split
# Features are every column except the target and the row identifier.
X = train.drop(columns=['congestion', 'row_id'])
y = train['congestion']
# Float sizes are fractions of the rows: 14% for training, 6% for validation;
# the remaining rows are not used by this split.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    train_size=0.14,
    test_size=0.06,
    random_state=0,
)

In [None]:
import warnings
import tensorflow as tf
import os
# Expose only CUDA device 0 to this process and silence Python warnings.
os.environ["CUDA_VISIBLE_DEVICES"]="0"
warnings.filterwarnings('ignore')

# One hidden layer; the sigmoid output keeps predictions in [0, 1].
n_features = X_train.shape[1]
model_tf = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(n_features,)),
    tf.keras.layers.Dense(units=128, activation='relu', use_bias=True), #64, 128
    tf.keras.layers.Dropout(0.25), #0.1, 0.25
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(units=1, activation='sigmoid', use_bias=True),
])
model_tf.summary()

In [None]:
%%time
# The target is a continuous value, so report mean absolute error during training.
# 'accuracy' is a classification metric and carries no useful signal for regression.
model_tf.compile(
    loss="mse",
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), #0.1, 0.01
    metrics=['mae'])
model_tf.fit(
    x=X_train,
    y=y_train,
    batch_size=512,
    epochs=20, #3, 20
    validation_data=(X_valid, y_valid))

In [None]:
%%time
test = feature_engineering(test)
# The model was fit without row_id, so drop it before predicting.
x_test = test.drop(['row_id'], axis=1)
pred_test = model_tf.predict(x_test)
# predict() returns one column per output unit; flatten it to one value per row.
submission = pd.DataFrame(data={"row_id" : test.row_id, "congestion" : pred_test.reshape(-1)})
# Undo the /100 target scaling. Round before casting: astype(int) alone truncates
# toward zero (e.g. 34.99 -> 34), which biases every prediction downward.
submission['congestion'] = (submission['congestion']*100).round().astype(np.int8)
submission.to_csv('submission.csv', index=False)
print(submission.shape)
submission.head()