In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

from keras.models import  Sequential
from keras.layers.core import  Reshape , Dense, Flatten, Dropout
from keras.callbacks import EarlyStopping
from keras.layers import BatchNormalization, Convolution1D , MaxPooling1D, Normalization, TextVectorization, Embedding
from keras import Model, Input, layers
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the training CSV of the Tabular Playground Series (Mar 2022) competition.
train = pd.read_csv('/kaggle/input/tabular-playground-series-mar-2022/train.csv')

print(train.shape)
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() only appended a stray "None" line.
train.info()
train.head()

In [None]:
# Load the test CSV of the Tabular Playground Series (Mar 2022) competition.
test = pd.read_csv('/kaggle/input/tabular-playground-series-mar-2022/test.csv')

print(test.shape)
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() only appended a stray "None" line.
test.info()
test.head()

## Feature Engineering
inspired by: https://www.kaggle.com/code/martynovandrey/tps-mar-22-fe-the-less-the-better

In [None]:
# For both splits: parse 'time' into datetimes and build the 'road' key by
# concatenating x, y (as strings) and direction.
for frame in (train, test):
    frame['time'] = pd.to_datetime(frame['time'])
    frame['road'] = frame['x'].astype(str) + frame['y'].astype(str) + frame['direction']

In [None]:
def add_basic_features(df):
    """Return a new frame with calendar features and one-hot ``road`` columns added.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-typed ``time`` column (the ``.dt`` accessor is
        used) and a ``road`` column.

    Returns
    -------
    pandas.DataFrame
        The original columns, followed by ``day``, ``weekday``, ``hour`` and
        ``minute``, followed by one integer indicator column per distinct
        ``road`` value present in ``df``. The input frame is not modified.
    """
    timestamps = df['time'].dt
    # assign() builds a new frame, so the caller's DataFrame is no longer
    # mutated as a hidden side effect of calling this function.
    enriched = df.assign(
        day=timestamps.day,
        weekday=timestamps.weekday,
        hour=timestamps.hour,
        minute=timestamps.minute,
    )

    # Indicator columns are derived only from the road values found in this frame.
    road_dummies = pd.get_dummies(enriched['road'], dtype='int')
    return pd.concat([enriched, road_dummies], axis=1)

In [None]:
# Run the shared feature builder on both splits.
train, test = add_basic_features(train), add_basic_features(test)

In [None]:
# Median training congestion for each (road, weekday, hour, minute) slot,
# truncated to int, then attached to both splits as the 'median' column.
slot_keys = ['road', 'weekday', 'hour', 'minute']
medians = (
    train.groupby(slot_keys)['congestion']
    .median()
    .astype(int)
    .rename('median')
    .reset_index()
)
train = train.merge(medians, on=slot_keys, how='left')
test = test.merge(medians, on=slot_keys, how='left')

In [None]:
# Minimum training congestion for each (road, weekday, hour, minute) slot,
# attached to both splits as the 'min' column.
slot_keys = ['road', 'weekday', 'hour', 'minute']
mins = (
    train.groupby(slot_keys)['congestion']
    .min()
    .astype(int)
    .rename('min')
    .reset_index()
)
train = train.merge(mins, on=slot_keys, how='left')
test = test.merge(mins, on=slot_keys, how='left')

In [None]:
# Maximum training congestion for each (road, weekday, hour, minute) slot,
# attached to both splits as the 'max' column.
slot_keys = ['road', 'weekday', 'hour', 'minute']
maxs = (
    train.groupby(slot_keys)['congestion']
    .max()
    .astype(int)
    .rename('max')
    .reset_index()
)
train = train.merge(maxs, on=slot_keys, how='left')
test = test.merge(maxs, on=slot_keys, how='left')

In [None]:
# 25th-percentile training congestion for each (road, weekday, hour, minute)
# slot, truncated to int, attached to both splits as the 'quantile25' column.
slot_keys = ['road', 'weekday', 'hour', 'minute']
# A scalar q keeps the result indexed by the group keys only. The previous
# list-valued q added an extra index level that then had to be dropped by its
# positional auto-name ('level_4'), which breaks if the number of keys changes.
quantile25 = (
    train.groupby(slot_keys)['congestion']
    .quantile(0.25)
    .astype(int)
    .rename('quantile25')
    .reset_index()
)
train = train.merge(quantile25, on=slot_keys, how='left')
test = test.merge(quantile25, on=slot_keys, how='left')

In [None]:
# 75th-percentile training congestion for each (road, weekday, hour, minute)
# slot, truncated to int, attached to both splits as the 'quantile75' column.
slot_keys = ['road', 'weekday', 'hour', 'minute']
# A scalar q keeps the result indexed by the group keys only. The previous
# list-valued q added an extra index level that then had to be dropped by its
# positional auto-name ('level_4'), which breaks if the number of keys changes.
quantile75 = (
    train.groupby(slot_keys)['congestion']
    .quantile(0.75)
    .astype(int)
    .rename('quantile75')
    .reset_index()
)
train = train.merge(quantile75, on=slot_keys, how='left')
test = test.merge(quantile75, on=slot_keys, how='left')

In [None]:
# Model inputs: the one-hot road indicator columns (named x + y + direction),
# listed explicitly and grouped by their x/y prefix, followed by the per-slot
# congestion statistics merged in earlier.
road_columns = [
    '00EB', '00NB', '00SB',
    '01EB', '01NB', '01SB', '01WB',
    '02EB', '02NB', '02SB', '02WB',
    '03EB', '03NB', '03NE', '03SB', '03SW', '03WB',
    '10EB', '10NB', '10NE', '10SB', '10SW', '10WB',
    '11EB', '11NB', '11SB', '11WB',
    '12EB', '12NB', '12NE', '12SB', '12SW', '12WB',
    '13EB', '13NB', '13NE', '13SB', '13SW', '13WB',
    '20EB', '20NB', '20SB', '20WB',
    '21EB', '21NB', '21NE', '21NW', '21SB', '21SE', '21SW', '21WB',
    '22EB', '22NB', '22NE', '22NW', '22SB', '22SE', '22SW', '22WB',
    '23EB', '23NB', '23NE', '23SB', '23SW', '23WB',
]
stat_columns = ['median', 'quantile25', 'quantile75', 'min', 'max']
features = road_columns + stat_columns

# Plain NumPy arrays for Keras: feature matrices for both splits and the training target.
X_train = train[features].to_numpy()
y = train['congestion'].to_numpy()
X_test = test[features].to_numpy()

## Model
(I tried some other approaches, but at the end of the day the simplest sequential model performed best.)

In [None]:
# Input-scaling layer normalising along the last (feature) axis;
# adapt() fits it on the training feature matrix only.
normalizer = Normalization(
    axis=-1,
)
normalizer.adapt(data=X_train)

In [None]:
# Simple sequential regressor: adapted normalisation layer -> one hidden
# ReLU layer of 64 units -> single linear output unit.

model = Sequential()
model.add(normalizer)
model.add(Dense(64, activation='relu'))
model.add(Dense(1))

In [None]:
# Optimise mean absolute error with Adam.
model.compile(optimizer='Adam', loss='mean_absolute_error')

# Stop once val_loss has not improved for 5 epochs. restore_best_weights=True
# rolls the model back to the best epoch; without it the model kept the weights
# of the last epoch run, i.e. up to `patience` epochs past the best val_loss.
callbacks = [EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)]

# 20% of the training rows are held out for validation; at most 20 epochs.
history = model.fit(
    x=X_train,
    y=y,
    validation_split=0.2,
    epochs=20,
    callbacks=callbacks,
    batch_size=64,
)

## Submission

In [None]:
# Score the test feature matrix silently, then display the mean prediction
# as the cell output.
predictions = model.predict(x=X_test, verbose=0)
np.mean(predictions)

In [None]:
# Flatten the predictions to one value per test row, pair them with row_id,
# and write the submission file.
flat_predictions = predictions.reshape(len(predictions))
submissions = pd.DataFrame({
    'row_id': test['row_id'],
    'congestion': flat_predictions,
})
submissions.to_csv('predictions.csv', index=False, header=True)