In [None]:
#!pip install pycaret[full]
import pandas as pd
import seaborn as sns
import numpy as np
from statsmodels.distributions.empirical_distribution import ECDF
import matplotlib.pyplot as plt
import statsmodels.api as sm
import calendar

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.model_selection import train_test_split
# Read the three competition files; timestamps are parsed into datetimes and the
# training target is stored as an integer.
train = pd.read_csv('../input/tabular-playground-series-mar-2022/train.csv')
train = train.assign(
    time=pd.to_datetime(train['time']),
    congestion=train['congestion'].astype(int),
)

test = pd.read_csv('../input/tabular-playground-series-mar-2022/test.csv')
test = test.assign(time=pd.to_datetime(test['time']))

submission  = pd.read_csv('../input/tabular-playground-series-mar-2022/sample_submission.csv')

In [None]:
# Stack train and test so every derived feature is computed the same way for both.
combined = pd.concat([train, test])

# Datetime accessor of the timestamp column, reused for all calendar features.
stamp = combined['time'].dt

combined = combined.assign(
    day=stamp.day,
    month=stamp.month,
    hour=stamp.hour,
    minute=stamp.minute,
    weekday=stamp.weekday,
    # 1 for hours 7..11, 0 otherwise.
    am=((stamp.hour < 12) & (stamp.hour > 6)).map({False:0, True : 1}),
    # Index of the 20-minute slot counted from 12:00 (negative before noon).
    moment=(stamp.hour - 12) * 3 + stamp.minute // 20,
    # Road identifier: x, y and direction concatenated as text.
    road=combined['x'].astype(str) + combined['y'].astype(str) + combined['direction'].astype(str),
).drop(columns=['time', 'row_id'])

In [None]:
# Raw columns that are dropped before modelling (road/moment/weekday remain).
drop = ['x','y','day','direction','hour','minute']
combined = combined.drop(columns=drop)

# `combined` was built as concat([train, test]), so the first len(train) rows are
# the training rows. Deriving the boundary from `train` avoids a hard-coded row
# count that silently mis-splits if the input file changes.
# NOTE: `train` must still be the frame that was concatenated into `combined`.
n_train = len(train)
X = combined.iloc[:n_train].copy()
X_test = combined.iloc[n_train:].copy()

##Weekday clean
# Keep Monday..Friday only (weekday 0..4).
X = X[X['weekday']<=4]

In [None]:
# Per (road, weekday, moment) summary of the training congestion: min, max, median.
# One groupby/agg replaces three identical groupbys, and one merge per frame
# replaces three; resulting column order is min, max, median as before.
# NOTE(review): the statistics are computed on X itself, so each training row's
# own target contributes to its features — confirm this is intended.
group_keys = ['road','weekday','moment']
road_stats = (
    X.groupby(group_keys)['congestion']
     .agg(min_congestion='min', max_congestion='max', median_congestion='median')
     .reset_index()
)

# Left merges keep every row of X / X_test; key combinations that do not occur
# in X get NaN statistics.
X = pd.merge(X, road_stats, how='left', on=group_keys)
X_test = pd.merge(X_test, road_stats, how='left', on=group_keys)

# One-hot encode the road identifier.
hot_encode = ['road']
X = pd.get_dummies(X, columns = hot_encode)
X_test = pd.get_dummies(X_test, columns = hot_encode)

# The two frames are encoded independently, so force the test frame onto the
# training columns (same set, same order). A road missing from the test rows
# becomes an all-zero column; a road unseen in training is dropped.
X_test = X_test.reindex(columns=X.columns, fill_value=0)

In [None]:
# Separate the regression target from the model inputs; the test frame only
# carries the target column as a leftover of the earlier concat, so drop it.
target_col = 'congestion'
Y_train = X[target_col]
X_train = X.drop(columns=[target_col])
X_test = X_test.drop(columns=[target_col])

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features only, then apply the same
# transformation to both the training and the test features.
sc = StandardScaler().fit(X_train)
X_train_sc = sc.transform(X_train)
X_test_sc = sc.transform(X_test)

In [None]:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras.layers import *
from tensorflow.keras import activations,callbacks
import tensorflow_addons as tfa
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import *
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from keras.models import Model

In [None]:
def nn(n_features=72):
    """Build the small feed-forward regression network.

    Architecture: input -> Dense(16, relu) -> Dense(1, linear, named 'last').
    The model is returned uncompiled.

    Args:
        n_features: Number of input features per sample. Defaults to 72, the
            width that was previously hard-coded, so existing ``nn()`` calls
            are unaffected.

    Returns:
        A Keras ``Model`` mapping a ``(n_features,)`` input to one output.
    """
    # `(72)` is just the int 72, not a tuple; pass a real 1-tuple as the shape.
    inp = Input(shape=(n_features,))
    x = Dense(16,activation= 'relu',kernel_initializer ='he_uniform')(inp)
    final_x = Dense(1, activation = 'linear',kernel_initializer ='he_uniform',name = 'last')(x)
    model  = Model(inp,final_x)
    return model

In [None]:
# Stand-alone model instance plus loss/optimizer settings.
NN = nn()
loss = 'mean_absolute_error'
optimizer= Adam()

# Progress bar callback from tensorflow_addons.
tqdm_callback = tfa.callbacks.TQDMProgressBar()

# Stop once val_loss has not improved for 5 epochs and roll back to the best weights.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Halve the learning rate after 2 epochs without val_loss improvement.
plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
)

In [None]:
from sklearn.model_selection import GroupKFold,KFold
from sklearn.metrics import *

# Per-fold test predictions (averaged after the loop) and out-of-fold
# predictions for the training rows.
pred_folds_soft_vote = []
pred_train = np.zeros(shape=(X_train.shape[0]))

# Folds are grouped by weekday, so a given weekday is never split between the
# training and validation part of a fold.
GROUPS = X_train['weekday']
gkf = GroupKFold(n_splits=4)

fold = 0
for fold, (train_index, val_index) in enumerate(
        gkf.split(X_train_sc, Y_train , groups = GROUPS), start=1):
    x_tr = X_train_sc[train_index]
    x_val = X_train_sc[val_index]
    # The split yields positional indices: use .iloc so the lookup does not
    # depend on Y_train happening to carry a 0..n-1 index.
    y_tr = Y_train.iloc[train_index].to_numpy()
    y_val = Y_train.iloc[val_index].to_numpy()

    # Fresh model and optimizer for every fold.
    model = nn()
    model.compile(loss='mean_absolute_error',
                  metrics = ['mean_absolute_error'],
                  optimizer = keras.optimizers.Adam(learning_rate=0.001))

    model.fit(x_tr,y_tr,
              batch_size = 512,
              validation_data=(x_val,y_val),
              epochs=60,
              callbacks=[es, plateau,tqdm_callback],
              verbose =2)

    # Validation predictions are rounded to whole numbers before scoring.
    pred_val = np.round(model.predict(x_val))
    score_fold = mean_absolute_error(y_val,pred_val)
    print('SCORE FOLD {} = {}'.format(fold,score_fold))
    pred_train[val_index]= pred_val.squeeze()

    # Keep the raw test predictions: rounding each fold before averaging
    # throws away information, so rounding happens once on the fold mean.
    pred = model.predict(X_test_sc)
    pred_folds_soft_vote.append(pred)

# Average across folds, then round once.
pred_folds_soft_vote = np.round(np.mean(pred_folds_soft_vote,axis=0))

score_total = mean_absolute_error(Y_train,pred_train)
print('SCORE OOF',score_total)

In [None]:
# Store the averaged predictions as integers. The array is flattened first so a
# plain 1-D vector is assigned to the column regardless of whether the
# averaged predictions are shaped (n,) or (n, 1).
X_test['pred'] = np.ravel(pred_folds_soft_vote).astype(int)

In [None]:
# Reload the raw training data to build reference bounds for post-processing.
train = pd.read_csv('../input/tabular-playground-series-mar-2022/train.csv')
train['time'] = pd.to_datetime(train.time)

# Keep weekday 0..3 in months after April. The explicit copy makes the column
# assignments below operate on an independent frame instead of a filtered slice
# (avoids chained-assignment warnings / ambiguity).
# NOTE(review): this keeps weekday < 4 while the model's training frame keeps
# weekday <= 4 — confirm the difference is intended.
train = train[(train.time.dt.weekday< 4) & (train.time.dt.month > 4)].copy()

train['day']=train['time'].dt.dayofyear
# From here on 'time' holds the 20-minute slot index counted from 12:00,
# no longer a timestamp.
train['time']= (train['time'].dt.hour - 12)*3 + train['time'].dt.minute//20

# Rows from day-of-year 246 onward (3 September in a non-leap year), at or after 12:00.
sep = train[(train.day >= 246) & (train.time >= 0)]

In [None]:
# Congestion grouped by slot and road; the 15% and 70% quantiles of each group
# are extracted as plain arrays (one value per group, in group-key order).
congestion_by_slot = sep.groupby(['time', 'x', 'y', 'direction']).congestion
lower = congestion_by_slot.quantile(0.15).values
upper = congestion_by_slot.quantile(0.7).values

In [None]:
# Store the averaged predictions as a flat 1-D column.
X_test['pred'] = np.ravel(pred_folds_soft_vote)

# Clip every prediction into its [lower, upper] band. Item assignment is used
# instead of `X_test.pred = ...`, which would silently create a plain attribute
# if the column did not exist.
# NOTE(review): `lower`/`upper` are positional arrays, so this assumes the rows
# of X_test are ordered like the sorted (time, x, y, direction) groups — confirm.
X_test['pred'] = X_test['pred'].clip(lower=lower, upper=upper)

In [None]:
# Reload the raw files and attach the road identifier (x, y and direction
# concatenated as text) to each of them.
train = pd.read_csv('../input/tabular-playground-series-mar-2022/train.csv').assign(
    road=lambda df: df['x'].astype(str) + df['y'].astype(str) + df['direction'].astype(str)
)
test = pd.read_csv('../input/tabular-playground-series-mar-2022/test.csv').assign(
    road=lambda df: df['x'].astype(str) + df['y'].astype(str) + df['direction'].astype(str)
)

In [None]:
# Copy the predictions over by position. Assigning a Series would align on index
# labels and silently produce NaN wherever the two indexes disagree; a length
# mismatch now raises instead.
# assumes X_test rows are still in the same order as the rows of `test` — TODO confirm
test['pred'] = X_test['pred'].to_numpy()

In [None]:
# Per road, snap each prediction to the nearest congestion value that occurs
# more than 200 times for that road in the training data.
for road_id in test.road.unique():
    congestion_counts = train.loc[train.road == road_id, 'congestion'].value_counts()
    frequent = congestion_counts[congestion_counts > 200]

    # experimental: only snap when the road has more than two frequent values
    if len(frequent) <= 2:
        continue

    candidates = list(frequent.index)
    in_road = test.road == road_id
    test.loc[in_road, 'pred'] = test.loc[in_road, 'pred'].map(
        lambda value: min(candidates, key=lambda candidate: abs(candidate - value))
    )

In [None]:
# Expose the post-processed predictions under the submission column name and
# write the two required columns to disk.
test = test.assign(congestion=test['pred'])
submission = test.loc[:, ['row_id', 'congestion']]
submission.to_csv('2-NN-with-post.csv', index=False)