This notebook builds on the approach in https://www.kaggle.com/mfedeli/tabular-playground-series-jan-2022 - thanks for sharing!

In [None]:
%%capture
!pip install pycaret[full]

In [None]:
import pandas as pd
import numpy as np 
from pycaret.regression import *

In [None]:
# Read the train and test CSVs, using the `row_id` column as the index of each frame.
train, test = (
    pd.read_csv(csv_path, index_col='row_id')
    for csv_path in (
        '../input/tabular-playground-series-jan-2022/train.csv',
        '../input/tabular-playground-series-jan-2022/test.csv',
    )
)

In [None]:
def pre_process(df):
    """Replace the ``date`` column of ``df`` with calendar features, in place.

    Adds ``week``, ``year``, ``quarter``, ``day``, ``dayofyear``, ``weekend``
    and ``weekday`` columns derived from ``date``, then drops ``date``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column that ``pd.to_datetime`` can parse.
        The frame is modified in place.

    Returns
    -------
    None

    Notes
    -----
    Not idempotent: ``date`` is dropped at the end, so calling this twice on
    the same frame raises ``KeyError``.
    """
    dates = pd.to_datetime(df['date'])

    # `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0;
    # `isocalendar().week` is its replacement. It yields a nullable UInt32
    # column, so cast back to the plain int64 the old accessor produced.
    df['week'] = dates.dt.isocalendar().week.astype('int64')
    df['year'] = 'Y' + dates.dt.year.astype(str)
    df['quarter'] = 'Q' + dates.dt.quarter.astype(str)
    df['day'] = dates.dt.day

    # Shift leap-year days from Feb 29 (day 60) onwards back by one so that
    # the same calendar date after February shares a dayofyear across years.
    df['dayofyear'] = dates.dt.dayofyear
    df.loc[dates.dt.is_leap_year & (df['dayofyear'] >= 60), 'dayofyear'] -= 1

    # weekday is 0 (Monday) .. 6 (Sunday); 5 and 6 are the weekend.
    df['weekend'] = dates.dt.weekday >= 5
    df['weekday'] = 'WD' + dates.dt.weekday.astype(str)

    df.drop(columns=['date'], inplace=True)

# Apply the same in-place feature engineering to both frames, train first.
for frame in (train, test):
    pre_process(frame)

In [None]:
# `DataFrame.info()` prints its summary and returns None, so call it as two
# statements rather than a tuple expression that would echo `(None, None)`.
train.info()
test.info()

In [None]:
# Credit to https://www.kaggle.com/c/web-traffic-time-series-forecasting/discussion/36414
def SMAPE(y_true, y_pred):
    """Symmetric mean absolute percentage error, on a 0-200 scale.

    Each element contributes ``|y_true - y_pred| / ((|y_true| + |y_pred|) / 200)``;
    elements where both values are zero contribute 0.

    Parameters
    ----------
    y_true, y_pred : array-like
        Actual and predicted values (lists, NumPy arrays or pandas Series).
        They are converted to float arrays, so pandas inputs are compared by
        position rather than by index label.

    Returns
    -------
    float
        Mean of the per-element scores.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Use magnitudes of BOTH inputs: without abs() on the actuals, a negative
    # actual can cancel the prediction and zero the denominator spuriously.
    half_scale = (np.abs(actual) + np.abs(predicted)) / 200.0
    abs_error = np.abs(actual - predicted)

    # Divide only where the denominator is non-zero; the remaining entries
    # keep their initial 0.0, and no divide-by-zero warning is raised.
    ratio = np.zeros_like(abs_error)
    np.divide(abs_error, half_scale, out=ratio, where=half_scale != 0)
    return np.mean(ratio)

In [None]:
# Initialise the PyCaret regression experiment on the engineered training frame.
# NOTE(review): only the train/hold-out split is left unshuffled here; the CV
# fold strategy is not set, so the folds presumably do not respect time order -
# confirm whether `fold_strategy` should be changed as well.
reg = setup(data = train,
            target = 'num_sold',
            session_id = 42, # fixed seed so repeated runs of the experiment are reproducible
            normalize = True,
            normalize_method = 'robust',
            transform_target = True,
            data_split_shuffle = False, #so that we do not use "future" observations to predict "past" observations
            create_clusters = False,
            use_gpu = True,
            silent = True,
            fold = 10,
            n_jobs = -1)

In [None]:
# Register the custom SMAPE scorer (lower is better) with the experiment.
add_metric(
    'SMAPE',
    'SMAPE',
    SMAPE,
    greater_is_better=False,
)

# Evaluate the three gradient-boosting candidates and keep all three, ranked by SMAPE.
top = compare_models(
    include=['catboost', 'lightgbm', 'xgboost'],
    n_select=3,
    sort='SMAPE',
)

In [None]:
# Blend the selected models into one estimator, then run predict_model on it
# with no explicit data (the result is shown as the cell output).
blend = blend_models(estimator_list=top)
predict_model(estimator=blend)

In [None]:
# Finalize the blend, then run predict_model on the finalized estimator.
# NOTE(review): per the PyCaret docs, finalize_model refits on the full dataset
# including the hold-out, so this score grid is presumably optimistic - confirm.
final_blend = finalize_model(estimator=blend)
predict_model(estimator=final_blend)

In [None]:
# Score the test frame with the finalized blend.
preds = predict_model(final_blend, data=test)

# Pair each test row_id with its predicted label, positionally, and write the
# two-column submission file without the frame's own index.
sub = pd.DataFrame({
    'row_id': test.index.to_numpy(),
    'num_sold': preds['Label'].to_numpy(),
})
sub.to_csv('submission.csv', index=False)
print(sub.head(), sub.describe())