In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, AdaBoostRegressor, VotingRegressor
from sklearn.tree import DecisionTreeRegressor
import lightgbm as lgb

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

In [None]:
# Train and test are read the same way: `row_id` becomes the index and the
# `date` column is parsed into datetimes on load.
dated_csv_kwargs = {'index_col': 'row_id', 'parse_dates': ['date']}

train = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jan-2022/train.csv',
    **dated_csv_kwargs)
test = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jan-2022/test.csv',
    **dated_csv_kwargs)

# The sample submission is read with only the index set (no date parsing).
submission = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jan-2022/sample_submission.csv',
    index_col='row_id')

## Preprocessing
### Create simple time features
I don't include a year feature because the time series has a seasonal trend.

In [None]:
# Split the target off the training frame; `num_sold` is removed from `train`.
train_y = train.pop('num_sold')

# Derive the same calendar features on both frames so their columns stay
# aligned for fitting and prediction.
for frame in (train, test):
    when = frame['date'].dt
    frame['month'] = when.month
    # pandas numbers weekdays Monday=0 ... Sunday=6, so 5 and 6 are the weekend.
    frame['weekend'] = when.dayofweek >= 5
    frame['dayofyear'] = when.dayofyear

# The raw timestamp is no longer needed once the features are extracted.
train = train.drop(columns='date')
test = test.drop(columns='date')

### Encode the categoricals

In [None]:
# Integer codes for each categorical column.
country_map = {'Finland': 0, 'Sweden': 1, 'Norway': 2}
store_map = {'KaggleMart': 0, 'KaggleRama': 1}
product_map = {'Kaggle Mug': 0, 'Kaggle Hat': 1, 'Kaggle Sticker': 2}

# Apply the same lookup tables to train and test, replacing each string column
# with its integer code in place.
for frame in (train, test):
    for column, codes in (
        ('country', country_map),
        ('store', store_map),
        ('product', product_map),
    ):
        frame[column] = frame[column].map(codes)

## Modelling

In [None]:
# Three LightGBM regressors that differ only in how rows are sampled during
# boosting; their test predictions are averaged below.
lgb_gbdt = lgb.LGBMRegressor()
# LightGBM ignores `subsample` while `subsample_freq` is 0 (the default), which
# would make this model train the same way as `lgb_gbdt`. Re-sample the rows at
# every iteration so the 80% bagging actually takes effect.
lgb_2 = lgb.LGBMRegressor(subsample=0.8, subsample_freq=1)
lgb_goss = lgb.LGBMRegressor(boosting_type='goss')

models = [lgb_gbdt, lgb_2, lgb_goss]
for model in models:
    model.fit(train, train_y)

# One column of test predictions per model; the width follows `models` so the
# ensemble can grow or shrink without touching this code.
predictions = np.zeros((test.shape[0], len(models)))
for i, model in enumerate(models):
    predictions[:, i] = model.predict(test)

# Unweighted average across models, kept as an (n_rows, 1) column vector.
prediction = predictions.mean(axis=1).reshape(-1, 1)

## Submission

In [None]:
# Fill a copy of the sample-submission frame with the averaged predictions
# (`submission` itself is left untouched) and write it out as CSV.
my_submission = submission.assign(num_sold=prediction)
my_submission.to_csv('submission.csv')