In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
from sklearn.preprocessing import LabelEncoder


import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
# train.csv and test.csv are read with identical options: `row_id` becomes the
# index and `date` is parsed into datetimes.
read_opts = dict(index_col='row_id', parse_dates=['date'])

train = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jan-2022/train.csv', **read_opts)
test = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jan-2022/test.csv', **read_opts)

# The sample submission is read without date parsing; it is only indexed by `row_id`.
submission = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jan-2022/sample_submission.csv',
    index_col='row_id')

### Create simple time features

In [None]:
# Derive the same four calendar columns from `date` on both frames
# (train first, then test), added in the order year, month, day, dayofweek.
for frame in (train, test):
    date_parts = frame['date'].dt
    frame['year'] = date_parts.year
    frame['month'] = date_parts.month
    frame['day'] = date_parts.day
    frame['dayofweek'] = date_parts.dayofweek

### Split and drop data

In [None]:
# Time-based split: rows with year < 2018 form the training fold and rows with
# year == 2018 form the validation fold. Each subset is selected once and reused
# for both the features and the target.
train_part = train.query("year <  2018")
valid_part = train.query("year == 2018")

# `columns=` is used instead of a positional axis argument: passing the axis
# positionally to DataFrame.drop was deprecated in pandas 1.3 and raises a
# TypeError from pandas 2.0 onwards.
X_train = train_part.drop(columns='num_sold').reset_index(drop=True)
X_valid = valid_part.drop(columns='num_sold').reset_index(drop=True)

# Targets are re-indexed from 0 so they stay positionally aligned with X_train / X_valid.
y_train = train_part['num_sold'].reset_index(drop=True)
y_valid = valid_part['num_sold'].reset_index(drop=True)

In [None]:
# Columns removed from every feature frame before modelling.
# NOTE(review): presumably `date` is dropped because it is a raw datetime column
# and `year` because it was only used to define the split — confirm intent.
drop_cols = ['date', 'year']

# `columns=` replaces the positional axis argument, which pandas deprecated in
# 1.3 and removed in 2.0. Reassigning instead of `inplace=True` leaves the three
# names bound to frames without these columns, as before.
X_train = X_train.drop(columns=drop_cols)
X_valid = X_valid.drop(columns=drop_cols)
test = test.drop(columns=drop_cols)

### Encode the categoricals

In [None]:
# A single LabelEncoder instance is refit for each object-dtype column, so after
# the loop `le` holds only the mapping of the last column processed.
le = LabelEncoder()

object_cols = list(X_train.select_dtypes('O').columns)
for col in object_cols:
    # Learn the label -> integer mapping from the training fold only...
    X_train[col] = le.fit_transform(X_train[col])
    # ...then apply that same mapping to the validation fold and the test set.
    # `transform` raises if a frame contains a label not seen in X_train.
    for frame in (X_valid, test):
        frame[col] = le.transform(frame[col])

### Fitting

In [None]:
# Keyword arguments for the lgb.LGBMRegressor constructor.
# num_leaves and max_depth are not set, so LightGBM's defaults apply.
params_init = {
    'importance_type': 'gain',
    'random_state': 42,
    # Upper bound on boosting rounds; the early-stopping callback configured in
    # params_fit can end training before this many rounds are reached.
    'n_estimators': 10_000,
}

# Keyword arguments for LGBMRegressor.fit.
# Early stopping and periodic logging are configured through callbacks: the
# `early_stopping_rounds` and `verbose` arguments of fit() were deprecated in
# LightGBM 3.3 and removed in 4.0, where passing them raises a TypeError.
# eval_metric and categorical_feature are not passed, so defaults apply.
params_fit = {
    'X': X_train,
    'y': y_train,
    # Both folds are evaluated each round: the training fold first, then validation.
    'eval_set': [(X_train, y_train), (X_valid, y_valid)],
    'callbacks': [
        lgb.early_stopping(stopping_rounds=100),  # stop after 100 rounds without improvement
        lgb.log_evaluation(period=100),           # log evaluation results every 100 rounds
    ],
}

In [None]:
# Build the regressor from params_init and train it with params_fit in one
# expression; fit() returns the fitted estimator, which is bound to `model`.
model = lgb.LGBMRegressor(**params_init).fit(**params_fit)
# Leave the fitted estimator as the cell's last expression so it is displayed.
model

In [None]:
# Copy the sample submission with `num_sold` set to the model's predictions for
# the test features; `submission` itself is left unmodified.
benchmark = submission.assign(num_sold=model.predict(test))

# Write the submission file; the `row_id` index is written as the first column.
benchmark.to_csv('lgb_submission.csv')