In [None]:
!pip install -U lightautoml

# Import libraries

In [None]:
import pandas as pd
import numpy as np

from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

In [None]:
# Read the training split of the competition data and preview its first rows.
df_train = pd.read_csv(
    filepath_or_buffer='../input/tabular-playground-series-jan-2022/train.csv',
)
df_train.head()

### Feature generation

In [None]:
import datetime

In [None]:
def get_weekday(date):
    """Return the day-of-week index for a dash-separated date string.

    Args:
        date: Date text made of three integer fields separated by '-',
            in year-month-day order (e.g. '2022-01-31').

    Returns:
        int: Result of ``datetime.date.weekday()`` — Monday is 0, Sunday is 6.

    Raises:
        ValueError: If the string does not split into exactly three parts,
            a part is not an integer, or the parts do not form a valid date.
    """
    year_str, month_str, day_str = date.split('-')
    parsed = datetime.date(
        year=int(year_str),
        month=int(month_str),
        day=int(day_str),
    )
    return parsed.weekday()

In [None]:
# Add a day-of-week feature (one get_weekday call per row of the 'date' column),
# then preview the frame.
df_train['weekday'] = df_train['date'].map(get_weekday)
df_train.head()

In [None]:
# Split every 'date' string on '-' and convert the pieces to integers:
# list_date has one row per record and the columns (year, month, day).
list_date = np.array(df_train['date'].str.split('-').tolist()).astype(int)

# Transposing yields one 1-D array per date component, assigned in column order.
df_train['year'], df_train['month'], df_train['day'] = list_date.T
df_train.head()

### Train 

In [None]:
# --- Column roles -----------------------------------------------------------
TARGET = 'num_sold'   # column the model is trained to predict
DROP = 'row_id'       # column excluded from the feature set

# --- Training configuration ---------------------------------------------------
N_FOLDS = 7           # number of cross-validation folds
N_THREADS = 4         # CPU / worker budget
RANDOM_STATE = 47     # random seed value
TEST_SIZE = 0.2       # NOTE(review): not referenced by the cells visible here
TIMEOUT = 60 * 60     # time budget in seconds (one hour)

In [None]:
def smape(y_true, y_pred):
    """Symmetric mean absolute percentage error (SMAPE), expressed in percent.

    Each element contributes ``2 * |pred - true| / (|true| + |pred|) * 100``
    and the result is the sum of those terms divided by ``len(y_true)``.

    An element where both the true value and the prediction are zero has a
    zero denominator; it is counted as zero error instead of producing NaN
    (which would otherwise turn the whole metric into NaN).

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values, broadcastable against y_true.

    Returns:
        float: SMAPE in the range [0, 200]; lower is better.

    Raises:
        ZeroDivisionError: If ``y_true`` is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    abs_error = 2.0 * np.abs(y_pred - y_true)
    scale = np.abs(y_true) + np.abs(y_pred)

    # Divide only where the denominator is non-zero; the remaining slots keep
    # the 0.0 from `out`, i.e. a perfect 0-vs-0 prediction costs nothing.
    ratio = np.divide(abs_error, scale, out=np.zeros_like(abs_error), where=scale != 0)
    return 1 / len(y_true) * np.sum(ratio * 100)

In [None]:
# Regression task scored with the custom SMAPE metric.
# SMAPE is an error measure (lower is better), so the optimisation direction
# is stated explicitly instead of being left to the library's default or
# automatic inference for callable metrics.
task = Task('reg',
            metric = smape,
            greater_is_better = False)

In [None]:
# Column roles handed to fit_predict: which column is the target and which
# column is to be dropped.
roles = {'target': TARGET, 'drop': DROP}

In [None]:
%%time 

# Build the AutoML preset with the configured time/CPU budget and a two-level
# pipeline in which each level uses tuned and default LightGBM ('lgb*') and
# CatBoost ('cb*') models.
automl = TabularAutoML(
    task = task,
    timeout = TIMEOUT,
    cpu_limit = N_THREADS,
    reader_params = {
        'n_jobs': N_THREADS,
        'cv': N_FOLDS,
        # Forward the configured seed to the reader; without this entry
        # RANDOM_STATE is never used and the reader falls back to its own
        # default seed.
        'random_state': RANDOM_STATE,
    },
    general_params = {
        'use_algos': [['lgb_tuned', 'lgb','cb_tuned', 'cb'],['lgb_tuned', 'lgb','cb_tuned', 'cb']],
    },
)

In [None]:
%%time

# Fit the AutoML pipeline on the training frame; fit_predict also returns the
# predictions for the training rows, which are printed with their shape.
oof_pred = automl.fit_predict(df_train, roles=roles, verbose=1)
oof_shape = oof_pred.shape
print('oof_pred:\n{}\nShape = {}'.format(oof_pred, oof_shape))

### Predict

In [None]:
# Read the test split and the sample-submission template from the competition
# input directory.
df_test = pd.read_csv(
    filepath_or_buffer='../input/tabular-playground-series-jan-2022/test.csv',
)
submission = pd.read_csv(
    filepath_or_buffer='../input/tabular-playground-series-jan-2022/sample_submission.csv',
)

In [None]:
# Same day-of-week feature as on the training frame, computed for the test rows.
df_test['weekday'] = df_test['date'].map(get_weekday)

In [None]:
# Split every test 'date' string on '-' and convert the pieces to integers:
# list_test_date has one row per record and the columns (year, month, day).
list_test_date = np.array(df_test['date'].str.split('-').tolist()).astype(int)

# Transposing yields one 1-D array per date component, assigned in column order.
df_test['year'], df_test['month'], df_test['day'] = list_test_date.T

In [None]:
# Score the test frame with the fitted pipeline.
test_pred = automl.predict(df_test)

# Preview the predictions as integers. They are rounded (not truncated) so the
# preview shows the same values that are later written to the submission.
np.round(test_pred.data, 0).astype(int)

In [None]:
# Round the raw predictions to the nearest integer and store them in the
# submission's target column, then preview the result.
rounded_sales = np.round(test_pred.data, decimals=0).astype(int)
submission['num_sold'] = rounded_sales
submission.head()

In [None]:
# Write the submission to the working directory without the DataFrame index.
submission.to_csv(path_or_buf='submission.csv', index=False)