**PLEASE UPVOTE https://www.kaggle.com/alexryzhkov/lightautoml-with-fe-tps-mar-22**

In [None]:
!pip install -U lightautoml

In [None]:
import os
import time
import requests

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import torch

from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

In [None]:
def feature_engineering(data):
    """Derive calendar and location/direction interaction features.

    The ``time`` column is parsed with ``pd.to_datetime`` and expanded into
    calendar components; ``x`` and ``y`` are converted to strings; and a set
    of string-concatenated interaction columns is built from ``hour``, ``x``,
    ``y`` and ``direction``.

    NOTE: every new column is written into ``data`` itself, so the caller's
    frame is modified in place (and keeps its ``time`` column, now parsed).
    Only the *returned* frame has ``time`` removed, because ``drop`` produces
    a new object.

    Args:
        data: DataFrame with at least ``time``, ``x``, ``y`` and
            ``direction`` columns.

    Returns:
        A new DataFrame with all engineered columns and without ``time``.
    """
    data['time'] = pd.to_datetime(data['time'])
    stamp = data['time'].dt

    # Calendar components of the timestamp.
    data['month'] = stamp.month
    data['weekday'] = stamp.weekday
    data['hour'] = stamp.hour
    data['minute'] = stamp.minute
    data['is_month_start'] = stamp.is_month_start.astype('int')
    data['is_month_end'] = stamp.is_month_end.astype('int')
    # Minutes elapsed since midnight (the column name is historical).
    data['hour+minute'] = stamp.hour * 60 + stamp.minute
    # pandas numbers days Monday=0 .. Sunday=6, so > 4 means Saturday/Sunday.
    data['is_weekend'] = (stamp.dayofweek > 4).astype('int')
    data['is_afternoon'] = (stamp.hour > 12).astype('int')

    # Coordinates are treated as labels rather than numbers.
    for coord in ('x', 'y'):
        data[coord] = data[coord].astype('str')

    # String views of the building blocks, computed once and reused below.
    text = {
        col: data[col].astype('str')
        for col in ('hour', 'x', 'y', 'direction')
    }

    # Each interaction column is the plain concatenation (no separator) of
    # the parts named in its own label, in the listed order.
    interactions = (
        'x+y',
        'x+direction',
        'y+direction',
        'x+y+direction',
        'hour+x',
        'hour+y',
        'hour+x+y',
        'hour+direction',
        'hour+x+direction',
        'hour+y+direction',
        'hour+x+y+direction',
    )
    for label in interactions:
        first, *others = label.split('+')
        joined = text[first]
        for part in others:
            joined = joined + text[part]
        data[label] = joined

    return data.drop(['time'], axis=1)

In [None]:
# Run-wide settings shared by the cells below.
# Thread budget: used for torch, TabularAutoML's cpu_limit and the reader's n_jobs.
N_THREADS = 4
# Seed used for numpy and for the AutoML reader.
RANDOM_STATE = 42
# Passed as TabularAutoML's `timeout`; 8 * 3600 reads as 8 hours in seconds
# (NOTE(review): unit taken from the LightAutoML docs - confirm).
TIMEOUT = 8 * 3600
# Name of the column to predict (used as the 'target' role).
TARGET_NAME = 'congestion'

In [None]:
# Seed numpy's global RNG and cap the number of threads torch may use.
np.random.seed(RANDOM_STATE)
torch.set_num_threads(N_THREADS)

In [None]:
# Load the competition files. 'time' is read as a plain string here;
# feature_engineering() parses it with pd.to_datetime later.
train_data = pd.read_csv('../input/tabular-playground-series-mar-2022/train.csv', dtype={'time': str})
test_data = pd.read_csv('../input/tabular-playground-series-mar-2022/test.csv', dtype={'time': str})
sample_submission = pd.read_csv('../input/tabular-playground-series-mar-2022/sample_submission.csv')

In [None]:
# feature_engineering() returns a NEW frame (the one without the 'time'
# column), so its result must be bound back to the real names. Assigning it
# to a loop variable would discard it and leave 'time' in both frames.
train_data = feature_engineering(train_data)
test_data = feature_engineering(test_data)

In [None]:
# Regression task; 'mae' is set as both the training loss and the reported metric.
task = Task('reg', metric='mae', loss='mae')

In [None]:
# Column roles handed to automl.fit_predict: the target column, plus
# 'row_id' listed under 'drop'.
roles = {'target': TARGET_NAME, 'drop': ['row_id']}

In [None]:
%%time

# Configure the AutoML preset. 'use_algos' is given as a nested list holding
# only 'lgb' (NOTE(review): per the LightAutoML docs the outer list is the
# stacking levels, so this is a single-level LightGBM run - confirm).
automl = TabularAutoML(
    task=task,
    timeout=TIMEOUT,
    cpu_limit=N_THREADS,
    reader_params={'n_jobs': N_THREADS, 'random_state': RANDOM_STATE},
    general_params={'use_algos': [['lgb']]},
)

# Fit on the training frame and report the predictions returned by fit_predict.
oof_pred = automl.fit_predict(train_data, roles=roles, verbose=3)
print(f'oof_pred:\n{oof_pred}\nShape = {oof_pred.shape}')

In [None]:
%%time

# Fetch feature scores in 'fast' mode and draw the 'Importance' column as a
# bar chart, one bar per 'Feature'.
fast_fi = automl.get_feature_scores('fast')
fast_fi.set_index('Feature')['Importance'].plot.bar(figsize=(20, 10), grid=True, color='indigo')

In [None]:
%%time

# Predict on the test frame with the fitted AutoML model.
test_pred = automl.predict(test_data)
# Report the shape of the predictions themselves (not of the input frame),
# matching how the out-of-fold predictions are reported after fitting.
print(f'TEST DATA PREDICTION:\n{test_pred}\nSHAPE: {test_pred.shape}')

In [None]:
# Fill the target column of the submission template with the first column of
# the prediction array. TARGET_NAME is used instead of a repeated literal so
# the column name stays in sync with the 'target' role.
sample_submission[TARGET_NAME] = test_pred.data[:, 0]
# NOTE(review): the file name is spelled 'lighautoml' (no 't'); it is left
# unchanged so that anything expecting this exact output path keeps working.
sample_submission.to_csv('lighautoml_fe.csv', index=False)
# Bare expression: displays the frame as the notebook cell's output.
sample_submission