In [None]:
pip install -U lightautoml

In [None]:
import numpy as np 
import pandas as pd 

from sklearn.metrics import mean_squared_error
import torch

# LightAutoML tabular preset and task definition
from lightautoml.automl.presets.tabular_presets import TabularUtilizedAutoML
from lightautoml.tasks import Task

In [None]:
def create_feautures(data):
    """Build calendar and location features from a raw frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``time`` column that ``pd.to_datetime`` can parse and
        with ``x`` and ``y`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame holding every input column except ``time``, followed by
        ``month``, ``dayofweek``, ``hour``, ``minute``, ``hour+minute``,
        ``is_weekend`` and ``x+y``.

    Notes
    -----
    The input frame is not modified; all work happens on a copy.
    """
    # Copy first so the caller's frame keeps its original columns and the
    # original dtype of ``time``.
    features = data.copy()
    timestamp = pd.to_datetime(features['time'])

    features['month'] = timestamp.dt.month
    features['dayofweek'] = timestamp.dt.dayofweek
    features['hour'] = timestamp.dt.hour
    features['minute'] = timestamp.dt.minute
    # Despite the column name this is the minute of the day (hour * 60 + minute).
    features['hour+minute'] = timestamp.dt.hour * 60 + timestamp.dt.minute
    # dayofweek counts Monday as 0, so values above 4 are Saturday and Sunday.
    features['is_weekend'] = timestamp.dt.dayofweek > 4

    # String label for the (x, y) pair. NOTE: the two parts are joined without
    # a separator, so e.g. (1, 11) and (11, 1) would both produce '111'.
    features['x+y'] = features['x'].astype('str') + features['y'].astype('str')

    return features.drop(['time'], axis=1)


def reduce_mem_usage(df, verbose=True):
    """Downcast the columns of ``df`` in place to reduce its memory footprint.

    Signed-integer columns are cast to the narrowest of int8/int16/int32/int64
    whose range contains the column's min and max (bounds inclusive). Float
    columns are cast to float16 or float32 when their min and max lie strictly
    inside that type's finite range, otherwise to float64. Every other column
    (e.g. object, bool, datetime, unsigned integer) is converted to
    ``category``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place.
    verbose : bool, default True
        When true, print the resulting memory usage and the relative change.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after conversion.

    Notes
    -----
    The float downcast checks only the value range, not precision: float16
    and float32 carry fewer significant digits than float64, so values may be
    rounded.
    """
    # 'int8' is included so that columns already downcast by a previous call
    # are treated as integers again instead of being turned into categories.
    numerics = {'int8', 'int16', 'int32', 'int64', 'float16', 'float32', 'float64'}
    int_candidates = (np.int8, np.int16, np.int32, np.int64)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        type_name = str(df[col].dtype)
        if type_name not in numerics:
            df[col] = df[col].astype('category')
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        if type_name.startswith('int'):
            # Narrowest type first. Inclusive bounds let exact-limit values
            # (e.g. 127) stay in the narrow type. If min/max are NaN (empty
            # column) no candidate matches and the column is left unchanged.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        elif c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
            df[col] = df[col].astype(np.float16)
        elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
            df[col] = df[col].astype(np.float32)
        else:
            df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [None]:
# Load the competition files. train/test use their first CSV column as the
# index; the sample submission keeps the default integer index.
train = pd.read_csv(
    "../input/tabular-playground-series-mar-2022/train.csv",
    index_col=0,
)
test = pd.read_csv(
    "../input/tabular-playground-series-mar-2022/test.csv",
    index_col=0,
)
sub = pd.read_csv(
    "../input/tabular-playground-series-mar-2022/sample_submission.csv"
)

In [None]:
# Preview the first five rows of the raw training frame.
train.head(n=5)

In [None]:
# Swap both frames for their feature-engineered versions (train first, then test).
train, test = create_feautures(train), create_feautures(test)

In [None]:
# Preview the first five rows again, now with the derived feature columns.
train.head(n=5)

In [None]:
# Downcast both frames (train first, then test); each call prints its own report.
train, test = reduce_mem_usage(train), reduce_mem_usage(test)

In [None]:
# --- Run configuration -------------------------------------------------------
N_THREADS = 4              # handed to torch and to the AutoML cpu/job limits
N_FOLDS = 5                # passed to the AutoML reader as 'cv'
RANDOM_STATE = 42          # seed for numpy and for the AutoML reader
TEST_SIZE = 0.2            # not referenced by the cells shown in this notebook
TIMEOUT = 500              # passed to the AutoML preset as 'timeout'
TARGET_NAME = 'congestion'

# Fix the global numpy seed and cap the torch thread count before modelling.
np.random.seed(RANDOM_STATE)
torch.set_num_threads(N_THREADS)

# Regression task.
task = Task('reg')

# Only the target role is declared. No datetime role is given for 'time'
# because create_feautures() drops that column.
roles = dict(target=TARGET_NAME)

In [None]:
# Build the AutoML preset from the configuration constants.
automl = TabularUtilizedAutoML(
    task=task,
    timeout=TIMEOUT,
    cpu_limit=N_THREADS,
    reader_params=dict(n_jobs=N_THREADS, cv=N_FOLDS, random_state=RANDOM_STATE),
    # Optional settings, currently disabled:
    #   general_params={'use_algos': [['catboost', 'lgb', 'lgb_tuned', 'catboost_tuned']]},
    #   tuning_params={'max_tuning_iter': 20, 'max_tuning_time': 30},
)

In [None]:
%%time 
oof_pred = automl.fit_predict(train, roles=roles, verbose=1)

In [None]:
# Predict on the feature-engineered, memory-reduced test frame.
pred = automl.predict(test)
# Bare expression: display the prediction object as the cell output.
pred

In [None]:
# Put the first prediction column into the submission frame. The column name
# comes from TARGET_NAME so it cannot drift from the target configured for
# training.
sub[TARGET_NAME] = pred.data[:, 0]
# Write the submission file without the DataFrame index.
sub.to_csv("submission.csv", index=False)
sub