Sometimes there are more effective models than LightGBM or XGBoost, so I tried __all regressors in scikit-learn, plus LightGBM, XGBoost and CatBoost__. I didn't do any parameter tuning (too many params...). I'm not familiar with every one of these regressors, but I tried as many as I could. 

# packages

In [None]:
import pandas as pd
import numpy as np
import time
import warnings
warnings.filterwarnings(action='ignore')

import gc

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import RadiusNeighborsRegressor, KNeighborsRegressor
from sklearn.linear_model import GammaRegressor, HuberRegressor, PassiveAggressiveRegressor, PoissonRegressor
from sklearn.linear_model import RANSACRegressor, SGDRegressor, TheilSenRegressor, TweedieRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.ensemble import ExtraTreesRegressor as ens_ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor, BaggingRegressor, GradientBoostingRegressor, RandomForestRegressor

from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor 

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

# loading files

In [None]:
# Directory that holds the competition CSV files.
path = '../input/tabular-playground-series-feb-2021/'

# Read, in order: the training set, the test set and the sample submission.
train, test, ss = (
    pd.read_csv(path + filename)
    for filename in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [None]:
# Notebook cell output: display the training DataFrame loaded from train.csv.
train

In [None]:
# Notebook cell output: display the test DataFrame loaded from test.csv.
test

# functions

In [None]:
def get_validation_df(X_train, y_train, test_size=0.2, random_state=0):
    """Split features and target into training and validation parts.

    Args:
        X_train: Feature table to split.
        y_train: Target values aligned with ``X_train``.
        test_size: Fraction (or absolute number) of rows held out for
            validation; forwarded to ``train_test_split``. Defaults to 0.2.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible. Defaults to 0.

    Returns:
        A tuple ``(X_train, X_val, y_train, y_val)``.
    """
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=test_size, random_state=random_state
    )
    return X_fit, X_val, y_fit, y_val

def get_preprocessed_df(train_df, test_df):
    """Scale, one-hot encode and split the raw competition frames.

    Both frames must contain the columns ``cat0``..``cat9`` and
    ``cont0``..``cont13``; ``train_df`` must also contain ``target``.
    The inputs are not modified.

    Steps:
        1. Standard-scale the continuous columns (scaler fitted on train only).
        2. One-hot encode the categorical columns (encoder fitted on train only).
        3. Split the processed training data with ``get_validation_df``.

    Args:
        train_df: Raw training DataFrame.
        test_df: Raw test DataFrame.

    Returns:
        A tuple ``(X_train, X_val, y_train, y_val, X_test)`` where every feature
        frame has the continuous columns followed by the one-hot columns.
    """
    cat_cols = ['cat{}'.format(i) for i in range(10)]
    cont_cols = ['cont{}'.format(i) for i in range(14)]
    feature_cols = cat_cols + cont_cols

    # Explicit copies: the caller's frames stay untouched and the column
    # assignments below do not write into a slice of another DataFrame.
    # Selecting the same column list for both frames keeps train and test aligned.
    X_train = train_df[feature_cols].copy()
    y_train = train_df['target']
    X_test = test_df[feature_cols].copy()

    # continuous - standard scaling (statistics come from the training data only)
    st_scaler = StandardScaler()
    st_scaler.fit(X_train[cont_cols])
    X_train[cont_cols] = st_scaler.transform(X_train[cont_cols])
    X_test[cont_cols] = st_scaler.transform(X_test[cont_cols])

    # categorical -> one-hot encoding
    # handle_unknown='ignore' encodes a category that appears only in the test
    # data as all zeros instead of raising during transform.
    oh_enc = OneHotEncoder(handle_unknown='ignore')
    oh_enc.fit(X_train[cat_cols])
    # get_feature_names was removed from newer scikit-learn releases in favour
    # of get_feature_names_out; support both.
    if hasattr(oh_enc, 'get_feature_names_out'):
        oh_cols = oh_enc.get_feature_names_out(cat_cols)
    else:
        oh_cols = oh_enc.get_feature_names(cat_cols)

    def _with_one_hot(frame):
        # Build all indicator columns at once and attach them next to the
        # continuous columns (avoids inserting columns one by one).
        encoded = pd.DataFrame(
            oh_enc.transform(frame[cat_cols]).toarray(),
            columns=oh_cols,
            index=frame.index,
        )
        return pd.concat([frame[cont_cols], encoded], axis=1)

    X_train = _with_one_hot(X_train)
    X_test = _with_one_hot(X_test)

    X_train, X_val, y_train, y_val = get_validation_df(X_train, y_train)

    return X_train, X_val, y_train, y_val, X_test

def eval_score(y_true, pred):
    """Print and return RMSE, MAE and R^2 of ``pred`` against ``y_true``.

    Each metric is rounded to 4 decimal places.

    Args:
        y_true: Ground-truth target values.
        pred: Predicted values aligned with ``y_true``.

    Returns:
        A tuple ``(rmse, mae, r2)``.
    """
    # RMSE is taken as sqrt(MSE) so it does not rely on the ``squared`` flag
    # of mean_squared_error.
    rmse = round(float(np.sqrt(mean_squared_error(y_true, pred))), 4)
    mae = round(float(mean_absolute_error(y_true, pred)), 4)
    r2 = round(float(r2_score(y_true, pred)), 4)

    print('rmse:', rmse)
    print('mae:', mae)
    print('r2:', r2)

    return rmse, mae, r2
    
def eval_model(model, X_train, X_val, y_train, y_val, model_name, scores_dict):
    """Fit ``model``, score it on the validation split and record the metrics.

    Prints RMSE, MAE, R^2 (each rounded to 4 decimals) and the elapsed time
    for fitting, predicting and scoring.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        X_val: Validation features.
        y_train: Training targets.
        y_val: Validation targets.
        model_name: Key under which the scores are stored.
        scores_dict: Dict updated in place with
            ``scores_dict[model_name] = [rmse, mae, r2]``.

    Returns:
        The same ``scores_dict`` object, after the update.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()

    model.fit(X_train, y_train)
    pred = model.predict(X_val)

    # RMSE is taken as sqrt(MSE): the ``squared=False`` flag of
    # mean_squared_error is deprecated/removed in recent scikit-learn releases.
    rmse = round(float(np.sqrt(mean_squared_error(y_val, pred))), 4)
    mae = round(mean_absolute_error(y_val, pred), 4)
    r2 = round(r2_score(y_val, pred), 4)
    print('rmse:', rmse)
    print('mae:', mae)
    print('r2:', r2)

    scores_dict[model_name] = [rmse, mae, r2]

    print('execution time: {:.4f}s'.format(time.perf_counter() - start))

    return scores_dict

# data preprocessing

In [None]:
# Scale the continuous features, one-hot encode the categorical ones and
# split off a validation set (all done inside get_preprocessed_df).
X_train, X_val, y_train, y_val, X_test = get_preprocessed_df(train, test)

In [None]:
# Report the shapes of every split produced by the preprocessing step.
for label, parts in (
    ('Train set:', (X_train, y_train)),
    ('Validation set:', (X_val, y_val)),
    ('Test set:', (X_test,)),
):
    print(label, *(part.shape for part in parts))

# making models

In [None]:
# Candidate regressors, each created with its library-default settings
# (no hyper-parameter tuning). Estimators that could not be benchmarked are
# kept as comments together with the reason they were dropped.

# --- single decision trees
dt_reg = DecisionTreeRegressor()
et_reg = ExtraTreeRegressor()

# --- neural network
mlp_reg = MLPRegressor()

# --- nearest neighbours (both disabled)
# rn_reg = RadiusNeighborsRegressor(n_jobs=-1)  -> raises an error
# kn_reg = KNeighborsRegressor(n_jobs=-1)       -> takes too long

# --- linear models
# gm_reg = GammaRegressor()  -> some y values are out of the valid range for GammaDistribution
hn_reg = HuberRegressor()
pa_reg = PassiveAggressiveRegressor()
ps_reg = PoissonRegressor()
rs_reg = RANSACRegressor()
sgd_reg = SGDRegressor()
# ts_reg = TheilSenRegressor()  -> runs out of memory
twd_reg = TweedieRegressor()

# --- Gaussian process (disabled)
# gp_reg = GaussianProcessRegressor()  -> runs out of memory

# --- scikit-learn ensembles
eet_reg = ens_ExtraTreesRegressor()
ab_reg = AdaBoostRegressor()
bg_reg = BaggingRegressor()
gb_reg = GradientBoostingRegressor()
rf_reg = RandomForestRegressor()
hgb_reg = HistGradientBoostingRegressor()

# --- external gradient-boosting libraries
xgb_reg = XGBRegressor()
lgbm_reg = LGBMRegressor()
cat_reg = CatBoostRegressor()

# (label, estimator) pairs in benchmark order; the label becomes the row name
# in the results table. Disabled estimators are intentionally absent.
models = []
models += [
    ('DecisionTreeRegressor', dt_reg),
    ('ExtraTreeRegressor', et_reg),
    ('MLPRegressor', mlp_reg),
]
models += [
    ('HuberRegressor', hn_reg),
    ('PassiveAggressiveRegressor', pa_reg),
    ('PoissonRegressor', ps_reg),
    ('RANSACRegressor', rs_reg),
    ('SGDRegressor', sgd_reg),
    ('TweedieRegressor', twd_reg),
]
models += [
    ('EnsembleExtraTreesRegressor', eet_reg),
    ('AdaBoostRegressor', ab_reg),
    ('BaggingRegressor', bg_reg),
    ('GradientBoostingRegressor', gb_reg),
    ('RandomForestRegressor', rf_reg),
    ('HistGradientBoostingRegressor', hgb_reg),
]
models += [
    ('XGBRegressor', xgb_reg),
    ('LGBMRegressor', lgbm_reg),
    ('CatBoostRegressor', cat_reg),
]

# model fitting

In [None]:
# Benchmark every candidate in turn: eval_model fits it, prints its validation
# metrics and stores them in scores_dict under the model's label.
scores_dict = {}
for model_name, model in models:
    print('#', model_name)
    scores_dict = eval_model(
        model,
        X_train,
        X_val,
        y_train,
        y_val,
        model_name=model_name,
        scores_dict=scores_dict,
    )
    print()
    # Force a garbage-collection pass before moving on to the next model.
    gc.collect()

# results

In [None]:
# Tabulate the collected scores: one row per model, one column per metric.
metric_names = ['rmse', 'mae', 'r2']
result = pd.DataFrame.from_dict(scores_dict, orient='index', columns=metric_names)
# RANSACRegressor's scores are an outlier, so its row is excluded from the table.
result = result.drop(index='RANSACRegressor')
result

With default parameters, __CatBoost__ achieved the best score.

In [None]:
import plotly.express as px
# Bar chart of validation RMSE per model, sorted ascending (lower is better);
# bars are coloured by their RMSE value.
data = result.sort_values('rmse')
fig = px.bar(data, x=data.index, y='rmse', color=data['rmse'])
fig.show()

In [None]:
# Bar chart of validation MAE per model, sorted ascending (lower is better);
# bars are coloured by their MAE value.
data = result.sort_values('mae')
fig = px.bar(data, x=data.index, y='mae', color=data['mae'])
fig.show()

Actually, __r2_score__ was not a meaningful metric in this experiment.

In [None]:
# Bar chart of validation R^2 per model, sorted ascending (higher is better);
# bars are coloured by their R^2 value.
data = result.sort_values('r2')
fig = px.bar(data, x=data.index, y='r2', color=data['r2'])
fig.show()

In [None]:
# Predict the test features with the fitted CatBoost model, place the
# predictions in the sample-submission frame and write it to disk.
test_pred = cat_reg.predict(X_test)
ss['target'] = test_pred
ss.to_csv('submission.csv', index=False)