In [None]:
# Notebook-wide imports and configuration.
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Let pandas display up to 500 columns before truncating wide frames.
pd.set_option('display.max_columns', 500)

# Seed handed to CatBoost as `random_seed` in the model-parameter cell.
SEED = 2021
# NOTE(review): time, tqdm, the plotly modules and os are not referenced in
# the cells visible here — confirm whether they are still needed.
import time
from tqdm.notebook import tqdm

# Standard plotly imports
import plotly as py
import plotly.graph_objs as go
import plotly.tools as tls
from plotly.offline import iplot, init_notebook_mode
import plotly.figure_factory as ff
import os

# Gradient-boosting regressor used as the model in this notebook.
from catboost import CatBoostRegressor

In [None]:
# https://www.kaggle.com/nayuts/tabular-lgbm-with-lightgbm-tuner

# Directory holding the competition CSV files (trailing slash is required,
# because file names are appended by plain string concatenation).
DATA = "../input/tabular-playground-series-feb-2021/"

# Read the training rows, the test rows and the submission template, in that order.
train, test, sub = (
    pd.read_csv(DATA + filename)
    for filename in ("train.csv", "test.csv", "sample_submission.csv")
)

In [None]:
# Stack train and test row-wise; the label encoders are fitted on this
# combined frame so that values appearing in either file are known to them.
dataset = pd.concat(objs=[train, test], axis=0)

In [None]:
# Column names cat0 .. cat9 (the columns that get label-encoded).
train_cat_cols = ["cat" + str(idx) for idx in range(10)]
# Column names cont0 .. cont13 (used as-is as model features).
train_num_col = ["cont" + str(idx) for idx in range(14)]

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace every cat* column in train and test with integer codes.
# One encoder per column, fitted on the combined train+test values so the
# same value maps to the same code in both frames.
# NOTE: this overwrites train/test in place while `dataset` keeps the raw
# values, so the cell is meant to be run once per kernel session.
for col in train_cat_cols:
    le = LabelEncoder().fit(dataset[col])
    for frame in (train, test):
        frame[col] = le.transform(frame[col])

In [None]:
# Randomly take 90% of the training rows for fitting; the remaining rows
# (everything whose index label was not sampled) form the validation set.
df_train = train.sample(frac=0.9, random_state=0)
df_valid = train.drop(index=df_train.index)

# Feature columns, kept in the order in which they appear in `train`.
wanted_cols = train_cat_cols + train_num_col
feature_cols = [col for col in train.columns if col in wanted_cols]

X_train, y_train = df_train[feature_cols], df_train['target']
X_valid, y_valid = df_valid[feature_cols], df_valid['target']

In [None]:
# Keyword arguments forwarded verbatim to CatBoostRegressor.
catboost_params = {
    # Boosting schedule and tree shape.
    'iterations': 10000,
    'learning_rate': 0.004,
    'depth': 7,
    'random_strength': 3,
    'min_data_in_leaf': 10,
    'l2_leaf_reg': 5.2,
    # Objective, evaluation metric and seeding.
    'loss_function': 'RMSE',
    'random_seed': SEED,
    'eval_metric': 'RMSE',
    'grow_policy': 'Depthwise',
    'max_bin': 512,
    # Requests GPU training, so the kernel needs an accelerator attached.
    'task_type': 'GPU',
    # Overfitting-detector settings and metric reporting interval.
    'od_type': 'Iter',
    'od_wait': 50,
    'metric_period': 500,
}

model = CatBoostRegressor(**catboost_params)

In [None]:
# Fit on the training split; the held-out split is supplied as the evaluation set.
validation_pair = (X_valid, y_valid)
model.fit(X_train, y_train, eval_set=validation_pair)

In [None]:
# Predict on the test rows using the cat* columns followed by the cont* columns,
# then write the predictions into the submission template and save it.
test_features = test[train_cat_cols + train_num_col]
pred = model.predict(test_features)

sub["target"] = pred
sub.to_csv('submission.csv', index=False)