In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
import optuna
from catboost import CatBoostRegressor

In [None]:
# Inputs: fold-annotated training data, the competition test set, and the submission template.
df = pd.read_csv("../input/30days-folds/train_folds.csv")
df_test = pd.read_csv("../input/30-days-of-ml/test.csv")
sample_submission = pd.read_csv("../input/30-days-of-ml/sample_submission.csv")

# Model inputs are all training columns other than the id, the label, and the fold marker.
useful_features = [
    name for name in df.columns
    if name not in ("id", "target", "kfold")
]
# Columns whose name begins with "cat" are the ones later passed to CatBoost as categorical.
object_cols = list(filter(lambda name: name.startswith("cat"), useful_features))
# Restrict the test frame to the model inputs, in the same column order as training.
df_test = df_test.loc[:, useful_features]

In [None]:
def run(trial, fold=2):
    """Optuna objective: fit one CatBoost model and return its validation RMSE.

    Rows of the module-level ``df`` whose ``kfold`` differs from ``fold`` are
    used for training; rows whose ``kfold`` equals ``fold`` form the
    validation set that drives early stopping and the returned score.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.
    fold : int, default 2
        Value of the ``kfold`` column held out for validation. The default
        keeps the behaviour of calling ``run(trial)`` unchanged.

    Returns
    -------
    float
        Root mean squared error of the fitted model on the held-out fold.
    """
    # Sampled hyperparameters. The suggest names are the keys that end up in
    # study.best_params, so they are kept exactly as they were.
    depth = trial.suggest_int("max_depth", 1, 15)
    max_bin = trial.suggest_int('max_bin', 1, 32)
    l2_leaf_reg = trial.suggest_float('l2_leaf_reg', 0, 5)
    min_data_in_leaf = trial.suggest_int('min_data_in_leaf', 1, 30)
    # Relevant because the categorical columns are passed via cat_features below.
    one_hot_max_size = trial.suggest_int('one_hot_max_size', 2, 16)
    bootstrap_type = trial.suggest_categorical('bootstrap_type', ['Poisson'])
    learning_rate = trial.suggest_float('learning_rate', 0.001, 1.5)
    subsample = trial.suggest_float("subsample", 0.1, 1.0)
    grow_policy = trial.suggest_categorical('grow_policy', ['SymmetricTree', 'Depthwise'])
    # Bounds are ln(4) ~= 1.39 and ln(10) ~= 2.30, sampled on a log scale.
    fold_len_multiplier = trial.suggest_float(
        'fold_len_multiplier', np.log(4), np.log(10), log=True
    )

    # Fixed settings shared by every trial.
    fixed_params = dict(
        eval_metric="RMSE",
        iterations=15000,
        random_state=42,
        task_type='GPU',
        od_wait=400,
        od_type='Iter',
        verbose=0,
    )

    train_part = df[df.kfold != fold].reset_index(drop=True)
    valid_part = df[df.kfold == fold].reset_index(drop=True)

    ytrain = train_part.target
    yvalid = valid_part.target

    # No ordinal encoding here: the raw categorical columns are handed to
    # CatBoost through cat_features.
    xtrain = train_part[useful_features]
    xvalid = valid_part[useful_features]

    model = CatBoostRegressor(
        depth=depth,
        max_bin=max_bin,
        l2_leaf_reg=l2_leaf_reg,
        min_data_in_leaf=min_data_in_leaf,
        one_hot_max_size=one_hot_max_size,
        bootstrap_type=bootstrap_type,
        learning_rate=learning_rate,
        subsample=subsample,
        grow_policy=grow_policy,
        fold_len_multiplier=fold_len_multiplier,
        cat_features=object_cols,
        **fixed_params,
    )
    model.fit(xtrain, ytrain, eval_set=[(xvalid, yvalid)], verbose=1000)
    preds_valid = model.predict(xvalid)

    # Take the square root explicitly instead of passing squared=False, which
    # newer scikit-learn releases no longer accept.
    rmse = float(np.sqrt(mean_squared_error(yvalid, preds_valid)))
    return rmse

In [None]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# from a fresh kernel. NOTE(review): the model fits themselves may still vary
# between runs on GPU - confirm if exact reproducibility is required.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(run, n_trials=800)

In [None]:
# Display the hyperparameters of the trial with the lowest objective value (validation RMSE).
study.best_params

In [None]:
df = study.trials_dataframe()

In [None]:
df.to_csv("studycatb1.csv")