### `ENVIRONMENT & DATA`

In [1]:
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Read the train and test tables from CSV files in the current working directory.
train, test = (
    pd.read_csv(csv_name)
    for csv_name in ('train_trans.csv', 'test_trans.csv')
)

### `TRAIN TEST SPLIT`

In [3]:
from sklearn.model_selection import train_test_split

# Separate the regression target from the remaining (feature) columns.
y = train['amount_spent_per_room_night_scaled']
X = train.drop(columns=['amount_spent_per_room_night_scaled'])

# Hold out 20% of the rows for validation; shuffling is seeded so the
# split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [4]:
# Column indices passed to CatBoost as `cat_features` when the train and
# validation Pools are built.
# NOTE(review): the indices are hard-coded and presumably refer to column
# positions in X -- confirm they still match the layout of the CSV files.
cat_columns = [0, 1, 3, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22]

### `MODEL`

In [5]:
from catboost import Pool as pool
from catboost import CatBoostRegressor as cbr

import hyperopt
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

In [6]:
# Wrap each split in a CatBoost Pool; `cat_columns` marks the categorical
# features. D_val is built from the hold-out part of the split.
D_train = pool(data=x_train, label=y_train, cat_features=cat_columns)
D_val = pool(data=x_test, label=y_test, cat_features=cat_columns)

In [7]:
# number of hyperparameter configurations to evaluate (passed to fmin as max_evals)
N_HYPEROPT_PROBES = 10

# the sampling algorithm handed to fmin (hyperopt's `tpe.suggest`)
HYPEROPT_ALGO = tpe.suggest 

def get_catboost_params(space):
    """Convert one hyperopt sample into CatBoost keyword values.

    Parameters
    ----------
    space : dict
        A sampled point with the keys 'learning_rate', 'depth',
        'l2_leaf_reg' and 'one_hot_max_size'.

    Returns
    -------
    dict
        The same four keys, with 'depth' and 'one_hot_max_size' converted
        to int and the other two values passed through unchanged.
    """
    params = dict()
    params['learning_rate'] = space['learning_rate']
    # hp.quniform yields floats (e.g. 8.0); both tree depth and the one-hot
    # threshold are integer-valued CatBoost parameters, so cast them here.
    params['depth'] = int(space['depth'])
    params['l2_leaf_reg'] = space['l2_leaf_reg']
    params['one_hot_max_size'] = int(space['one_hot_max_size'])
    return params

# ---------------------------------------------------------------------
# Module-level state that objective() updates through `global`:
# the number of objective evaluations so far ...
obj_call_count = 0
# ... and the lowest validation loss seen so far (starts at +inf).
cur_best_loss = np.inf
# Opened in 'w' mode, so an existing log file is truncated each time this runs.
# NOTE(review): nothing in objective() writes to this handle -- confirm
# whether per-trial logging was intended.
log_writer = open( 'catboost-hyperopt-log.txt', 'w' )


def objective(space):
    """Train one CatBoost model for a sampled configuration and report its loss.

    Parameters
    ----------
    space : dict
        One point sampled by hyperopt; forwarded to get_catboost_params().

    Returns
    -------
    dict
        ``{'loss': <validation RMSE of this trial>, 'status': STATUS_OK}``.

    Side effects
    ------------
    Increments the global ``obj_call_count``, lowers the global
    ``cur_best_loss`` when this trial improves on it, and prints one
    progress line per call.
    """
    global obj_call_count, cur_best_loss

    obj_call_count += 1

    print('\nCatBoost objective call #{} cur_best_loss={:7.5f}'.format(obj_call_count,cur_best_loss) )

    params = get_catboost_params(space)

    model = cbr(iterations=200,
                learning_rate=params['learning_rate'],
                depth=int(params['depth']),
                eval_metric="RMSE",
                l2_leaf_reg=params['l2_leaf_reg'],
                bagging_temperature=1,
                # sampled with hp.quniform, so it may arrive as a float
                one_hot_max_size=int(params['one_hot_max_size']),
                use_best_model=True)

    model.fit(D_train, eval_set=D_val, silent=True)

    # The original key 'validation_0' is tried first; 'validation' is a
    # fallback for CatBoost releases that name a single eval set that way
    # (NOTE(review): confirm against the installed CatBoost version).
    scores = model.best_score_
    val_scores = scores['validation_0'] if 'validation_0' in scores else scores['validation']
    val_loss = val_scores['RMSE']

    if val_loss < cur_best_loss:
        cur_best_loss = val_loss

    # Report THIS trial's loss. Returning the running best would make every
    # later, worse trial look as good as the best one and mislead the
    # optimiser about which regions of the space are promising.
    return {'loss': val_loss, 'status': STATUS_OK}


# --------------------------------------------------------------------------------

# Search space sampled by hyperopt. The quniform entries are stepped in
# units of 1; learning_rate is drawn log-uniformly between exp(-3.0) and
# exp(-0.7).
space = dict(
    depth=hp.quniform("depth", 4, 12, 1),
    learning_rate=hp.loguniform('learning_rate', -3.0, -0.7),
    l2_leaf_reg=hp.uniform('l2_leaf_reg', 1, 10),
    one_hot_max_size=hp.quniform("one_hot_max_size", 0, 15, 1),
)


# Run the search: objective() is evaluated N_HYPEROPT_PROBES times and every
# trial is recorded in `trials`.
trials = Trials()
try:
    best = hyperopt.fmin(fn=objective,
                         space=space,
                         algo=HYPEROPT_ALGO,
                         max_evals=N_HYPEROPT_PROBES,
                         trials=trials)
finally:
    # The log file handle opened earlier in this cell was never released;
    # close it whether the search finished or raised.
    log_writer.close()

# `best` holds the raw sampled values keyed by label, so quniform
# parameters such as depth are reported as floats.
print('-'*50)
print('The best params:')
print( best )
print('\n\n')


CatBoost objective call #1 cur_best_loss=    inf

CatBoost objective call #2 cur_best_loss=0.98934

CatBoost objective call #3 cur_best_loss=0.98934

CatBoost objective call #4 cur_best_loss=0.98910

CatBoost objective call #5 cur_best_loss=0.98910

CatBoost objective call #6 cur_best_loss=0.98910

CatBoost objective call #7 cur_best_loss=0.98910

CatBoost objective call #8 cur_best_loss=0.98910

CatBoost objective call #9 cur_best_loss=0.98910

CatBoost objective call #10 cur_best_loss=0.98910
--------------------------------------------------
The best params:
{'depth': 8.0, 'l2_leaf_reg': 5.2643768281007945, 'learning_rate': 0.15208871780007424}





In [9]:
## all features 
# The best params:
# {'depth': 8.0, 'l2_leaf_reg': 5.2643768281007945, 'learning_rate': 0.15208871780007424}