In [1]:
import torch
import numpy as np
import lightgbm as lgb
from sklearn.svm import SVR
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import RandomizedSearchCV, train_test_split

In [2]:
x_train = torch.load('../data/X_tensor_cpu.pt')
y_train = torch.load('../data/y_tensor.pt')

In [3]:
X_train = []
for x in x_train:
    vector = [v.detach().numpy() for v in x]
    X_train.append(vector)

In [4]:
param_grid_lgbm = {
    'boosting_type': ['gbdt', 'dart', 'goss'],
    'num_leaves': np.linspace(10, 150, 15, dtype=int),
    'n_estimators': np.linspace(50, 200, 10, dtype=int),
    'learning_rate': np.logspace(-4, -1, 10),
    'subsample_for_bin': np.linspace(20000, 300000, 15, dtype=int),
    'min_split_gain': [0, 0.001, 0.01, 0.1, 1],
    'min_child_weight': [0.0001, 0.001, 0.01, 0.1],
    'min_child_samples': np.linspace(20, 500, 15, dtype=int),
    'reg_alpha': np.logspace(-3, 2, 6),
    'reg_lambda': np.logspace(-3, 2, 6),
    'colsample_bytree': np.linspace(0.1, 1, 10),
}

In [5]:
param_grid_etr = {
    'n_estimators': np.linspace(10, 200, 15, dtype=int),
    'max_depth': np.linspace(1, 25, 5, dtype=int),
    'min_samples_split': np.linspace(2, 20, 5, dtype=int),
    'min_samples_leaf': np.linspace(1, 15, 5, dtype=int),
    'max_features': [1.0, 'sqrt', 'log2']
}

In [6]:
param_grid_svr = {
    'C': [0.1, 1, 5, 10, 50, 100],
    'gamma': [0.01, 0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [1, 2, 3, 4],
    'epsilon': [0.01, 0.1, 1]
}   

In [7]:
lgbm_regressor = lgb.LGBMRegressor()
svr_regressor = SVR()
etr_regressor = ExtraTreesRegressor()

In [8]:
n_iter = 800
scoring = 'neg_root_mean_squared_error'

In [9]:
random_search_lgbm = RandomizedSearchCV(
    lgbm_regressor,
    param_distributions=param_grid_lgbm,
    n_iter=n_iter,
    cv=5,
    n_jobs=-1,
    scoring=scoring
)

In [10]:
random_search_svr = RandomizedSearchCV(
    svr_regressor,
    param_distributions=param_grid_svr,
    n_iter=n_iter,
    cv=5,
    n_jobs=-1,
    scoring=scoring
)

In [11]:
random_search_etr = RandomizedSearchCV(
    etr_regressor,
    param_distributions=param_grid_etr,
    n_iter=n_iter,
    cv=5,
    n_jobs=-1,
    scoring=scoring,
)

In [12]:
random_search_lgbm.fit(X_train, y_train)

In [13]:
random_search_svr.fit(X_train, y_train)

In [14]:
random_search_etr.fit(X_train, y_train)

In [15]:
print('Best params for LGBM')
print(random_search_lgbm.best_params_)

Best params for LGBM
{'subsample_for_bin': 140000, 'reg_lambda': 0.1, 'reg_alpha': 1.0, 'num_leaves': 100, 'n_estimators': 166, 'min_split_gain': 1, 'min_child_weight': 0.0001, 'min_child_samples': 20, 'learning_rate': 0.1, 'colsample_bytree': 1.0, 'boosting_type': 'dart'}


In [16]:
# lgbm_params = {
#     'subsample_for_bin': 140000,
#     'reg_lambda': 0.1,
#     'reg_alpha': 1.0,
#     'num_leaves': 100,
#     'n_estimators': 166,
#     'min_split_gain': 1,
#     'min_child_weight': 0.0001,
#     'min_child_samples': 20,
#     'learning_rate': 0.1,
#     'colsample_bytree': 1.0,
#     'boosting_type': 'dart'
# }

In [17]:
print('Best params for SVR')
print(random_search_svr.best_params_)

Best params for SVR
{'kernel': 'rbf', 'gamma': 1, 'epsilon': 0.01, 'degree': 2, 'C': 5}


In [18]:
# svr_params = {
#     'kernel': 'rbf',
#     'gamma': 1,
#     'epsilon': 0.01,
#     'degree': 2,
#     'C': 5
# }

In [19]:
print('Best params for ETR')
print(random_search_etr.best_params_)

Best params for ETR
{'n_estimators': 37, 'min_samples_split': 15, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 7}


In [20]:
# etr_params = {
#     'n_estimators': 37,
#     'min_samples_split': 15,
#     'min_samples_leaf': 4,
#     'max_features': 'log2',
#     'max_depth': 7
# }