In [1]:
import os

import joblib as jb
import numpy as np
import pandas as pd

In [4]:
# Pre-split training frames (joblib dumps) and the feather-format test frame.
train_train_level0 = jb.load("train_train_level0.pkl.z")
train_train_level1 = jb.load("train_train_level1.pkl.z")
train_valid_level1 = jb.load("train_valid_level1.pkl.z")
test = pd.read_feather("test.f")

# Level-0 training split: columns whose name matches 'feature' are the inputs,
# 'target_kazutsugi' is the label.
X_train0 = train_train_level0.filter(regex=r'feature', axis=1)
y_train0 = train_train_level0['target_kazutsugi']

# Level-1 training split.
X_train1 = train_train_level1.filter(regex=r'feature', axis=1)
y_train1 = train_train_level1['target_kazutsugi']

# Level-1 validation split.
X_val1 = train_valid_level1.filter(regex=r'feature', axis=1)
y_val1 = train_valid_level1['target_kazutsugi']

# Test frame: only the feature columns are extracted, no label is read here.
X_test = test.filter(regex=r'feature', axis=1)

In [5]:
from scipy.stats import spearmanr


In [14]:
from lightgbm import LGBMRegressor
from skopt import gp_minimize


def _predict_and_dump(mdl, features, out_dir, file_name):
    """Predict ``features`` with ``mdl``, save the result in ``out_dir`` and return it.

    ``out_dir`` is created when missing so the dump cannot fail on an absent folder.
    """
    os.makedirs(out_dir, exist_ok=True)
    preds = mdl.predict(features)
    jb.dump(preds, "{}/{}".format(out_dir, file_name))
    return preds


def tune_lgbm(params):
    """Objective for ``gp_minimize``: fit one LightGBM model and score it.

    The model is trained on the level-0 split (``X_train0``/``y_train0``). Its
    predictions for ``X_train1``, ``X_val1`` and ``X_test`` are saved with joblib
    under ``./preds_train1``, ``./preds_val1`` and ``./preds_test``; the file
    name encodes the three hyperparameters.

    Parameters
    ----------
    params : sequence of three values
        ``(num_leaves, min_data_in_leaf, learning_rate)``, in that order.

    Returns
    -------
    float
        Negated Spearman correlation between ``y_train1`` and the predictions
        on ``X_train1`` (negated because the value is minimised), or ``0.0``
        when the correlation is undefined (NaN).
    """
    num_leaves, min_data_in_leaf, learning_rate = params
    file_name = "lgbm_{}_{}_{}.pkl.z".format(num_leaves, min_data_in_leaf, learning_rate)

    mdl = LGBMRegressor(num_leaves=num_leaves, min_data_in_leaf=min_data_in_leaf,
                        learning_rate=learning_rate, n_estimators=100, random_state=0)
    mdl.fit(X_train0, y_train0)

    # Only the train1 predictions are scored; val1 and test are just persisted.
    preds_train1 = _predict_and_dump(mdl, X_train1, "./preds_train1", file_name)
    metric = spearmanr(y_train1.values, preds_train1).correlation

    _predict_and_dump(mdl, X_val1, "./preds_val1", file_name)
    _predict_and_dump(mdl, X_test, "./preds_test", file_name)

    print(params, metric)
    print()

    # spearmanr yields NaN when one input is constant (e.g. a model that made
    # no splits). NaN is not a usable objective value, so score such a model
    # as "no correlation" instead of passing NaN to the optimiser.
    if np.isnan(metric):
        return 0.0
    return -metric

In [16]:
# Search space, listed in the order tune_lgbm unpacks its params.
space = [
    (2, 200),                     # num_leaves
    (1, 1000),                    # min_data_in_leaf
    (1e-3, 1e-2, 'log-uniform'),  # learning_rate
]

# 20 evaluations of the objective, seeded for repeatability.
res = gp_minimize(
    tune_lgbm,
    space,
    n_calls=20,
    random_state=0,
    verbose=1,
)

Iteration No: 1 started. Evaluating function at random point.
[119, 844, 0.007210171877207321] 0.026955700020330455

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 12.6890
Function value obtained: -0.0270
Current minimum: -0.0270
Iteration No: 2 started. Evaluating function at random point.
[170, 624, 0.002423157857285798] 0.0214846981159758

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 15.0336
Function value obtained: -0.0215
Current minimum: -0.0270
Iteration No: 3 started. Evaluating function at random point.
[61, 58, 0.0018735112038624302] 0.019578402948099274

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 12.9609
Function value obtained: -0.0196
Current minimum: -0.0270
Iteration No: 4 started. Evaluating function at random point.
[97, 812, 0.0030197929882137525] 0.022529559135000904

Iteration No: 4 ended. Evaluation done at random point.
Time taken: 13.3611
Function value obtained: -0.0225
Current minimum: -0.0270

In [None]:
# TODO: keep an experiment journal