In [8]:
using LightGBM
using DelimitedFiles

LIGHTGBM_SOURCE = abspath("../LightGBM-3.3.5")

# Read the tab-separated test/train tables of LightGBM's binary classification example.
binary_test, binary_train = let dir = joinpath(LIGHTGBM_SOURCE, "examples", "binary_classification")
    load(name) = readdlm(joinpath(dir, name), '\t')
    (load("binary.test"), load("binary.train"))
end

# Column 1 is used as the label; every remaining column is used as a feature.
y_train, X_train = binary_train[:, 1], binary_train[:, 2:end]
y_test, X_test = binary_test[:, 1], binary_test[:, 2:end]

# Build the estimator; any parameter not listed here keeps its default value.
estimator = LGBMClassification(;
    objective = "binary",
    metric = ["auc", "binary_logloss"],
    num_class = 1,
    num_iterations = 100,
    early_stopping_round = 5,
    learning_rate = 0.1,
    num_leaves = 1000,
    feature_fraction = 0.8,
    bagging_fraction = 0.9,
    bagging_freq = 1,
)

# Train on the training set, passing the test set as a (features, labels) pair so that
# its scores are reported during fitting.
fit!(estimator, X_train, y_train, (X_test, y_test))

# Predict arbitrary data with the fitted estimator (last expression: shown as cell output).
predict(estimator, X_train)

[LightGBM] [Info] Number of positive: 3716, number of negative: 3284
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6132
[LightGBM] [Info] Number of data points in the train set: 7000, number of used features: 28
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.530857 -> initscore=0.123586
[LightGBM] [Info] Start training from score 0.123586
Iteration: 1, test_1's auc: 0.7224184081527347, 
Iteration: 1, test_1's binary_logloss: 0.6672355828476975
Iteration: 2, test_1's auc: 0.7365437951496389, 
Iteration: 2, test_1's binary_logloss: 0.6548998688529941
Iteration: 3, test_1's auc: 0.74468685500516, 
Iteration: 3, test_1's binary_logloss: 0.6438174499067784
Iteration: 4, test_1's auc: 0.7829108617131063, 
Iteration: 4, test_1's binary_logloss: 0.6256971477706849
Iteration: 5, test_1's auc: 0.7844910990712074, 
Iteration: 5, test_1's binary_logloss: 0.6169980822636008
Iteration: 6, test_1's auc: 0.7897478070175439, 
Iteration: 6, test_1's binary_log

7000×1 Matrix{Float64}:
 0.8618890446596
 0.8381473816945575
 0.9298722289556586
 0.45943882130333724
 0.6725514207064556
 0.08147839610372432
 0.8550765165160386
 0.9572544729363037
 0.8197341486751601
 0.9506474812125634
 ⋮
 0.922930677778771
 0.1970049568539398
 0.8287868785691274
 0.8127550270488795
 0.6029616645763142
 0.3493224642559177
 0.11324811937134613
 0.18694614761919567
 0.7441991052332468

In [9]:
# Cross-validate using a two-fold cross-validation iterable providing training indices.
# The fold boundary is derived from the number of rows in `X_train` instead of being
# hard-coded, so the splits stay in range if the training data changes; for the
# 7000-row example data this yields exactly 1:3500 and 3501:7000.
# Each entry is kept as a concrete index vector (via `collect`), as in the original call.
splits = let n_rows = size(X_train, 1), half = n_rows ÷ 2
    (collect(1:half), collect((half + 1):n_rows))
end
cv(estimator, X_train, y_train, splits)


Cross-validation: 1
[LightGBM] [Info] Number of positive: 1860, number of negative: 1640
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6132
[LightGBM] [Info] Number of data points in the train set: 3500, number of used features: 28
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.531429 -> initscore=0.125880
[LightGBM] [Info] Start training from score 0.125880
Iteration: 1, validation's auc: 0.6643102661716587, 
Iteration: 1, validation's binary_logloss: 0.6762830351464394
Iteration: 2, validation's auc: 0.690786015238275, 
Iteration: 2, validation's binary_logloss: 0.6653903118693901
Iteration: 3, validation's auc: 0.7014937088367312, 
Iteration: 3, validation's binary_logloss: 0.6553398719078511
Iteration: 4, validation's auc: 0.7281074335095226, 
Iteration: 4, validation's binary_logloss: 0.6415788367116529
Iteration: 5, validation's auc: 0.7368574138455407, 
Iteration: 5, validation's binary_logloss: 0.6326239104410488
Iteration: 6, valida

Dict{String, Dict{String, Vector{Float64}}} with 1 entry:
  "validation" => Dict("binary_logloss"=>[0.586477, 0.579398], "auc"=>[0.752507…

In [10]:
# Exhaustive search on an iterable containing all combinations of learning_rate ∈ {.1, .2} and
# bagging_fraction ∈ {.8, .9}.
# The two-iterator comprehension builds a 2×2 array of parameter dictionaries; it is
# traversed with `learning_rate` varying fastest, which is the order the search reports.
params = [Dict(:learning_rate => learning_rate,
               :bagging_fraction => bagging_fraction) for
          learning_rate in (.1, .2),
          bagging_fraction in (.8, .9)]
search_cv(estimator, X_train, y_train, splits, params)

# Save and load the fitted model.
# Build the path with `joinpath` (as done for the data files) rather than concatenating
# a hard-coded "/" separator, so the platform's path separator is used.
filename = joinpath(pwd(), "finished.model")
savemodel(estimator, filename)
loadmodel!(estimator, filename)


Search: 1
Dict(:learning_rate => 0.1, :bagging_fraction => 0.8)
You can set `force_col_wise=true` to remove the overhead.
You can set `force_col_wise=true` to remove the overhead.
- validation's binary_logloss mean: 0.578013940413016, std: 0.0022481713012254666
- validation's auc mean: 0.7630972141334994, std: 0.001964220425692013

Search: 2
Dict(:learning_rate => 0.2, :bagging_fraction => 0.8)
You can set `force_col_wise=true` to remove the overhead.
You can set `force_col_wise=true` to remove the overhead.
- validation's binary_logloss mean: 0.5947483595330175, std: 0.0040607141121178185
- validation's auc mean: 0.750553104320484, std: 0.0046759408965666525

Search: 3
Dict(:learning_rate => 0.1, :bagging_fraction => 0.9)
You can set `force_col_wise=true` to remove the overhead.
You can set `force_col_wise=true` to remove the overhead.
- validation's binary_logloss mean: 0.5829376820423164, std: 0.0050058454080974465
- validation's auc mean: 0.756891855884815, std: 0.0062018266238590