# Tuning of parameters for DGA-Detector


## Loading data


In [1]:
import pyarrow as pa
import pyarrow.parquet as pq

# Load the pre-computed feature tables for both classes.
dga = pq.read_table("floor/02-Preprocessed-data/DGA/01-DGA-Features.parquet")
benign = pq.read_table("floor/02-Preprocessed-data/Benign/00-Benign-Features.parquet")

# Realign schemas (the parquet files may store their columns in different orders).
# Table.cast() only converts types and expects the field names to already be in
# the target order, so the columns are first re-ordered by name and then cast.
benign = benign.select(dga.schema.names).cast(dga.schema)

# Attach the class name to every row before the two tables are merged.
dga = dga.append_column("label", pa.array(["dga"] * len(dga)))
benign = benign.append_column("label", pa.array(["benign"] * len(benign)))

# The raw domain name is an identifier, not a feature.
dga = dga.drop(["domain_name"])
benign = benign.drop(["domain_name"])

# Concatenate the tables
data = pa.concat_tables([dga, benign])
df = data.to_pandas()

# Handle NaNs: missing feature values are encoded as -1.
# Re-binding instead of inplace=True gives the same frame contents.
df = df.fillna(-1)

df

Unnamed: 0,lex_name_len,lex_has_digit,lex_phishing_keyword_count,lex_consecutive_chars,lex_tld_len,lex_tld_abuse_score,lex_sld_len,lex_sld_norm_entropy,lex_sld_digit_count,lex_sld_digit_ratio,...,mod_jaccard_tri-grams_benign,mod_jaccard_penta-grams_benign,mod_jaccard_bi-grams_dga,mod_jaccard_tri-grams_dga,mod_jaccard_penta-grams_dga,lex_avg_part_len,lex_stdev_part_lens,lex_longest_part_len,lex_shortest_sub_len,label
0,15,0,0,2,2,0.0000,12,0.237949,0.0,0.000000,...,0.538462,0.000000,0.857143,0.076923,0.000000,7.000000,0.500000,12,12,dga
1,31,1,0,2,3,0.0081,27,0.151053,9.0,0.333333,...,0.172414,0.000000,0.900000,0.137931,0.074074,15.000000,0.500000,27,27,dga
2,19,0,0,1,3,0.6554,15,0.221549,0.0,0.000000,...,0.411765,0.133333,0.888889,0.529412,0.000000,9.000000,0.500000,15,15,dga
3,13,0,1,1,4,0.0043,8,0.375000,0.0,0.000000,...,0.454545,0.222222,0.833333,0.272727,0.111111,6.000000,0.500000,8,8,dga
4,30,1,0,1,3,0.0081,26,0.165993,8.0,0.307692,...,0.142857,0.000000,0.689655,0.107143,0.000000,14.500000,0.500000,26,26,dga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
691813,15,0,0,2,10,0.0000,4,0.375000,0.0,0.000000,...,0.615385,0.454545,0.714286,0.076923,0.000000,4.333333,0.528321,7,4,benign
691814,30,0,0,2,5,0.0000,8,0.343750,0.0,0.000000,...,0.714286,0.307692,0.862069,0.285714,0.000000,9.333333,0.528321,15,23,benign
691815,9,0,0,2,2,0.0000,6,0.375272,0.0,0.000000,...,0.571429,0.000000,0.750000,0.142857,0.000000,4.000000,0.500000,6,6,benign
691816,7,0,0,3,3,0.0000,3,0.000000,0.0,0.000000,...,0.400000,0.000000,0.800000,0.000000,0.000000,3.000000,0.000000,3,3,benign


## Subsampling the dataset


In [12]:
subsample = 1.0  # fraction of rows to keep; 1.0 means no subsample

if subsample < 1.0:
    # Fixed seed: without it every run draws different rows, so tuning results
    # obtained on a subsample could not be reproduced.
    df = df.sample(frac=subsample, random_state=42)

## Suppressing unwanted warnings


In [13]:
import warnings
warnings.filterwarnings('ignore', category=FutureWarning, module='sklearn.*')
warnings.filterwarnings('ignore', category=FutureWarning, module='xgboost.*')
warnings.filterwarnings('ignore', message="Series.__getitem__ treating keys as positions is deprecated.*")
warnings.filterwarnings("ignore", category=FutureWarning)

# Train-Test Split


In [14]:
from sklearn.model_selection import train_test_split

# Numeric encoding of the two target classes
class_map = dict(benign=0, dga=1)

# X matrix: every column except the target
features = df.drop(columns="label").copy()
# y vector: class names translated through class_map
labels = df["label"].apply(lambda name: class_map[name])

# Stratified, shuffled 70/30 split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    stratify=labels,
    shuffle=True,
    random_state=42,
)

## Custom tuning metric

Allows setting weights for **Precision, Recall, F1, FPR, logloss** and **Overfitting** (the difference between the train and validation scores).

- To use it, set **scoring** to **FETA_Score** in the hyperparameter search.
- Otherwise, set **scoring** to the metric you want: **neg_log_loss**, **precision**, **f1**, etc.


In [15]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, log_loss

def FETA_Score(estimator, X, y):
    """Weighted combination of classification metrics, usable as a ``scoring`` callable.

    The ``(estimator, X, y)`` signature is the one the grid searches in this
    notebook rely on when they pass ``scoring=FETA_Score``.

    Parameters
    ----------
    estimator
        Fitted classifier exposing ``predict`` and ``score``. ``predict_proba``
        is only required when the log-loss weight below is non-zero.
    X
        Samples to evaluate on.
    y
        True labels for ``X`` (0 = benign, 1 = dga, as encoded by ``class_map``).

    Returns
    -------
    float
        Weighted sum of the metrics; higher is better.

    Notes
    -----
    The overfitting term reads the notebook-level ``X_train`` / ``y_train``.
    It is only meaningful when ``estimator`` can consume that raw split, i.e.
    any scaling must be part of the estimator itself (for example a Pipeline)
    rather than applied to the data beforehand.
    """
    # Assigning weights
    w_precision = 0.1     # Precision
    w_recall = 0.2        # Recall
    w_f1 = 0.6            # F1
    w_fpr = -0.0          # Negative because lower FPR is better
    w_logloss = -0.0      # Negative because lower log loss is better
    w_overfitting = -0.1  # Negative because lower overfitting is better

    # Getting predictions
    y_pred = estimator.predict(X)

    # Precision, recall and F1 of the positive (dga) class
    precision = precision_score(y, y_pred)
    recall = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)

    # Calculating FPR. The labels are pinned so the matrix is always 2x2, even
    # when y and y_pred happen to contain a single class; otherwise the
    # four-way unpacking below would fail.
    tn, fp, fn, tp = confusion_matrix(y, y_pred, labels=[0, 1]).ravel()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0

    # Calculating Log Loss, only when it actually contributes to the score.
    # This keeps estimators without predict_proba usable while the weight is 0.
    if w_logloss != 0:
        y_pred_proba = estimator.predict_proba(X)
        logloss = log_loss(y, y_pred_proba, labels=estimator.classes_)
    else:
        logloss = 0.0

    # Estimating overfitting (accuracy gap between the global training split
    # and the data passed in)
    train_score = estimator.score(X_train, y_train)
    validation_score = estimator.score(X, y)
    overfitting = train_score - validation_score

    # Combining metrics with weights (the log-loss term was previously missing)
    combined_score = (
        w_precision * precision
        + w_recall * recall
        + w_f1 * f1
        + w_fpr * fpr
        + w_logloss * logloss
        + w_overfitting * overfitting
    )

    return combined_score


In [16]:
# Tuning - graphs
def DisplayTuningGraph(scores):
    """Plot mean train/test scores per grid-search candidate and mark the best ones.

    Parameters
    ----------
    scores : pandas.DataFrame
        ``cv_results_`` of a finished search converted to a DataFrame. It must
        contain ``mean_test_score``; ``mean_train_score`` is plotted as well
        when present (it only exists if the search ran with
        ``return_train_score=True``).

    Returns
    -------
    None
        The figure is drawn as a side effect and ``scores`` is printed.
    """
    from matplotlib import pyplot as plt

    # Figure settings must be in place BEFORE the figure is created; assigning
    # them afterwards only affects figures drawn later.
    plt.rcParams["figure.figsize"] = [12, 12]
    plt.rcParams["figure.autolayout"] = True
    plt.rcParams["figure.dpi"] = 300

    col_names = [
        name
        for name in ('mean_train_score', 'mean_test_score')
        if name in scores.columns
    ]
    means_df = scores[col_names]
    ax = means_df.plot(kind='line', grid=True)

    # Row label of the best candidate for each plotted curve
    max_ids = means_df.idxmax(axis=0)
    best_test_id = max_ids['mean_test_score']

    for best_id in max_ids:
        # Points at the candidate with the best test score are highlighted in red
        color = 'r' if best_id == best_test_id else 'grey'

        for col_name in col_names:
            # Label-based lookup (no positional Series indexing)
            value = means_df.loc[best_id, col_name]

            ax.scatter([best_id], [value],
                       marker='o',
                       color=color)

            ax.annotate(str(round(value, 3))+"_ID="+str(best_id),
                        (best_id, value),
                        xytext=(best_id+3, value))
    print(scores)


# Logistic Regression


In [17]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import Pipeline

# Scaled copies of the split, kept under their original names for any cell
# that works on pre-scaled arrays. The grid search below does NOT use them.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Scaling + Logistic Regression as a single estimator:
#  * the scaler is re-fitted on the training part of every CV fold, so no
#    statistics leak from the validation part into training;
#  * the fitted estimator accepts the raw feature frame, which is what
#    FETA_Score feeds it when it scores the global X_train.
log_reg_model = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("log_reg", LogisticRegression(random_state=7)),
    ]
)

# Grid of parameters to search ("<step>__<param>" addresses a pipeline step)
grid = {
    "log_reg__C": [100],  # Regularization parameter (default: 1.0)
    "log_reg__penalty": ["l2"],  # None, l1, l2 (default), elasticnet (both l1 and l2)
    "log_reg__max_iter": [100, 1000, 3000, 5000],  # Maximum number of iterations (default: 100)
    "log_reg__solver": ["liblinear"],  # lbfgs, liblinear, newton-cg, newton-cholesky, sag, saga
}

# Stratified K-Fold cross-validator
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=7)

# Grid Search
grid_search = GridSearchCV(
    estimator=log_reg_model,
    param_grid=grid,
    cv=cv,
    scoring=FETA_Score,
    verbose=3,
    return_train_score=True,
    n_jobs=-1,  # Use all available CPUs
)

# Fit GridSearchCV to the raw training data (scaling happens inside the pipeline)
grid_search.fit(X_train, y_train)

# Results of the grid search in a DataFrame
scores = pd.DataFrame(grid_search.cv_results_)

# Display tuning graph of grid search results
DisplayTuningGraph(scores)

# Set Pandas to display up to 50 columns of the DataFrame
pd.set_option("display.max_columns", 50)

# Print 5 best scores based on rank
scores.sort_values("rank_test_score").head()

Fitting 4 folds for each of 4 candidates, totalling 16 fits


# SVM


In [None]:
import copy

import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

from sklearn.pipeline import Pipeline

svm_x_train = copy.deepcopy(X_train)
svm_x_test = copy.deepcopy(X_test)

svm_y_train = copy.deepcopy(y_train)
svm_y_test = copy.deepcopy(y_test)

# Fill NaNs. The unscaled frame is what the grid search is fitted on.
svm_x_train_raw = svm_x_train.fillna(0)

# Scaled copy kept under the original names for any cell that expects a
# pre-scaled array. The grid search below does NOT use it.
scaler = MinMaxScaler()
svm_x_train = scaler.fit_transform(svm_x_train_raw)

# Scaling + Support Vector Machine as a single estimator:
#  * the scaler is re-fitted on the training part of every CV fold, so no
#    statistics leak from the validation part into training;
#  * the fitted estimator accepts the raw feature frame, which is what
#    FETA_Score feeds it when it scores the global X_train.
# NOTE(review): if the scorer needs predict_proba (e.g. for a log-loss term),
# SVC has to be built with probability=True, which is considerably slower.
svm = Pipeline(
    steps=[
        ("scaler", MinMaxScaler()),
        ("svc", SVC(random_state=7)),
    ]
)

# Grid of parameters to search ("<step>__<param>" addresses a pipeline step)
grid = {
    "svc__C": [58, 59, 60],  # Regularization parameter
    "svc__gamma": [0.09, 0.1, 0.11, 0.12],  # Kernel coefficient
    "svc__kernel": ["rbf"],  # ['rbf', 'linear', 'poly', 'sigmoid']
}

# Stratified K-Folds cross-validator
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=7)

# Grid Search
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=grid,
    cv=cv,
    scoring=FETA_Score,
    verbose=3,
    return_train_score=True,
    n_jobs=-1,  # Use all available CPUs
)

# Fit GridSearchCV to the raw training data (scaling happens inside the pipeline)
grid_search.fit(svm_x_train_raw, svm_y_train)

# Results of the grid search in a DataFrame
scores = pd.DataFrame(grid_search.cv_results_)

# Display tuning graph of grid search results
DisplayTuningGraph(scores)

# Set Pandas to display up to 50 columns of the DataFrame
pd.set_option("display.max_columns", 50)

# Print 5 best scores based on rank
scores.sort_values("rank_test_score").head()

Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV] END .......C=58, gamma=0.09, kernel=rbf, shrinking=True; total time=19.8min
[CV] END .......C=58, gamma=0.09, kernel=rbf, shrinking=True; total time=18.2min
[CV] END .......C=58, gamma=0.09, kernel=rbf, shrinking=True; total time=17.8min
[CV] END .....C=58, gamma=0.09, kernel=rbf, shrinking=False; total time=114.8min
[CV] END .....C=58, gamma=0.09, kernel=rbf, shrinking=False; total time=112.2min
[CV] END .....C=58, gamma=0.09, kernel=rbf, shrinking=False; total time=113.3min
[CV] END ........C=58, gamma=0.1, kernel=rbf, shrinking=True; total time=19.2min
[CV] END ........C=58, gamma=0.1, kernel=rbf, shrinking=True; total time=18.0min
[CV] END ........C=58, gamma=0.1, kernel=rbf, shrinking=True; total time=18.2min
[CV] END ......C=58, gamma=0.1, kernel=rbf, shrinking=False; total time=114.2min
[CV] END ......C=58, gamma=0.1, kernel=rbf, shrinking=False; total time=113.5min
[CV] END ......C=58, gamma=0.1, kernel=rbf, shri

# Decision Tree


In [33]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier

# Decision Tree base model
# DecisionTreeClassifier has no n_jobs parameter (passing one raises a
# TypeError); parallelism comes from GridSearchCV(n_jobs=-1) below.
dt = DecisionTreeClassifier(random_state=7)

# Grid of parameters to search
# NOTE(review): a node can only be split if both children keep at least
# min_samples_leaf samples, so with min_samples_leaf >= 5 the values 5..9 of
# min_samples_split probably never make a difference -- confirm and trim.
grid = {
    "max_depth": [30, 50],  # Maximum depths or None for no limit
    "min_samples_split": range(
        5, 10
    ),  # Broader range for minimum number of samples required to split an internal node
    "min_samples_leaf": [
        5,
        8,
        9,
    ],  # Range for minimum number of samples required at a leaf node
    "max_leaf_nodes": [500],  # Maximum number of leaf nodes or None for no limit
}

# Stratified K-Folds cross-validator
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=7)

# Grid Search
grid_search = GridSearchCV(
    estimator=dt,
    param_grid=grid,
    cv=cv,
    scoring=FETA_Score,
    verbose=3,
    return_train_score=True,
    n_jobs=-1,  # Use all available CPUs
)

# Fit GridSearchCV to the training data (trees need no feature scaling)
grid_search.fit(X_train, y_train)

# Results of the grid search in a DataFrame
scores = pd.DataFrame(grid_search.cv_results_)

# Display tuning graph of grid search results
DisplayTuningGraph(scores)

# Set Pandas to display up to 50 columns of the DataFrame
pd.set_option("display.max_columns", 50)

# Print 5 best scores based on rank
scores.sort_values("rank_test_score").head()

# Random Forest


In [46]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# Random Forest base model
# Settings that are NOT searched go on the estimator itself: every value in a
# GridSearchCV param_grid must be a sequence of candidates, so a bare string or
# dict there is rejected.
rf = RandomForestClassifier(
    # The function to measure the quality of a split. "entropy" is used for the information gain.
    criterion="entropy",
    # Weights associated with classes. In this case, class 0 has weight 1, and class 1 has weight 5.
    # This is used to address imbalances in the training data.
    class_weight={0: 1, 1: 5},
    random_state=7,
    n_jobs=-1,
)

# Grid of parameters to search
# NOTE(review): 6 * 4 * 6 * 5 * 4 = 2880 candidates, each fitted once per CV fold.
grid = {
    # The number of trees in the forest. A list of possible values to try.
    "n_estimators": [99, 100, 125, 150, 175, 200],

    # The maximum depth of each tree. Deeper trees can learn more detailed data specifics, at the risk of overfitting.
    "max_depth": [11, 14, 18, 19],

    # The minimum number of samples required to split an internal node. Higher values prevent the model from learning overly specific patterns, thus reducing overfitting.
    "min_samples_split": [2, 3, 9, 10, 13, 17],

    # The minimum number of samples a node must have to be considered a leaf. Can help in controlling overfitting.
    "min_samples_leaf": [1, 2, 3, 6, 9],

    # The maximum number of leaf nodes per tree. 'None' means unlimited. Limiting this number can effectively reduce model complexity.
    "max_leaf_nodes": [None, 150, 180, 190]
}

# Stratified K-Folds cross-validator
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=7)

# Grid Search
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=grid,
    cv=cv,
    scoring=FETA_Score,
    verbose=3,
    return_train_score=True,
    n_jobs=-1,  # Use all available CPUs
)

# Fit GridSearchCV to the training data (trees need no feature scaling)
grid_search.fit(X_train, y_train)

# Results of the grid search in a DataFrame
scores = pd.DataFrame(grid_search.cv_results_)

# Display tuning graph of grid search results
DisplayTuningGraph(scores)

# Set Pandas to display up to 50 columns of the DataFrame
pd.set_option("display.max_columns", 50)

# Print 5 best scores based on rank
scores.sort_values("rank_test_score").head()

Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV 1/3] END max_depth=10, n_estimators=50;, score=(train=0.844, test=0.831) total time=   6.8s
[CV 2/3] END max_depth=10, n_estimators=50;, score=(train=0.846, test=0.835) total time=   6.8s
[CV 3/3] END max_depth=10, n_estimators=50;, score=(train=0.847, test=0.829) total time=   6.8s
[CV 1/3] END max_depth=10, n_estimators=70;, score=(train=0.845, test=0.831) total time=   9.4s
[CV 2/3] END max_depth=10, n_estimators=70;, score=(train=0.845, test=0.833) total time=   9.5s
[CV 3/3] END max_depth=10, n_estimators=70;, score=(train=0.846, test=0.829) total time=   9.4s
[CV 1/3] END max_depth=10, n_estimators=143;, score=(train=0.846, test=0.832) total time=  19.2s
[CV 2/3] END max_depth=10, n_estimators=143;, score=(train=0.846, test=0.834) total time=  19.6s
[CV 3/3] END max_depth=10, n_estimators=143;, score=(train=0.847, test=0.828) total time=  19.3s
[CV 1/3] END max_depth=10, n_estimators=200;, score=(train=0.846, test=0

# AdaBoost


In [73]:
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier

# AdaBoost base model
ada = AdaBoostClassifier(random_state=7)

# The weak-learner parameter is called "estimator" in newer scikit-learn
# releases and "base_estimator" in older ones; use whichever the installed
# version exposes so the grid is valid in both.
weak_learner_param = (
    "estimator" if "estimator" in ada.get_params(deep=False) else "base_estimator"
)

# Grid of parameters to search
grid = {
    weak_learner_param: [
        DecisionTreeClassifier(max_depth=depth) for depth in range(1, 3)
    ],  # This parameter defines the type of model AdaBoost will use as the weak learner.
    "n_estimators": [150, 200, 250],  # Number of weak learners to train iteratively
    "learning_rate": [
        0.4,
        0.75,
        1,
    ],  # Learning rate shrinks the contribution of each classifier
}

# Stratified K-Folds cross-validator
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=7)

# Grid Search
grid_search = GridSearchCV(
    estimator=ada,
    param_grid=grid,
    cv=cv,
    scoring=FETA_Score,
    verbose=3,
    return_train_score=True,
    n_jobs=-1,  # Use all available CPUs
)

# Fit GridSearchCV to the training data (trees need no feature scaling)
grid_search.fit(X_train, y_train)

# Results of the grid search in a DataFrame
scores = pd.DataFrame(grid_search.cv_results_)

# Display tuning graph of grid search results
DisplayTuningGraph(scores)

# Set Pandas to display up to 50 columns of the DataFrame
pd.set_option("display.max_columns", 50)

# Print 5 best scores based on rank
scores.sort_values("rank_test_score").head()

Fitting 2 folds for each of 3 candidates, totalling 6 fits
[CV 1/2] END n_estimators=200;, score=(train=0.505, test=0.463) total time= 1.1min
[CV 2/2] END n_estimators=200;, score=(train=0.504, test=0.468) total time= 1.1min
[CV 1/2] END n_estimators=400;, score=(train=0.504, test=0.467) total time= 2.3min
[CV 2/2] END n_estimators=400;, score=(train=0.504, test=0.471) total time= 2.1min
[CV 1/2] END n_estimators=650;, score=(train=0.504, test=0.470) total time= 4.0min
[CV 2/2] END n_estimators=650;, score=(train=0.504, test=0.473) total time= 3.6min


# XGBoost


In [37]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split
from xgboost import XGBClassifier

# Rebuild the X matrix and y vector from df (class names go through class_map)
features = df.drop(columns="label").copy()
labels = df["label"].apply(lambda name: class_map[name])

# Stratified, shuffled 70/30 split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    stratify=labels,
    shuffle=True,
    random_state=42,
)

# Fixed (not searched) XGBoost settings
params = {
    # learning task and evaluation
    "objective": "binary:logistic",
    "eval_metric": ["error", "logloss", "auc"],
    "random_state": 7,
    # boosting schedule
    "n_estimators": 550,
    "eta": 0.15,
    # tree construction
    "tree_method": "gpu_hist",
    "grow_policy": "lossguide",
    "max_depth": 9,
    "max_bin": 512,
    "min_child_weight": 2.0,
    "max_delta_step": 0,
    # row sampling
    "subsample": 0.6,
    "sampling_method": "gradient_based",
    # regularisation
    "alpha": 0,
    "gamma": 0.1,
    "lambda": 1.0,
}

# XGBoost base model
xgb = XGBClassifier(**params)

# Only the positive-class weight is searched; further parameters can be added here
grid = {"scale_pos_weight": [1, 2, 3, 4, 4.5, 5, 5.2, 6.28]}

# Stratified K-Folds cross-validator
cv = StratifiedKFold(2, shuffle=True, random_state=7)

# Grid Search with the custom metric (a built-in name such as 'f1' also works as scoring)
grid_search = GridSearchCV(
    xgb,
    grid,
    scoring=FETA_Score,
    cv=cv,
    n_jobs=-1,  # Use all available CPUs
    return_train_score=True,
    verbose=3,
)

# The train and test splits are handed to XGBoost as evaluation sets
grid_search.fit(
    X_train,
    y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    verbose=False,
)

# Best estimator
best_xgb = grid_search.best_estimator_

# Results of the grid search in a DataFrame, plotted and then shown in full
scores = pd.DataFrame(grid_search.cv_results_)
DisplayTuningGraph(scores)

scores

Fitting 2 folds for each of 8 candidates, totalling 16 fits
[CV 1/2] END scale_pos_weight=1;, score=(train=0.900, test=0.870) total time=  19.3s
[CV 2/2] END scale_pos_weight=1;, score=(train=0.900, test=0.870) total time=  19.5s
[CV 1/2] END scale_pos_weight=2;, score=(train=0.900, test=0.871) total time=  19.9s
[CV 2/2] END scale_pos_weight=2;, score=(train=0.900, test=0.871) total time=  19.8s
[CV 1/2] END scale_pos_weight=3;, score=(train=0.900, test=0.872) total time=  20.3s
[CV 2/2] END scale_pos_weight=3;, score=(train=0.900, test=0.872) total time=  20.2s
[CV 1/2] END scale_pos_weight=4;, score=(train=0.900, test=0.873) total time=  20.5s
[CV 2/2] END scale_pos_weight=4;, score=(train=0.900, test=0.872) total time=  20.3s
[CV 1/2] END scale_pos_weight=4.5;, score=(train=0.900, test=0.873) total time=  20.5s
[CV 2/2] END scale_pos_weight=4.5;, score=(train=0.900, test=0.873) total time=  20.7s
[CV 1/2] END scale_pos_weight=5;, score=(train=0.900, test=0.872) total time=  20.5s
[

# LightGBM


In [62]:
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# LightGBM base model
# Settings that are NOT searched go on the estimator itself: every value in a
# GridSearchCV param_grid must be a sequence of candidates, so scalar entries
# there are rejected.
lgbm = LGBMClassifier(
    objective="binary",  # binary classification
    boosting_type="gbdt",  # Gradient Boosting Decision Tree
    min_child_samples=25,
    colsample_bytree=1,
    reg_lambda=0.45,
    subsample=0.85,
    subsample_freq=1,
    subsample_for_bin=200000,
    min_split_gain=0.01,
    n_jobs=-1,
    random_state=7,
)

# Grid of parameters to search
# NOTE(review): 3 * 4 * 3 * 3 * 4 = 432 candidates, each fitted once per CV fold.
grid = {
    "n_estimators": [1050, 1150, 1200],
    "max_depth": [11, 12, 13, 14],
    "num_leaves": [28, 30, 32],
    "learning_rate": [0.075, 0.1, 0.15],
    "scale_pos_weight": [1.4, 1.5, 1.6, 1.7]
}

# Stratified K-Folds cross-validator
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=7)

# Grid Search
grid_search = GridSearchCV(
    estimator=lgbm,
    param_grid=grid,
    cv=cv,
    scoring=FETA_Score,
    verbose=3,
    return_train_score=True,
    n_jobs=-1,  # Use all available CPUs
)

# Fit GridSearchCV to the training data (trees need no feature scaling)
grid_search.fit(X_train, y_train)

# Results of the grid search in a DataFrame
scores = pd.DataFrame(grid_search.cv_results_)

# Display tuning graph of grid search results
DisplayTuningGraph(scores)

# Set Pandas to display up to 50 columns of the DataFrame
pd.set_option("display.max_columns", 50)

# Print 5 best scores based on rank
scores.sort_values("rank_test_score").head()

Fitting 3 folds for each of 8 candidates, totalling 24 fits
[LightGBM] [Info] Number of positive: 31898, number of negative: 201866
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.021870 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12388
[LightGBM] [Info] Number of data points in the train set: 233764, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.136454 -> initscore=-1.845061
[LightGBM] [Info] Start training from score -1.845061
[CV 1/3] END max_depth=10, n_estimators=897;, score=(train=1.000, test=0.972) total time=  13.9s
[LightGBM] [Info] Number of positive: 31898, number of negative: 201867
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.021585 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force

[CV 2/3] END max_depth=10, n_estimators=1600;, score=(train=1.000, test=0.973) total time=  25.3s
[LightGBM] [Info] Number of positive: 31898, number of negative: 201867
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.022907 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12400
[LightGBM] [Info] Number of data points in the train set: 233765, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.136453 -> initscore=-1.845066
[LightGBM] [Info] Start training from score -1.845066
[CV 3/3] END max_depth=10, n_estimators=1600;, score=(train=1.000, test=0.972) total time=  24.7s
[LightGBM] [Info] Number of positive: 31898, number of negative: 201866
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.021628 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if m

[LightGBM] [Info] Number of positive: 31898, number of negative: 201867
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.022888 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12400
[LightGBM] [Info] Number of data points in the train set: 233765, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.136453 -> initscore=-1.845066
[LightGBM] [Info] Start training from score -1.845066
[CV 3/3] END max_depth=11, n_estimators=1600;, score=(train=1.000, test=0.972) total time=  26.0s
[LightGBM] [Info] Number of positive: 47847, number of negative: 302800
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.127560 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12392
[LightGBM] [Info] Number of data points in the train set: 350647, numbe