In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from lightgbm import LGBMClassifier
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import make_union
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score

In [2]:
url= "/home/ricardo/code/fingerman/project_loan/raw_data/train.csv"
train=pd.read_csv(url)
train.head(5)

Unnamed: 0,id,annual_income,debt_to_income_ratio,credit_score,loan_amount,interest_rate,gender,marital_status,education_level,employment_status,loan_purpose,grade_subgrade,loan_paid_back
0,0,29367.99,0.084,736,2528.42,13.67,Female,Single,High School,Self-employed,Other,C3,1.0
1,1,22108.02,0.166,636,4593.1,12.92,Male,Married,Master's,Employed,Debt consolidation,D3,0.0
2,2,49566.2,0.097,694,17005.15,9.76,Male,Single,High School,Employed,Debt consolidation,C5,1.0
3,3,46858.25,0.065,533,4682.48,16.1,Female,Single,High School,Employed,Debt consolidation,F1,1.0
4,4,25496.7,0.053,665,12184.43,10.21,Male,Married,High School,Employed,Other,D1,1.0


In [3]:
url= "/home/ricardo/code/fingerman/project_loan/raw_data/test.csv"
test=pd.read_csv(url)
test.head(5)

Unnamed: 0,id,annual_income,debt_to_income_ratio,credit_score,loan_amount,interest_rate,gender,marital_status,education_level,employment_status,loan_purpose,grade_subgrade
0,593994,28781.05,0.049,626,11461.42,14.73,Female,Single,High School,Employed,Other,D5
1,593995,46626.39,0.093,732,15492.25,12.85,Female,Married,Master's,Employed,Other,C1
2,593996,54954.89,0.367,611,3796.41,13.29,Male,Single,Bachelor's,Employed,Debt consolidation,D1
3,593997,25644.63,0.11,671,6574.3,9.57,Female,Single,Bachelor's,Employed,Debt consolidation,C3
4,593998,25169.64,0.081,688,17696.89,12.8,Female,Married,PhD,Employed,Business,C1


In [4]:
X=train.drop(columns=["id","loan_paid_back"])
y=train["loan_paid_back"]

In [5]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [6]:
cat_cols = X_train.select_dtypes(include=["object"]).columns
num_cols = X_train.select_dtypes(exclude=["object"]).columns


In [9]:
preprocess = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore",sparse_output=False), cat_cols),
        ("num", RobustScaler(), num_cols)
    ]
)

In [10]:
model = Pipeline([
    ("prep", preprocess),
    ("lgbm", LGBMClassifier(
        n_estimators=500,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        n_jobs=-1,
    ))
])

In [11]:
model.fit(X_train, y_train)

[LightGBM] [Info] Number of positive: 379595, number of negative: 95600
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009039 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 475195, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798819 -> initscore=1.378932
[LightGBM] [Info] Start training from score 1.378932


0,1,2
,steps,"[('prep', ...), ('lgbm', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('cat', ...), ('num', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,with_centering,True
,with_scaling,True
,quantile_range,"(25.0, ...)"
,copy,True
,unit_variance,False

0,1,2
,boosting_type,'gbdt'
,num_leaves,31
,max_depth,-1
,learning_rate,0.05
,n_estimators,500
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001


In [12]:
val_pred = model.predict_proba(X_val)[:, 1]
roc_auc = roc_auc_score(y_val, val_pred)
roc_auc



0.9206775245437111

In [14]:
X_test=test

In [15]:
test_pred = model.predict_proba(X_test)[:, 1]



In [17]:
submission = pd.DataFrame({
    "id": test["id"],
    "loan_paid_back": test_pred
})

submission.to_csv("submission5.csv", index=False)
submission.head()

Unnamed: 0,id,loan_paid_back
0,593994,0.924395
1,593995,0.981201
2,593996,0.473285
3,593997,0.920041
4,593998,0.959447


In [18]:
from sklearn.model_selection import GridSearchCV
from lightgbm import LGBMClassifier

In [19]:
model2 = Pipeline([
    ("prep", preprocess),
    ("lgbm", LGBMClassifier(
        random_state=42,
        n_jobs=-1
    ))
])

In [20]:
param_grid = {
    "lgbm__n_estimators": [200, 500, 800],
    "lgbm__learning_rate": [0.01, 0.05, 0.1],
    "lgbm__subsample": [0.6, 0.8, 1.0],
    "lgbm__colsample_bytree": [0.6, 0.8, 1.0],
    "lgbm__max_depth": [3, 5, 7],
    "lgbm__num_leaves": [31, 50, 70]
}

In [None]:
# grid_search = GridSearchCV(
#     estimator=model2,
#     param_grid=param_grid,
#     scoring="roc_auc",
#     cv=3,
#     n_jobs=-1,
#     verbose=2
# )

In [None]:
from sklearn.model_selection import RandomizedSearchCV

rand_search = RandomizedSearchCV(
    estimator=model2,
    param_distributions=param_grid,
    n_iter=50,
    scoring="roc_auc",
    cv=3,
    n_jobs=-1,
    verbose=2,
    random_state=42
)


In [26]:
rand_search.fit(X_train, y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733
[LightGBM] [Info] Number of positive: 253063, number of negative: 63734
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733
[LightGBM] [Info] Number of positive: 253064, number of negative: 63733
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733
[LightGBM] [Info] Number of positive: 253063, number of negative: 63734
[LightGBM] [Info] Number of positive: 253064, number of negative: 63733
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.177582 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Auto-choosin



[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.05, lgbm__max_depth=5, lgbm__n_estimators=200, lgbm__num_leaves=50, lgbm__subsample=0.8; total time=109.1min
[LightGBM] [Info] Number of positive: 253064, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.285921 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798821 -> initscore=1.378940
[LightGBM] [Info] Start training from score 1.378940




[CV] END lgbm__colsample_bytree=1.0, lgbm__learning_rate=0.01, lgbm__max_depth=5, lgbm__n_estimators=200, lgbm__num_leaves=31, lgbm__subsample=1.0; total time=110.1min




[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.05, lgbm__max_depth=5, lgbm__n_estimators=200, lgbm__num_leaves=50, lgbm__subsample=0.8; total time=110.3min
[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.05, lgbm__max_depth=5, lgbm__n_estimators=200, lgbm__num_leaves=50, lgbm__subsample=0.8; total time=110.3min




[CV] END lgbm__colsample_bytree=1.0, lgbm__learning_rate=0.01, lgbm__max_depth=5, lgbm__n_estimators=200, lgbm__num_leaves=31, lgbm__subsample=1.0; total time=110.5min
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.093085 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316796, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798820 -> initscore=1.378936
[LightGBM] [Info] Start training from score 1.378936
[LightGBM] [Info] Number of positive: 253063, number of negative: 63734
[LightGBM] [Info] Number of positive: 253064, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.995468 seconds.
You can set `force_row_wis



[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.01, lgbm__max_depth=3, lgbm__n_estimators=800, lgbm__num_leaves=50, lgbm__subsample=1.0; total time=116.0min
[LightGBM] [Info] Number of positive: 253063, number of negative: 63734
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.505836 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798818 -> initscore=1.378920
[LightGBM] [Info] Start training from score 1.378920








[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.01, lgbm__max_depth=3, lgbm__n_estimators=800, lgbm__num_leaves=50, lgbm__subsample=1.0; total time=116.8min
[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.01, lgbm__max_depth=3, lgbm__n_estimators=800, lgbm__num_leaves=50, lgbm__subsample=1.0; total time=116.9min
[LightGBM] [Info] Number of positive: 253064, number of negative: 63733
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.688032 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798821 -> initscore=1.378940
[LightGBM] [Info] Start training from score 1.378940
[LightGBM] [Info] Auto-choosing ro



[CV] END lgbm__colsample_bytree=1.0, lgbm__learning_rate=0.01, lgbm__max_depth=5, lgbm__n_estimators=200, lgbm__num_leaves=31, lgbm__subsample=1.0; total time=118.4min
[LightGBM] [Info] Number of positive: 253063, number of negative: 63734
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.889174 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798818 -> initscore=1.378920
[LightGBM] [Info] Start training from score 1.378920





[CV] END lgbm__colsample_bytree=0.6, lgbm__learning_rate=0.05, lgbm__max_depth=3, lgbm__n_estimators=200, lgbm__num_leaves=31, lgbm__subsample=0.6; total time=22.1min
[LightGBM] [Info] Number of positive: 253064, number of negative: 63733




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3.130331 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[CV] END lgbm__colsample_bytree=0.6, lgbm__learning_rate=0.05, lgbm__max_depth=3, lgbm__n_estimators=200, lgbm__num_leaves=31, lgbm__subsample=0.6; total time=22.2min
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798821 -> initscore=1.378940
[LightGBM] [Info] Start training from score 1.378940
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.573560 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316796, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798820 -> initscore=1.378936
[LightGBM] [Info] Start training from score 1.378936
[CV] END lgbm__colsample_bytree=0.6, lgbm__learning_rate=0.05, lgbm__max_depth=3, lgbm__n_estimators=200, lgbm__num_leaves=31, lgbm__subsample=0.6; total time=22.4min
[LightGBM] [Info] Number of positive: 253063, number of negative: 63734
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3.265621 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Inf



[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.01, lgbm__max_depth=7, lgbm__n_estimators=200, lgbm__num_leaves=50, lgbm__subsample=1.0; total time=197.0min




[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.01, lgbm__max_depth=7, lgbm__n_estimators=200, lgbm__num_leaves=50, lgbm__subsample=1.0; total time=197.1min
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733
[LightGBM] [Info] Number of positive: 253064, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3.321866 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316796, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798820 -> initscore=1.378936
[LightGBM] [Info] Start training from score 1.378936
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3.534034 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set



[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.01, lgbm__max_depth=7, lgbm__n_estimators=200, lgbm__num_leaves=50, lgbm__subsample=1.0; total time=197.9min
[LightGBM] [Info] Number of positive: 253063, number of negative: 63734
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.359285 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798818 -> initscore=1.378920
[LightGBM] [Info] Start training from score 1.378920






[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.05, lgbm__max_depth=3, lgbm__n_estimators=800, lgbm__num_leaves=31, lgbm__subsample=0.6; total time=98.8min
[LightGBM] [Info] Number of positive: 253064, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.269158 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798821 -> initscore=1.378940
[LightGBM] [Info] Start training from score 1.378940




[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.05, lgbm__max_depth=3, lgbm__n_estimators=800, lgbm__num_leaves=31, lgbm__subsample=0.6; total time=98.9min
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.753650 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316796, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798820 -> initscore=1.378936
[LightGBM] [Info] Start training from score 1.378936





[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.05, lgbm__max_depth=3, lgbm__n_estimators=800, lgbm__num_leaves=31, lgbm__subsample=1.0; total time=94.1min
[LightGBM] [Info] Number of positive: 253063, number of negative: 63734
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3.643335 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798818 -> initscore=1.378920
[LightGBM] [Info] Start training from score 1.378920




[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.05, lgbm__max_depth=3, lgbm__n_estimators=800, lgbm__num_leaves=31, lgbm__subsample=0.6; total time=95.9min




[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.05, lgbm__max_depth=3, lgbm__n_estimators=800, lgbm__num_leaves=31, lgbm__subsample=1.0; total time=95.6min
[LightGBM] [Info] Number of positive: 253064, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3.430021 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798821 -> initscore=1.378940
[LightGBM] [Info] Start training from score 1.378940
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.618992 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set 







[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.1, lgbm__max_depth=7, lgbm__n_estimators=500, lgbm__num_leaves=31, lgbm__subsample=0.6; total time=268.2min
[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.1, lgbm__max_depth=7, lgbm__n_estimators=500, lgbm__num_leaves=31, lgbm__subsample=0.6; total time=268.3min
[LightGBM] [Info] Number of positive: 253064, number of negative: 63733
[LightGBM] [Info] Number of positive: 253063, number of negative: 63734
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3.018067 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798821 -> initscore=1.378940
[LightGBM] [Info] Start training from score 1.378940
[LightGBM] [Info] Auto-choosing row-



[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.1, lgbm__max_depth=7, lgbm__n_estimators=500, lgbm__num_leaves=31, lgbm__subsample=0.6; total time=272.2min
[LightGBM] [Info] Number of positive: 253063, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.506592 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316796, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798820 -> initscore=1.378936
[LightGBM] [Info] Start training from score 1.378936




[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.05, lgbm__max_depth=3, lgbm__n_estimators=800, lgbm__num_leaves=31, lgbm__subsample=1.0; total time=76.4min
[LightGBM] [Info] Number of positive: 253063, number of negative: 63734
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3.576050 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798818 -> initscore=1.378920
[LightGBM] [Info] Start training from score 1.378920




[CV] END lgbm__colsample_bytree=0.8, lgbm__learning_rate=0.1, lgbm__max_depth=5, lgbm__n_estimators=200, lgbm__num_leaves=31, lgbm__subsample=0.8; total time=84.8min
[LightGBM] [Info] Number of positive: 253064, number of negative: 63733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3.163414 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1385
[LightGBM] [Info] Number of data points in the train set: 316797, number of used features: 60
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.798821 -> initscore=1.378940
[LightGBM] [Info] Start training from score 1.378940


Exception ignored on calling ctypes callback function: <function _log_callback at 0x76d2447a49d0>
Traceback (most recent call last):
  File "/home/ricardo/.pyenv/versions/project_loan/lib/python3.10/site-packages/lightgbm/basic.py", line 287, in _log_callback
    def _log_callback(msg: bytes) -> None:
KeyboardInterrupt: 


No further splits with positive gain, best gain: -inf
No further splits with positive gain, best gain: -inf


Exception ignored on calling ctypes callback function: <function _log_callback at 0x72401cbdc9d0>
Traceback (most recent call last):
  File "/home/ricardo/.pyenv/versions/project_loan/lib/python3.10/site-packages/lightgbm/basic.py", line 287, in _log_callback
    def _log_callback(msg: bytes) -> None:
KeyboardInterrupt: 


No further splits with positive gain, best gain: -inf


Exception ignored on calling ctypes callback function: <function _log_callback at 0x77fa28dc49d0>
Traceback (most recent call last):
  File "/home/ricardo/.pyenv/versions/project_loan/lib/python3.10/site-packages/lightgbm/basic.py", line 287, in _log_callback
    def _log_callback(msg: bytes) -> None:
KeyboardInterrupt: 


KeyboardInterrupt: 

In [None]:
print("Best ROC-AUC:", rand_search.best_score_)
print("Best parameters:", rand_search.best_params_)

In [None]:
val_pred = rand_search.best_estimator_.predict_proba(X_val)[:, 1]
roc_auc_val = roc_auc_score(y_val, val_pred)
print("Validation ROC-AUC:", roc_auc_val)