# Data Loading and Initial Exploration

Loads the dataset, checks basic properties, and assesses data quality.

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import StackingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from imblearn.over_sampling import SMOTE
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline  # Import Pipeline
from sklearn.impute import SimpleImputer



import warnings
warnings.filterwarnings('ignore')



In [2]:
# Read the Dataset
df = pd.read_csv('../datasets/PCOS_data.csv')

#  Data Exploration and Cleaning

- Basic information about the dataset is printed, including data types and the presence of null values.
- Descriptive statistics are generated to understand the central tendency and distribution of the data.
- Column names are standardized by replacing spaces with underscores to facilitate easier data handling.

In [3]:
# Display basic information and the first few rows
print(df.info())
display(df.head())

# Generate descriptive statistics
display(df.describe())

# Check for missing values
print(df.isnull().sum())

# Normalize column names
df.columns = df.columns.str.strip().str.replace(' ', '_')
print(df.columns.tolist())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541 entries, 0 to 540
Data columns (total 44 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Sl. No                  541 non-null    int64  
 1   Patient File No.        541 non-null    int64  
 2   PCOS (Y/N)              541 non-null    int64  
 3    Age (yrs)              541 non-null    int64  
 4   Weight (Kg)             541 non-null    float64
 5   Height(Cm)              541 non-null    float64
 6   BMI                     541 non-null    float64
 7   Blood Group             541 non-null    int64  
 8   Pulse rate(bpm)         541 non-null    int64  
 9   RR (breaths/min)        541 non-null    int64  
 10  Hb(g/dl)                541 non-null    float64
 11  Cycle(R/I)              541 non-null    int64  
 12  Cycle length(days)      541 non-null    int64  
 13  Marraige Status (Yrs)   540 non-null    float64
 14  Pregnant(Y/N)           541 non-null    in

Unnamed: 0,Sl. No,Patient File No.,PCOS (Y/N),Age (yrs),Weight (Kg),Height(Cm),BMI,Blood Group,Pulse rate(bpm),RR (breaths/min),...,Pimples(Y/N),Fast food (Y/N),Reg.Exercise(Y/N),BP _Systolic (mmHg),BP _Diastolic (mmHg),Follicle No. (L),Follicle No. (R),Avg. F size (L) (mm),Avg. F size (R) (mm),Endometrium (mm)
0,1,1,0,28,44.6,152.0,19.3,15,78,22,...,0,1.0,0,110,80,3,3,18.0,18.0,8.5
1,2,2,0,36,65.0,161.5,24.9,15,74,20,...,0,0.0,0,120,70,3,5,15.0,14.0,3.7
2,3,3,1,33,68.8,165.0,25.3,11,72,18,...,1,1.0,0,120,80,13,15,18.0,20.0,10.0
3,4,4,0,37,65.0,148.0,29.7,13,72,20,...,0,0.0,0,120,70,2,2,15.0,14.0,7.5
4,5,5,0,25,52.0,161.0,20.1,11,72,18,...,0,0.0,0,120,80,3,4,16.0,14.0,7.0


Unnamed: 0,Sl. No,Patient File No.,PCOS (Y/N),Age (yrs),Weight (Kg),Height(Cm),BMI,Blood Group,Pulse rate(bpm),RR (breaths/min),...,Pimples(Y/N),Fast food (Y/N),Reg.Exercise(Y/N),BP _Systolic (mmHg),BP _Diastolic (mmHg),Follicle No. (L),Follicle No. (R),Avg. F size (L) (mm),Avg. F size (R) (mm),Endometrium (mm)
count,541.0,541.0,541.0,541.0,541.0,541.0,541.0,541.0,541.0,541.0,...,541.0,540.0,541.0,541.0,541.0,541.0,541.0,541.0,541.0,541.0
mean,271.0,271.0,0.327172,31.430684,59.637153,156.484835,24.307579,13.802218,73.247689,19.243993,...,0.489834,0.514815,0.247689,114.661738,76.927911,6.12939,6.641405,15.018115,15.451701,8.475915
std,156.317519,156.317519,0.469615,5.411006,11.028287,6.033545,4.055129,1.840812,4.430285,1.688629,...,0.500359,0.500244,0.43207,7.384556,5.574112,4.229294,4.436889,3.566839,3.318848,2.165381
min,1.0,1.0,0.0,20.0,31.0,137.0,12.4,11.0,13.0,16.0,...,0.0,0.0,0.0,12.0,8.0,0.0,0.0,0.0,0.0,0.0
25%,136.0,136.0,0.0,28.0,52.0,152.0,21.6,13.0,72.0,18.0,...,0.0,0.0,0.0,110.0,70.0,3.0,3.0,13.0,13.0,7.0
50%,271.0,271.0,0.0,31.0,59.0,156.0,24.2,14.0,72.0,18.0,...,0.0,1.0,0.0,110.0,80.0,5.0,6.0,15.0,16.0,8.5
75%,406.0,406.0,1.0,35.0,65.0,160.0,26.6,15.0,74.0,20.0,...,1.0,1.0,0.0,120.0,80.0,9.0,10.0,18.0,18.0,9.8
max,541.0,541.0,1.0,48.0,108.0,180.0,38.9,18.0,82.0,28.0,...,1.0,1.0,1.0,140.0,100.0,22.0,20.0,24.0,24.0,18.0


Sl. No                    0
Patient File No.          0
PCOS (Y/N)                0
 Age (yrs)                0
Weight (Kg)               0
Height(Cm)                0
BMI                       0
Blood Group               0
Pulse rate(bpm)           0
RR (breaths/min)          0
Hb(g/dl)                  0
Cycle(R/I)                0
Cycle length(days)        0
Marraige Status (Yrs)     1
Pregnant(Y/N)             0
No. of abortions          0
  I   beta-HCG(mIU/mL)    0
II    beta-HCG(mIU/mL)    0
FSH(mIU/mL)               0
LH(mIU/mL)                0
FSH/LH                    0
Hip(inch)                 0
Waist(inch)               0
Waist:Hip Ratio           0
TSH (mIU/L)               0
AMH(ng/mL)                0
PRL(ng/mL)                0
Vit D3 (ng/mL)            0
PRG(ng/mL)                0
RBS(mg/dl)                0
Weight gain(Y/N)          0
hair growth(Y/N)          0
Skin darkening (Y/N)      0
Hair loss(Y/N)            0
Pimples(Y/N)              0
Fast food (Y/N)     

- The script handles numeric columns by replacing non-numeric data and filling missing values with the median, which is robust against outliers.
- Categorical columns are processed by filling missing values with the mode.
- Additional feature engineering includes creating bins for BMI and age groups to capture more nuanced relationships in the data.

In [4]:
# Correctly handle numeric columns with special characters and potential non-numeric data
numeric_columns = ['BMI', 'Age_(yrs)', 'Weight_(Kg)', 'Waist:Hip_Ratio', 
                   'I___beta-HCG(mIU/mL)', 'II____beta-HCG(mIU/mL)', 
                   'FSH(mIU/mL)', 'LH(mIU/mL)', 'AMH(ng/mL)', 
                   'Cycle_length(days)', 'Endometrium_(mm)', 
                   'BP__Systolic_(mmHg)', 'BP__Diastolic_(mmHg)']

for col in numeric_columns:
    df[col] = pd.to_numeric(df[col].replace(r'^\.+$', '', regex=True), errors='coerce')
    df[col] = df[col].fillna(df[col].median())

# Handle categorical columns and fill missing values with mode
categorical_columns = ['PCOS_(Y/N)', 'Pregnant(Y/N)', 'Weight_gain(Y/N)', 
                       'hair_growth(Y/N)', 'Skin_darkening_(Y/N)', 
                       'Hair_loss(Y/N)', 'Pimples(Y/N)', 
                       'Fast_food_(Y/N)', 'Reg.Exercise(Y/N)', 
                       'Blood_Group']

for col in categorical_columns:
    df[col] = df[col].fillna(df[col].mode()[0])
    
    # Fill the single missing value in 'Marraige_Status_(Yrs)' with the median
df['Marraige_Status_(Yrs)'].fillna(df['Marraige_Status_(Yrs)'].median(), inplace=True)



In [5]:
# Encode categorical columns
labelencoder = LabelEncoder()
df[categorical_columns] = df[categorical_columns].apply(labelencoder.fit_transform)


In [6]:
# Scale all numeric columns
scaler = StandardScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

In [7]:
# Apply SMOTE for class balancing
X = df.drop('PCOS_(Y/N)', axis=1)
y = df['PCOS_(Y/N)']
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [8]:
# Display the resampled class distribution
print(f"Class distribution after SMOTE:\n{pd.Series(y_resampled).value_counts()}")

Class distribution after SMOTE:
PCOS_(Y/N)
0    364
1    364
Name: count, dtype: int64


In [9]:
# Example of clipping extreme outliers in Avg._F_size_(L)_(mm)
df['Avg._F_size_(L)_(mm)'] = df['Avg._F_size_(L)_(mm)'].clip(lower=-3, upper=3)


In [10]:
# Feature engineering for categorical binning
df['BMI_range'] = pd.cut(df['BMI'], bins=[0, 18.5, 24.9, 29.9, float('inf')], labels=[0, 1, 2, 3])
df['Age_group'] = pd.cut(df['Age_(yrs)'], bins=[0, 30, 40, 50, float('inf')], labels=[0, 1, 2, 3])


# Model Training 

In [11]:
# Prepare data for modeling
X = df.drop('PCOS_(Y/N)', axis=1)
y = df['PCOS_(Y/N)']

In [12]:
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [13]:
# Define a pipeline for preprocessing (imputation + scaling)
pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),  # Imputing missing values with median
    ('scaler', StandardScaler())                   # Standard scaling of features
])


In [14]:
# Apply the pipeline to both the training and test sets
X_train_preprocessed = pipeline.fit_transform(X_train)
X_test_preprocessed = pipeline.transform(X_test)

In [15]:
# Apply SMOTE only on the preprocessed training data
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_preprocessed, y_train)


In [16]:
# Use the balanced (resampled) training data for model training
print(f"Class distribution after SMOTE:\n{pd.Series(y_resampled).value_counts()}")

# Check the shapes of the resampled and test sets to ensure consistency
print(f"Resampled training data shape: {X_resampled.shape}, {y_resampled.shape}")
print(f"Test data shape: {X_test_preprocessed.shape}, {y_test.shape}")

Class distribution after SMOTE:
PCOS_(Y/N)
0    291
1    291
Name: count, dtype: int64
Resampled training data shape: (582, 45), (582,)
Test data shape: (109, 45), (109,)


In [17]:
# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [18]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score


In [19]:
# Reinitialize classifiers with the same configuration
classifiers = {
    'KNN (k=3)': KNeighborsClassifier(n_neighbors=3),
    'KNN (k=5)': KNeighborsClassifier(n_neighbors=5),
    'Decision Tree (gini)': DecisionTreeClassifier(criterion='gini'),
    'Decision Tree (entropy)': DecisionTreeClassifier(criterion='entropy'),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Naive Bayes': GaussianNB(),
    'Dummy Classifier': DummyClassifier(strategy='most_frequent'),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42),
    'LightGBM': LGBMClassifier(random_state=42),
    'CatBoost': CatBoostClassifier(verbose=0, random_state=42)
}


In [20]:
# Train and evaluate classifiers using the resampled training data and test set
results = {}
for name, clf in classifiers.items():
    clf.fit(X_resampled, y_resampled)  # Use resampled y_resampled for training
    y_pred = clf.predict(X_test_preprocessed)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy
    print(f'{name} - Accuracy: {accuracy:.4f}')

KNN (k=3) - Accuracy: 0.8624
KNN (k=5) - Accuracy: 0.8716
Decision Tree (gini) - Accuracy: 0.8440
Decision Tree (entropy) - Accuracy: 0.8532
Random Forest - Accuracy: 0.9266
Naive Bayes - Accuracy: 0.4128
Dummy Classifier - Accuracy: 0.6697
XGBoost - Accuracy: 0.9083
[LightGBM] [Info] Number of positive: 291, number of negative: 291
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000390 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3563
[LightGBM] [Info] Number of data points in the train set: 582, number of used features: 42
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
LightGBM - Accuracy: 0.9083
CatBoost - Accuracy: 0.8991


In [21]:
# Display the results
results_df = pd.DataFrame(list(results.items()), columns=['Classifier', 'Accuracy'])
print(results_df.sort_values(by='Accuracy', ascending=False))

                Classifier  Accuracy
4            Random Forest  0.926606
7                  XGBoost  0.908257
8                 LightGBM  0.908257
9                 CatBoost  0.899083
1                KNN (k=5)  0.871560
0                KNN (k=3)  0.862385
3  Decision Tree (entropy)  0.853211
2     Decision Tree (gini)  0.844037
6         Dummy Classifier  0.669725
5              Naive Bayes  0.412844


# Hyperparameter Tuning with optuna (Bayesian optimization)

- Decision Tree
- XGBoost
- LightGBM
- CatBoost

In [22]:
import optuna
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

In [23]:
# Define cross-validation method
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold(n_splits=5)

In [24]:
# Decision Tree Tuning
def dt_objective(trial):
    params = {
        'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
    }
    dt = DecisionTreeClassifier(**params, random_state=42)
    score = cross_val_score(dt, X_train, y_train, cv=cv, scoring='accuracy').mean()
    return score

study_dt = optuna.create_study(direction='maximize')
study_dt.optimize(dt_objective, n_trials=50)
best_dt_params = study_dt.best_trial.params
print(f"Best Decision Tree params: {best_dt_params}")

[I 2024-10-15 16:44:41,959] A new study created in memory with name: no-name-8a17dc40-9b46-4059-836b-f466873b845a
[I 2024-10-15 16:44:41,979] Trial 0 finished with value: 0.8010692328254478 and parameters: {'criterion': 'entropy', 'max_depth': 15, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.8010692328254478.
[I 2024-10-15 16:44:41,999] Trial 1 finished with value: 0.8055600106923283 and parameters: {'criterion': 'entropy', 'max_depth': 9, 'min_samples_split': 7, 'min_samples_leaf': 7}. Best is trial 1 with value: 0.8055600106923283.
[I 2024-10-15 16:44:42,012] Trial 2 finished with value: 0.8310344827586208 and parameters: {'criterion': 'gini', 'max_depth': 14, 'min_samples_split': 9, 'min_samples_leaf': 7}. Best is trial 2 with value: 0.8310344827586208.
[I 2024-10-15 16:44:42,035] Trial 3 finished with value: 0.8332531408714248 and parameters: {'criterion': 'gini', 'max_depth': 11, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 3 with

Best Decision Tree params: {'criterion': 'gini', 'max_depth': 14, 'min_samples_split': 8, 'min_samples_leaf': 2}


In [25]:
# XGBoost Tuning
def xgb_objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)
    }
    xgb = XGBClassifier(**params, use_label_encoder=False, eval_metric='logloss', random_state=42)
    score = cross_val_score(xgb, X_train, y_train, cv=cv, scoring='accuracy').mean()
    return score

study_xgb = optuna.create_study(direction='maximize')
study_xgb.optimize(xgb_objective, n_trials=50)
best_xgb_params = study_xgb.best_trial.params
print(f"Best XGBoost params: {best_xgb_params}")

[I 2024-10-15 16:44:50,669] A new study created in memory with name: no-name-fa0534de-0542-4879-8ec1-c4222f2d5fe0
[I 2024-10-15 16:44:51,181] Trial 0 finished with value: 0.8726543704891739 and parameters: {'n_estimators': 349, 'max_depth': 8, 'learning_rate': 0.09704560461821723, 'subsample': 0.5393712191226601, 'colsample_bytree': 0.7936679857165185}. Best is trial 0 with value: 0.8726543704891739.
[I 2024-10-15 16:44:51,536] Trial 1 finished with value: 0.8773322641005079 and parameters: {'n_estimators': 283, 'max_depth': 4, 'learning_rate': 0.13322663718745945, 'subsample': 0.7262442449762929, 'colsample_bytree': 0.6963676636538845}. Best is trial 1 with value: 0.8773322641005079.
[I 2024-10-15 16:44:51,765] Trial 2 finished with value: 0.8726543704891739 and parameters: {'n_estimators': 183, 'max_depth': 3, 'learning_rate': 0.23864128518533162, 'subsample': 0.902162711672826, 'colsample_bytree': 0.9189422859748855}. Best is trial 1 with value: 0.8773322641005079.
[I 2024-10-15 16:

Best XGBoost params: {'n_estimators': 104, 'max_depth': 8, 'learning_rate': 0.07964927706635355, 'subsample': 0.6785595830548571, 'colsample_bytree': 0.654615031243744}


In [26]:
# LightGBM Tuning
def lgbm_objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0)
    }
    lgbm = LGBMClassifier(**params, random_state=42)
    score = cross_val_score(lgbm, X_train, y_train, cv=cv, scoring='accuracy').mean()
    return score

study_lgbm = optuna.create_study(direction='maximize')
study_lgbm.optimize(lgbm_objective, n_trials=50)
best_lgbm_params = study_lgbm.best_trial.params
print(f"Best LightGBM params: {best_lgbm_params}")


[I 2024-10-15 16:45:10,873] A new study created in memory with name: no-name-e8ecb3c5-f06c-4982-83fb-6db5dda8e7c2
[I 2024-10-15 16:45:11,038] Trial 0 finished with value: 0.8727612937717188 and parameters: {'n_estimators': 193, 'max_depth': 8, 'learning_rate': 0.24804625448646214, 'num_leaves': 35, 'subsample': 0.6209137700715128}. Best is trial 0 with value: 0.8727612937717188.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000157 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:11,383] Trial 1 finished with value: 0.8797113071371291 and parameters: {'n_estimators': 380, 'max_depth': 5, 'learning_rate': 0.08495476439871114, 'num_leaves': 130, 'subsample': 0.5510940414499939}. Best is trial 1 with value: 0.8797113071371291.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000227 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:11,634] Trial 2 finished with value: 0.875006682705159 and parameters: {'n_estimators': 448, 'max_depth': 3, 'learning_rate': 0.1353807196496994, 'num_leaves': 49, 'subsample': 0.9727846039303163}. Best is trial 1 with value: 0.8797113071371291.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000153 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:11,782] Trial 3 finished with value: 0.8704891740176424 and parameters: {'n_estimators': 116, 'max_depth': 7, 'learning_rate': 0.11567842336804633, 'num_leaves': 94, 'subsample': 0.504055818903653}. Best is trial 1 with value: 0.8797113071371291.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000151 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:11,924] Trial 4 finished with value: 0.8750334135257953 and parameters: {'n_estimators': 122, 'max_depth': 6, 'learning_rate': 0.14795253807335831, 'num_leaves': 32, 'subsample': 0.9367407843119}. Best is trial 1 with value: 0.8797113071371291.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000143 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:12,295] Trial 5 finished with value: 0.8797113071371291 and parameters: {'n_estimators': 367, 'max_depth': 7, 'learning_rate': 0.07887990580587556, 'num_leaves': 51, 'subsample': 0.7921423067783864}. Best is trial 1 with value: 0.8797113071371291.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000143 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350


[I 2024-10-15 16:45:12,454] Trial 6 finished with value: 0.8912857524726009 and parameters: {'n_estimators': 206, 'max_depth': 7, 'learning_rate': 0.2901629169413839, 'num_leaves': 121, 'subsample': 0.7549714495703933}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000190 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1656
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:12,594] Trial 7 finished with value: 0.8750601443464314 and parameters: {'n_estimators': 166, 'max_depth': 4, 'learning_rate': 0.11165841546875713, 'num_leaves': 28, 'subsample': 0.5627600829168995}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000200 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000193 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:12,851] Trial 8 finished with value: 0.8820368885324779 and parameters: {'n_estimators': 443, 'max_depth': 6, 'learning_rate': 0.23490140668600648, 'num_leaves': 42, 'subsample': 0.635516966344013}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000156 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000157 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:12,989] Trial 9 finished with value: 0.8681903234429298 and parameters: {'n_estimators': 129, 'max_depth': 5, 'learning_rate': 0.09154718150614925, 'num_leaves': 36, 'subsample': 0.9538839680301774}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000157 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000204 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:13,186] Trial 10 finished with value: 0.879684576316493 and parameters: {'n_estimators': 241, 'max_depth': 10, 'learning_rate': 0.29136453058976364, 'num_leaves': 149, 'subsample': 0.7977424665659788}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000212 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000268 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:13,412] Trial 11 finished with value: 0.8842822774659181 and parameters: {'n_estimators': 290, 'max_depth': 8, 'learning_rate': 0.2194595572505972, 'num_leaves': 90, 'subsample': 0.7011317852480419}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000137 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:13,642] Trial 12 finished with value: 0.8773857257417802 and parameters: {'n_estimators': 276, 'max_depth': 9, 'learning_rate': 0.20355886478661162, 'num_leaves': 104, 'subsample': 0.7166404364836946}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000183 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:14,008] Trial 13 finished with value: 0.8682705159048384 and parameters: {'n_estimators': 327, 'max_depth': 8, 'learning_rate': 0.010081910252734028, 'num_leaves': 75, 'subsample': 0.7177575084705496}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000213 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000178 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:14,195] Trial 14 finished with value: 0.8773857257417802 and parameters: {'n_estimators': 228, 'max_depth': 9, 'learning_rate': 0.29930635991854926, 'num_leaves': 114, 'subsample': 0.8588575592767635}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000210 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000178 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:14,421] Trial 15 finished with value: 0.8820903501737505 and parameters: {'n_estimators': 287, 'max_depth': 8, 'learning_rate': 0.20343621418051036, 'num_leaves': 74, 'subsample': 0.6641935152492909}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000177 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:14,674] Trial 16 finished with value: 0.872788024592355 and parameters: {'n_estimators': 336, 'max_depth': 10, 'learning_rate': 0.2480917908580136, 'num_leaves': 124, 'subsample': 0.8201763451009417}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000153 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000243 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:14,893] Trial 17 finished with value: 0.872788024592355 and parameters: {'n_estimators': 228, 'max_depth': 7, 'learning_rate': 0.17943184227479034, 'num_leaves': 85, 'subsample': 0.8696948974649501}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000139 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:15,061] Trial 18 finished with value: 0.8681635926222935 and parameters: {'n_estimators': 169, 'max_depth': 9, 'learning_rate': 0.27247601325175047, 'num_leaves': 146, 'subsample': 0.7291688534277431}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000147 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:15,313] Trial 19 finished with value: 0.8843090082865543 and parameters: {'n_estimators': 398, 'max_depth': 5, 'learning_rate': 0.21696020275977398, 'num_leaves': 65, 'subsample': 0.6513814857908736}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000172 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000147 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:15,603] Trial 20 finished with value: 0.8750868751670676 and parameters: {'n_estimators': 490, 'max_depth': 5, 'learning_rate': 0.1794072157704752, 'num_leaves': 62, 'subsample': 0.5860044955426144}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000153 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:15,855] Trial 21 finished with value: 0.8727612937717188 and parameters: {'n_estimators': 368, 'max_depth': 6, 'learning_rate': 0.2128903870841317, 'num_leaves': 97, 'subsample': 0.6675497280467559}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000177 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000155 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:16,072] Trial 22 finished with value: 0.8750601443464314 and parameters: {'n_estimators': 411, 'max_depth': 3, 'learning_rate': 0.2649961958804102, 'num_leaves': 71, 'subsample': 0.7588186498539464}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000204 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540


[I 2024-10-15 16:45:16,264] Trial 23 finished with value: 0.8843090082865543 and parameters: {'n_estimators': 268, 'max_depth': 4, 'learning_rate': 0.2256771840346936, 'num_leaves': 113, 'subsample': 0.6772399881242143}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000157 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1656
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:16,462] Trial 24 finished with value: 0.8750601443464314 and parameters: {'n_estimators': 254, 'max_depth': 4, 'learning_rate': 0.1787546749233141, 'num_leaves': 130, 'subsample': 0.6615972706112776}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1656
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000197 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:16,670] Trial 25 finished with value: 0.8750334135257953 and parameters: {'n_estimators': 321, 'max_depth': 4, 'learning_rate': 0.27095418065172466, 'num_leaves': 113, 'subsample': 0.7531997761148207}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000206 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:16,842] Trial 26 finished with value: 0.8820101577118418 and parameters: {'n_estimators': 200, 'max_depth': 5, 'learning_rate': 0.2424771949380473, 'num_leaves': 109, 'subsample': 0.6091207126452922}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000168 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000147 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:17,028] Trial 27 finished with value: 0.8773857257417802 and parameters: {'n_estimators': 267, 'max_depth': 4, 'learning_rate': 0.2738221122256995, 'num_leaves': 138, 'subsample': 0.6975672856462707}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000161 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000159 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:17,189] Trial 28 finished with value: 0.8819834268912056 and parameters: {'n_estimators': 207, 'max_depth': 3, 'learning_rate': 0.18943956638849818, 'num_leaves': 119, 'subsample': 0.7762474121577749}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000171 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000191 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:17,358] Trial 29 finished with value: 0.87043571237637 and parameters: {'n_estimators': 163, 'max_depth': 6, 'learning_rate': 0.2233120414303813, 'num_leaves': 63, 'subsample': 0.6272916161405949}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000192 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000154 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:17,621] Trial 30 finished with value: 0.8727612937717188 and parameters: {'n_estimators': 345, 'max_depth': 5, 'learning_rate': 0.16043735301305165, 'num_leaves': 103, 'subsample': 0.8482752457044529}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000139 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:17,847] Trial 31 finished with value: 0.8705159048382786 and parameters: {'n_estimators': 299, 'max_depth': 8, 'learning_rate': 0.24958454162358573, 'num_leaves': 87, 'subsample': 0.6974344279070271}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000153 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000179 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:18,081] Trial 32 finished with value: 0.8773857257417802 and parameters: {'n_estimators': 305, 'max_depth': 7, 'learning_rate': 0.22545782084485125, 'num_leaves': 84, 'subsample': 0.6914187143470658}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000143 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:18,290] Trial 33 finished with value: 0.8797380379577653 and parameters: {'n_estimators': 256, 'max_depth': 8, 'learning_rate': 0.2568212486788904, 'num_leaves': 128, 'subsample': 0.6495591023332872}. Best is trial 6 with value: 0.8912857524726009.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:18,533] Trial 34 finished with value: 0.8936380646885859 and parameters: {'n_estimators': 398, 'max_depth': 7, 'learning_rate': 0.2902257790086279, 'num_leaves': 94, 'subsample': 0.6003675054114488}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000150 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000159 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:18,789] Trial 35 finished with value: 0.8750334135257953 and parameters: {'n_estimators': 405, 'max_depth': 6, 'learning_rate': 0.2909309691475887, 'num_leaves': 98, 'subsample': 0.5012383985335275}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000154 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000173 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:19,081] Trial 36 finished with value: 0.8773857257417802 and parameters: {'n_estimators': 495, 'max_depth': 7, 'learning_rate': 0.28707076699672995, 'num_leaves': 140, 'subsample': 0.5447764046151486}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000176 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:19,330] Trial 37 finished with value: 0.8751136059877039 and parameters: {'n_estimators': 441, 'max_depth': 4, 'learning_rate': 0.23664941816183938, 'num_leaves': 59, 'subsample': 0.6081321645584659}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000182 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:19,622] Trial 38 finished with value: 0.8751136059877037 and parameters: {'n_estimators': 395, 'max_depth': 5, 'learning_rate': 0.15719014905848808, 'num_leaves': 119, 'subsample': 0.5398253505564559}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000168 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:19,852] Trial 39 finished with value: 0.8797380379577653 and parameters: {'n_estimators': 357, 'max_depth': 7, 'learning_rate': 0.27892864673818574, 'num_leaves': 106, 'subsample': 0.5634478754412089}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000193 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000178 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:20,105] Trial 40 finished with value: 0.8727612937717188 and parameters: {'n_estimators': 420, 'max_depth': 6, 'learning_rate': 0.2564161733261914, 'num_leaves': 20, 'subsample': 0.587952321581068}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000135 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000173 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:20,377] Trial 41 finished with value: 0.8751670676289762 and parameters: {'n_estimators': 380, 'max_depth': 7, 'learning_rate': 0.21465041871045681, 'num_leaves': 90, 'subsample': 0.6762932332984568}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000224 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000219 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:20,622] Trial 42 finished with value: 0.8658647420475809 and parameters: {'n_estimators': 310, 'max_depth': 8, 'learning_rate': 0.20188498516152534, 'num_leaves': 80, 'subsample': 0.6348667692583028}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000182 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:20,920] Trial 43 finished with value: 0.8843624699278267 and parameters: {'n_estimators': 465, 'max_depth': 8, 'learning_rate': 0.2329945975218156, 'num_leaves': 92, 'subsample': 0.7397842571269986}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000176 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:21,204] Trial 44 finished with value: 0.8842822774659181 and parameters: {'n_estimators': 461, 'max_depth': 3, 'learning_rate': 0.13604291343301012, 'num_leaves': 52, 'subsample': 0.7263209989022625}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000199 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000156 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number

[I 2024-10-15 16:45:21,488] Trial 45 finished with value: 0.884255546645282 and parameters: {'n_estimators': 468, 'max_depth': 9, 'learning_rate': 0.2980156303464222, 'num_leaves': 100, 'subsample': 0.8035156594670634}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000179 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1672
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000207 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number

[I 2024-10-15 16:45:21,757] Trial 46 finished with value: 0.8820636193531142 and parameters: {'n_estimators': 421, 'max_depth': 7, 'learning_rate': 0.2347814544551657, 'num_leaves': 114, 'subsample': 0.7498542287761301}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000168 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:22,051] Trial 47 finished with value: 0.8797380379577653 and parameters: {'n_estimators': 473, 'max_depth': 8, 'learning_rate': 0.26323112888630695, 'num_leaves': 94, 'subsample': 0.7426945527837923}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000228 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000156 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:22,298] Trial 48 finished with value: 0.8773322641005079 and parameters: {'n_estimators': 437, 'max_depth': 4, 'learning_rate': 0.28285288036972744, 'num_leaves': 69, 'subsample': 0.8979578483258785}. Best is trial 34 with value: 0.8936380646885859.


[LightGBM] [Info] Number of positive: 113, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1657
[LightGBM] [Info] Number of data points in the train set: 346, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.326590 -> initscore=-0.723651
[LightGBM] [Info] Start training from score -0.723651
[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000159 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number

[I 2024-10-15 16:45:22,714] Trial 49 finished with value: 0.8774926490243249 and parameters: {'n_estimators': 391, 'max_depth': 6, 'learning_rate': 0.03587003906756411, 'num_leaves': 79, 'subsample': 0.7741196686792948}. Best is trial 34 with value: 0.8936380646885859.


Best LightGBM params: {'n_estimators': 398, 'max_depth': 7, 'learning_rate': 0.2902257790086279, 'num_leaves': 94, 'subsample': 0.6003675054114488}


In [27]:
# CatBoost Tuning
def catboost_objective(trial):
    params = {
        'iterations': trial.suggest_int('iterations', 100, 500),
        'depth': trial.suggest_int('depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-5, 10),
    }
    catboost = CatBoostClassifier(**params, verbose=0, random_state=42)
    score = cross_val_score(catboost, X_train, y_train, cv=cv, scoring='accuracy').mean()
    return score

study_catboost = optuna.create_study(direction='maximize')
study_catboost.optimize(catboost_objective, n_trials=50)
best_catboost_params = study_catboost.best_trial.params
print(f"Best CatBoost params: {best_catboost_params}")

[I 2024-10-15 16:45:38,657] A new study created in memory with name: no-name-63ee8df5-18f9-4060-ac3b-09b7260756f5
[I 2024-10-15 16:45:40,701] Trial 0 finished with value: 0.8842288158246457 and parameters: {'iterations': 211, 'depth': 7, 'learning_rate': 0.048617761959653095, 'l2_leaf_reg': 0.7855184718312891}. Best is trial 0 with value: 0.8842288158246457.
[I 2024-10-15 16:45:41,721] Trial 1 finished with value: 0.89585672280139 and parameters: {'iterations': 219, 'depth': 4, 'learning_rate': 0.047438328962724374, 'l2_leaf_reg': 4.614037729709972}. Best is trial 1 with value: 0.89585672280139.
[I 2024-10-15 16:45:44,186] Trial 2 finished with value: 0.8934509489441325 and parameters: {'iterations': 266, 'depth': 7, 'learning_rate': 0.198273289005973, 'l2_leaf_reg': 8.963053335811539}. Best is trial 1 with value: 0.89585672280139.
[I 2024-10-15 16:45:46,584] Trial 3 finished with value: 0.8842822774659181 and parameters: {'iterations': 420, 'depth': 5, 'learning_rate': 0.0519049712688

Best CatBoost params: {'iterations': 178, 'depth': 3, 'learning_rate': 0.26755781320842015, 'l2_leaf_reg': 0.7966229454617894}


In [28]:
# Train the models with the best parameters found
dt_best = DecisionTreeClassifier(**best_dt_params, random_state=42)
xgb_best = XGBClassifier(**best_xgb_params, use_label_encoder=False, eval_metric='logloss', random_state=42)
lgbm_best = LGBMClassifier(**best_lgbm_params, random_state=42)
catboost_best = CatBoostClassifier(**best_catboost_params, verbose=0, random_state=42)

# Cross-validation for evaluation
dt_score = cross_val_score(dt_best, X_train, y_train, cv=cv, scoring='accuracy').mean()
xgb_score = cross_val_score(xgb_best, X_train, y_train, cv=cv, scoring='accuracy').mean()
lgbm_score = cross_val_score(lgbm_best, X_train, y_train, cv=cv, scoring='accuracy').mean()
catboost_score = cross_val_score(catboost_best, X_train, y_train, cv=cv, scoring='accuracy').mean()

print(f"Decision Tree Final Cross-Validation Accuracy: {dt_score:.4f}")
print(f"XGBoost Final Cross-Validation Accuracy: {xgb_score:.4f}")
print(f"LightGBM Final Cross-Validation Accuracy: {lgbm_score:.4f}")
print(f"CatBoost Final Cross-Validation Accuracy: {catboost_score:.4f}")

[LightGBM] [Info] Number of positive: 113, number of negative: 232
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.327536 -> initscore=-0.719350
[LightGBM] [Info] Start training from score -0.719350
[LightGBM] [Info] Number of positive: 112, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000200 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1649
[LightGBM] [Info] Number of data points in the train set: 345, number of used features: 44
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.324638 -> initscore=-0.732540
[LightGBM] [Info] Start training from score -0.732540
[LightGBM] [Info] Number