### **MODEL TRAINING**

In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score,train_test_split
from sklearn.metrics import classification_report

In [18]:
Kepler_data=pd.read_csv('koi_toi_combined_features.csv')

In [19]:
Kepler_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9565 entries, 0 to 9564
Data columns (total 11 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   period    9565 non-null   float64
 1   duration  9565 non-null   float64
 2   depth     9565 non-null   float64
 3   radius    9565 non-null   float64
 4   insol     9565 non-null   float64
 5   teq       9565 non-null   float64
 6   teff      9565 non-null   float64
 7   srad      9565 non-null   float64
 8   logg      9565 non-null   float64
 9   mag       9565 non-null   float64
 10  label     9565 non-null   int64  
dtypes: float64(10), int64(1)
memory usage: 822.1 KB


In [20]:
X=Kepler_data.drop(columns=['label'])
y=Kepler_data['label']

In [21]:
from sklearn.preprocessing import LabelEncoder

In [22]:
le=LabelEncoder()
le.fit(y)
y_trf=le.transform(y)

In [23]:
le.classes_

array([0, 1])

In [24]:
X_train,X_test,y_train,y_test=train_test_split(X,y_trf,test_size=0.2,random_state=42,stratify=y)

In [25]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((7652, 10), (1913, 10), (7652,), (1913,))

In [26]:
from sklearn.ensemble import RandomForestClassifier

In [27]:
rdf=RandomForestClassifier(n_estimators=200,criterion='gini',max_depth=None,bootstrap=True,oob_score=True)

In [28]:
rdf.fit(X_train,y_train)

In [29]:
y_predict=rdf.predict(X_test)

In [30]:
print(classification_report(y_test,y_predict))

              precision    recall  f1-score   support

           0       0.92      0.87      0.89      1119
           1       0.83      0.89      0.86       794

    accuracy                           0.88      1913
   macro avg       0.87      0.88      0.87      1913
weighted avg       0.88      0.88      0.88      1913



In [31]:
cross_val_score(estimator=rdf,X=X,y=y,cv=10,scoring='accuracy').mean()

np.float64(0.8696211137489452)

In [32]:
rdf.feature_importances_

array([0.11821016, 0.10518125, 0.10687605, 0.22990684, 0.10045672,
       0.09543965, 0.06742035, 0.06205483, 0.05348653, 0.06096763])

The random forest couldnot give much of the accuracy thus we will go to other models.

In [33]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier as GBC

### **MAKING GRADIENT BOOSTING MODEL**

In [34]:
param_grid = {
    'n_estimators': [100,120,150,180,200,500],
    'learning_rate': [0.01, 0.1],
    'max_depth': [3, 5,None]
}

gbc = GBC(random_state=42)

gs = GridSearchCV(
    estimator=gbc,
    param_grid=param_grid,
    scoring=['accuracy','precision_macro','recall_macro','f1_macro'],
    refit='f1_macro',   # final model chosen by f1_macro
    cv=5,
    n_jobs=-1,
    verbose=1
)

# fit on training data (use X_train, y_train from your notebook)
gs.fit(X_train, y_train)

print("Best params:", gs.best_params_)
print("Best f1_macro (cv):", gs.best_score_)


Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best params: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 500}
Best f1_macro (cv): 0.8869520970929244


In [35]:
param_grid = {
    'n_estimators': [100,120,500,150,180,200],
    'criterion':['gini'],
    'max_depth': [3, 5,6,10,None],
    'min_samples_leaf':[1,2,4],
    'oob_score':[True,False],
    'bootstrap':[True,False],
    'max_samples':[0,0.1,0.5,0.8,1]
}

rfc = RandomForestClassifier(random_state=42)

gs2 = GridSearchCV(
    estimator=rfc,
    param_grid=param_grid,
    scoring=['accuracy','precision_macro','recall_macro','f1_macro'],
    refit='f1_macro',   # final model chosen by f1_macro
    cv=5,
    n_jobs=-1,
    verbose=1
)

# fit on training data (use X_train, y_train from your notebook)
gs2.fit(X_train, y_train)

print("Best params:", gs2.best_params_)
print("Best f1_macro (cv):", gs2.best_score_)


Fitting 5 folds for each of 1800 candidates, totalling 9000 fits


5400 fits failed out of a total of 9000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1800 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Acer\AppData\Roaming\Python\Python312\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Acer\AppData\Roaming\Python\Python312\site-packages\sklearn\base.py", line 1382, in wrapper
    estimator._validate_params()
  File "C:\Users\Acer\AppData\Roaming\Python\Python312\site-packages\sklearn\base.py", line 436, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\Acer\AppData\Roaming\Python\Python312\site-packages\sklearn\utils\_param_validat

Best params: {'bootstrap': True, 'criterion': 'gini', 'max_depth': None, 'max_samples': 0.8, 'min_samples_leaf': 1, 'n_estimators': 500, 'oob_score': True}
Best f1_macro (cv): 0.8840700918322708


In [36]:
from xgboost import XGBClassifier
# X_train, X_test, y_train, y_test should already exist in the notebook
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)

param_grid = {
    'n_estimators': [50, 100, 200,500],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

gs_xgb = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
    refit='f1_macro',
    cv=5,
    n_jobs=-1,
    verbose=2
)

# fit
gs_xgb.fit(X_train, y_train)

# results
print("Best params:", gs_xgb.best_params_)
print("Best f1_macro (cv):", gs_xgb.best_score_)


Fitting 5 folds for each of 216 candidates, totalling 1080 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Best params: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 500, 'subsample': 0.8}
Best f1_macro (cv): 0.8927129881779686


In [37]:
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.base import BaseEstimator, ClassifierMixin, clone

class OOFStackingClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, base_models, meta_model, n_splits=5, random_state=42):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_splits = n_splits
        self.random_state = random_state
        self.fitted_base_models = []  # will hold final trained clones of base models

    def fit(self, X, y):
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]

        skf = StratifiedKFold(n_splits=self.n_splits, shuffle=True, random_state=self.random_state)
        oof_preds = np.zeros((n_samples, len(self.base_models)))

        # Out-of-fold predictions
        for i, model in enumerate(self.base_models):
            oof = np.zeros(n_samples)
            for train_idx, val_idx in skf.split(X, y):
                mdl_clone = clone(model)
                mdl_clone.fit(X[train_idx], y[train_idx])
                oof[val_idx] = mdl_clone.predict_proba(X[val_idx])[:, 1]
            oof_preds[:, i] = oof

        # Train meta-model on OOF predictions
        self.meta_model.fit(oof_preds, y)

        # Retrain base models on full dataset
        self.fitted_base_models = [clone(m).fit(X, y) for m in self.base_models]

        return self

    def predict_proba(self, X):
        X = np.asarray(X)
        # Get predictions from fully trained base models
        meta_features = np.column_stack([
            m.predict_proba(X)[:, 1] for m in self.fitted_base_models
        ])
        return self.meta_model.predict_proba(meta_features)

    def predict(self, X):
        return (self.predict_proba(X)[:, 1] > 0.5).astype(int)


In [44]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.svm import SVC

# Define base learners
rf = RandomForestClassifier(bootstrap= True, criterion= 'gini', max_depth= None, 
                            max_samples= 0.8, min_samples_leaf= 1, n_estimators= 500, oob_score=True)

xgb = XGBClassifier(n_estimators=500, max_depth=7, learning_rate=0.1,
                    subsample=1, colsample_bytree=1,
                    eval_metric='logloss', use_label_encoder=False, random_state=42)

gb = GradientBoostingClassifier(learning_rate= 0.1, max_depth= 5, n_estimators= 500,
                                 subsample=1, random_state=42)


lgb = LGBMClassifier(n_estimators=500, learning_rate=0.05,
                     subsample=0.8, colsample_bytree=0.8, random_state=42)

svc = SVC(C=2.0, kernel='rbf', probability=True, random_state=42)

base_models = [rf, xgb, gb, lgb, svc]

# Option 1: Logistic Regression as meta learner
meta_log = LogisticRegression(penalty='l2', C=1.0, solver='lbfgs', max_iter=500, random_state=42)

# Option 2: Random Forest as meta learnern
meta_rf = RandomForestClassifier(n_estimators=200, max_depth=None, random_state=42)
#option 3:Voting classifier
meta_voting = VotingClassifier(estimators=[('gb', gb), ('xgb', xgb)], voting='soft')


# Build stacking model (just swap meta model here)
stack_clf = OOFStackingClassifier(base_models=base_models, meta_model=meta_voting, n_splits=5)

# Fit and predict
stack_clf.fit(X_train, y_train)
y_pred = stack_clf.predict(X_test)
y_pred_proba = stack_clf.predict_proba(X_test)[:, 1]


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 2541, number of negative: 3580
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000343 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6121, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.415128 -> initscore=-0.342805
[LightGBM] [Info] Start training from score -0.342805




[LightGBM] [Info] Number of positive: 2540, number of negative: 3581
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000329 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6121, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.414965 -> initscore=-0.343478
[LightGBM] [Info] Start training from score -0.343478




[LightGBM] [Info] Number of positive: 2541, number of negative: 3581
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000276 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6122, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.415060 -> initscore=-0.343084
[LightGBM] [Info] Start training from score -0.343084




[LightGBM] [Info] Number of positive: 2541, number of negative: 3581
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000315 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6122, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.415060 -> initscore=-0.343084
[LightGBM] [Info] Start training from score -0.343084




[LightGBM] [Info] Number of positive: 2541, number of negative: 3581
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000276 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6122, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.415060 -> initscore=-0.343084
[LightGBM] [Info] Start training from score -0.343084


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3176, number of negative: 4476
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000193 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 7652, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.415055 -> initscore=-0.343107
[LightGBM] [Info] Start training from score -0.343107




In [45]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_pred)

0.8834291688447464

In [None]:
cross_val_score(estimator=stack_clf,X=X,y=y,cv=10,scoring='accuracy').mean()

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 2681, number of negative: 4205
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000206 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.389341 -> initscore=-0.450084
[LightGBM] [Info] Start training from score -0.450084




[LightGBM] [Info] Number of positive: 2681, number of negative: 4205
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000241 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.389341 -> initscore=-0.450084
[LightGBM] [Info] Start training from score -0.450084




[LightGBM] [Info] Number of positive: 2680, number of negative: 4206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000223 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.389195 -> initscore=-0.450695
[LightGBM] [Info] Start training from score -0.450695




[LightGBM] [Info] Number of positive: 2681, number of negative: 4206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000201 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.389284 -> initscore=-0.450322
[LightGBM] [Info] Start training from score -0.450322




[LightGBM] [Info] Number of positive: 2681, number of negative: 4206
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000252 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.389284 -> initscore=-0.450322
[LightGBM] [Info] Start training from score -0.450322


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3351, number of negative: 5257
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000228 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 8608, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.389289 -> initscore=-0.450302
[LightGBM] [Info] Start training from score -0.450302


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 2544, number of negative: 4342
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000373 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.369445 -> initscore=-0.534597
[LightGBM] [Info] Start training from score -0.534597




[LightGBM] [Info] Number of positive: 2544, number of negative: 4342
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000230 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.369445 -> initscore=-0.534597
[LightGBM] [Info] Start training from score -0.534597




[LightGBM] [Info] Number of positive: 2544, number of negative: 4342
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000216 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.369445 -> initscore=-0.534597
[LightGBM] [Info] Start training from score -0.534597




[LightGBM] [Info] Number of positive: 2544, number of negative: 4343
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000262 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.369392 -> initscore=-0.534828
[LightGBM] [Info] Start training from score -0.534828




[LightGBM] [Info] Number of positive: 2544, number of negative: 4343
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000251 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.369392 -> initscore=-0.534828
[LightGBM] [Info] Start training from score -0.534828


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3180, number of negative: 5428
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000213 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 8608, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.369424 -> initscore=-0.534690
[LightGBM] [Info] Start training from score -0.534690


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 2658, number of negative: 4228
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000368 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.386001 -> initscore=-0.464155
[LightGBM] [Info] Start training from score -0.464155




[LightGBM] [Info] Number of positive: 2658, number of negative: 4228
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000252 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.386001 -> initscore=-0.464155
[LightGBM] [Info] Start training from score -0.464155




[LightGBM] [Info] Number of positive: 2658, number of negative: 4228
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000233 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.386001 -> initscore=-0.464155
[LightGBM] [Info] Start training from score -0.464155




[LightGBM] [Info] Number of positive: 2659, number of negative: 4228
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000322 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.386090 -> initscore=-0.463779
[LightGBM] [Info] Start training from score -0.463779




[LightGBM] [Info] Number of positive: 2659, number of negative: 4228
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000331 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.386090 -> initscore=-0.463779
[LightGBM] [Info] Start training from score -0.463779


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3323, number of negative: 5285
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 8608, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.386036 -> initscore=-0.464005
[LightGBM] [Info] Start training from score -0.464005


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 2864, number of negative: 4022
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000205 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.415916 -> initscore=-0.339560
[LightGBM] [Info] Start training from score -0.339560




[LightGBM] [Info] Number of positive: 2865, number of negative: 4021
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000241 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.416062 -> initscore=-0.338962
[LightGBM] [Info] Start training from score -0.338962




[LightGBM] [Info] Number of positive: 2865, number of negative: 4021
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.416062 -> initscore=-0.338962
[LightGBM] [Info] Start training from score -0.338962




[LightGBM] [Info] Number of positive: 2865, number of negative: 4022
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000229 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.416001 -> initscore=-0.339211
[LightGBM] [Info] Start training from score -0.339211




[LightGBM] [Info] Number of positive: 2865, number of negative: 4022
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000263 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.416001 -> initscore=-0.339211
[LightGBM] [Info] Start training from score -0.339211


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3581, number of negative: 5027
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000232 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 8608, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.416008 -> initscore=-0.339181
[LightGBM] [Info] Start training from score -0.339181


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3020, number of negative: 3866
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000438 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.438571 -> initscore=-0.246964
[LightGBM] [Info] Start training from score -0.246964




[LightGBM] [Info] Number of positive: 3020, number of negative: 3866
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000209 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.438571 -> initscore=-0.246964
[LightGBM] [Info] Start training from score -0.246964




[LightGBM] [Info] Number of positive: 3020, number of negative: 3866
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6886, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.438571 -> initscore=-0.246964
[LightGBM] [Info] Start training from score -0.246964




[LightGBM] [Info] Number of positive: 3020, number of negative: 3867
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000200 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.438507 -> initscore=-0.247222
[LightGBM] [Info] Start training from score -0.247222




[LightGBM] [Info] Number of positive: 3020, number of negative: 3867
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000234 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.438507 -> initscore=-0.247222
[LightGBM] [Info] Start training from score -0.247222


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3775, number of negative: 4833
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 8608, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.438546 -> initscore=-0.247067
[LightGBM] [Info] Start training from score -0.247067


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3111, number of negative: 3776
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000316 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.451721 -> initscore=-0.193721
[LightGBM] [Info] Start training from score -0.193721




[LightGBM] [Info] Number of positive: 3111, number of negative: 3776
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000236 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.451721 -> initscore=-0.193721
[LightGBM] [Info] Start training from score -0.193721




[LightGBM] [Info] Number of positive: 3111, number of negative: 3776
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000235 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.451721 -> initscore=-0.193721
[LightGBM] [Info] Start training from score -0.193721




[LightGBM] [Info] Number of positive: 3111, number of negative: 3776
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000206 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.451721 -> initscore=-0.193721
[LightGBM] [Info] Start training from score -0.193721




[LightGBM] [Info] Number of positive: 3112, number of negative: 3776
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000223 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6888, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.451800 -> initscore=-0.193400
[LightGBM] [Info] Start training from score -0.193400


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3889, number of negative: 4720
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000222 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 8609, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.451737 -> initscore=-0.193657
[LightGBM] [Info] Start training from score -0.193657


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3161, number of negative: 3726
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000626 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458981 -> initscore=-0.164447
[LightGBM] [Info] Start training from score -0.164447




[LightGBM] [Info] Number of positive: 3161, number of negative: 3726
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000459 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458981 -> initscore=-0.164447
[LightGBM] [Info] Start training from score -0.164447




[LightGBM] [Info] Number of positive: 3162, number of negative: 3725
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000457 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.459126 -> initscore=-0.163862
[LightGBM] [Info] Start training from score -0.163862




[LightGBM] [Info] Number of positive: 3162, number of negative: 3725
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000490 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.459126 -> initscore=-0.163862
[LightGBM] [Info] Start training from score -0.163862




[LightGBM] [Info] Number of positive: 3162, number of negative: 3726
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000608 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6888, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.459059 -> initscore=-0.164131
[LightGBM] [Info] Start training from score -0.164131


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3952, number of negative: 4657
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000524 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 8609, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.459054 -> initscore=-0.164150
[LightGBM] [Info] Start training from score -0.164150


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3024, number of negative: 3863
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000790 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.439088 -> initscore=-0.244864
[LightGBM] [Info] Start training from score -0.244864




[LightGBM] [Info] Number of positive: 3024, number of negative: 3863
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000529 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.439088 -> initscore=-0.244864
[LightGBM] [Info] Start training from score -0.244864




[LightGBM] [Info] Number of positive: 3024, number of negative: 3863
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000489 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.439088 -> initscore=-0.244864
[LightGBM] [Info] Start training from score -0.244864




[LightGBM] [Info] Number of positive: 3024, number of negative: 3863
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000514 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.439088 -> initscore=-0.244864
[LightGBM] [Info] Start training from score -0.244864




[LightGBM] [Info] Number of positive: 3024, number of negative: 3864
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000481 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6888, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.439024 -> initscore=-0.245122
[LightGBM] [Info] Start training from score -0.245122


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 3780, number of negative: 4829
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000755 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 8609, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.439075 -> initscore=-0.244915
[LightGBM] [Info] Start training from score -0.244915


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 2803, number of negative: 4084
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000341 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.406999 -> initscore=-0.376387
[LightGBM] [Info] Start training from score -0.376387




[LightGBM] [Info] Number of positive: 2803, number of negative: 4084
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000190 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.406999 -> initscore=-0.376387
[LightGBM] [Info] Start training from score -0.376387




[LightGBM] [Info] Number of positive: 2803, number of negative: 4084
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000186 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.406999 -> initscore=-0.376387
[LightGBM] [Info] Start training from score -0.376387




[LightGBM] [Info] Number of positive: 2803, number of negative: 4084
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000257 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6887, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.406999 -> initscore=-0.376387
[LightGBM] [Info] Start training from score -0.376387




[LightGBM] [Info] Number of positive: 2804, number of negative: 4084
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000231 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 6888, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.407085 -> initscore=-0.376030
[LightGBM] [Info] Start training from score -0.376030


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
