<a href="https://colab.research.google.com/github/satyajeetsen007/Classification_of-_Loan_Borrowers_Kaggle/blob/main/Classification_of_Loan_Borrowers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import numpy as np
import pandas as pd
import pyxlsb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import Lasso
from sklearn.feature_selection import SelectFromModel
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

In [None]:
!pip install pyxlsb
!pip install catboost

In [7]:
train = pd.read_excel("training.xlsb", engine='pyxlsb')
test = pd.read_excel("test.xlsb", engine='pyxlsb')

In [8]:
train = train.iloc[:89734,:]
test = test.iloc[:38405,:]

In [None]:
pd.set_option('display.max_columns',None)
train.describe(include='all')

In [None]:
train.dtypes

In [None]:
print(train.shape)
print(test.shape)

In [None]:
### Missing values
print("Missing Values in Training df: ",train.isna().sum().sum())
print("Missing Values in Test df: ",test.isna().sum().sum())

In [None]:
### Categorical Values
cat_var = [cols for cols in train.columns if train[cols].dtype == 'O']
print("No. of categorical values :", len(cat_var) ,'\n', cat_var, '\n')
### Cardinality
for cols in cat_var:
    print("Column Name: ",cols, "Unique Values: ",train[cols].unique(), "Count of Unique Values: ", len(train[cols].unique()),
          "Count : ", train[cols].value_counts(), "Percentage : ", train[cols].value_counts()/len(train[cols]),
          sep = '\n\n', end = '\n\n')
    print('-'*50)

In [14]:
### Rename the Known Columns
new_cols = {'I':'Gender','K':'Region', 'M':'Job_Title', 'N':'Education', 'O':'Marital_Status',
            'P':'Children', 'Q':'Property', 'S':'Employment_Status'}

In [15]:
train.rename(columns = new_cols, inplace=True)
test.rename(columns = new_cols, inplace=True)

In [None]:
### Numerical Variables
num_var = [cols for cols in train.columns if train[cols].dtype != 'O']
print("No. of numerical values :", len(num_var) ,'\n', num_var, '\n')
### Distribution
sns.pairplot(train)
plt.show()

In [None]:
correlation = train.corr()
correlation.style.background_gradient(cmap='coolwarm')

In [None]:
for cols in num_var:
    sns.boxplot(train[cols])
    plt.title(cols)
    plt.show()

In [None]:
### Imbalance Data??
print(train['MARKER'].value_counts())
print(test['MARKER'].value_counts())

In [20]:
### One-HOT Encoding
X = pd.get_dummies(train, drop_first=True)
y = pd.get_dummies(test, drop_first=True)

In [21]:
### Removing Columns to tackle Multi Collinearity (Corr > 0.6)
X = X.drop(['A','D','F'],axis =1)
y = y.drop(['A','D','F'],axis =1)

In [None]:
correlation = X.corr()
correlation.style.background_gradient(cmap='coolwarm')

In [23]:
### Splitting the data - X,y - Train and Test
X_train = X.drop(['ID','MARKER'], axis =1)
y_train = X['MARKER']
X_test = y.drop(['ID','MARKER'], axis =1)
y_test = y['MARKER']

In [24]:
### Scaling the data
scaler = MinMaxScaler()
scaler.fit(X_train)

X_train = pd.DataFrame(scaler.fit_transform(X_train), columns = X_train.columns)

In [None]:
print(len(X_train))
### Imbalance Data treatment
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)
print(len(X_train_resampled))
#print(X_train_resampled.head())

In [None]:
### Feature Selection
#sel_from_model = SelectFromModel(Lasso(alpha=0.1,random_state=12))
#sel_from_model.fit(X_train_resampled,y_train_resampled)

In [None]:
'''selected_feat = X_train_resampled.columns[(sel_from_model.get_support())]

# let's print some stats
print('selected features: {}'.format(selected_feat))
print('total features: {}'.format((X_train_resampled.shape[1])))
print('selected features: {}'.format(len(selected_feat)))
print('features with coefficients shrank to zero: {}'.format(
    np.sum(sel_from_model.estimator_.coef_ == 0)))'''

In [None]:
### Trying different data transformations

colss = ([x for x in X_train_resampled.columns if X_train_resampled[x].dtype == float])
print(X_train_resampled[colss].head())
X_train_resampled[colss] = np.absolute(X_train_resampled[colss])
print(X_train_resampled.head())
#'''print(X_train_resampled[colss].head())
#X_train_resampled[colss] = X_train_resampled[colss]**2
#print(X_train_resampled[colss].head())'''

In [30]:
#### Applying Logistic Reg
from  sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve, precision_score,recall_score,f1_score,classification_report

In [None]:
params = {"penalty": ["l1", "l2", "elasticnet", "none"],"C": [1.0, 1.5, 0.5],
          "solver": ["newton-cg", "lbfgs", "liblinear", "sag", "saga"],"max_iter": [100, 150, 180]}

log_model = LogisticRegression()
model1 = GridSearchCV(estimator=log_model, param_grid=params,scoring = "recall", refit = True,verbose = 4, cv = 2)
model1.fit(X_train_resampled,y_train_resampled)

In [53]:
model1.best_estimator_

In [None]:
model2 = GridSearchCV(estimator=log_model, param_grid=params,scoring = "precision", refit = True,verbose = 4, cv = 2)
model2.fit(X_train_resampled,y_train_resampled)

In [55]:
model2.best_estimator_

In [57]:
model3 = GridSearchCV(estimator=log_model, param_grid=params,scoring = "f1", refit = True,verbose = 4, cv = 2)
model3.fit(X_train_resampled,y_train_resampled)

Fitting 2 folds for each of 180 candidates, totalling 360 fits
[CV 1/2] END C=1.0, max_iter=100, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=100, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=1.0, max_iter=100, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=100, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/2] END C=1.0, max_iter=100, penalty=l1, solver=liblinear;, score=0.794 total time=  10.0s
[CV 2/2] END C=1.0, max_iter=100, penalty=l1, solver=liblinear;, score=0.846 total time=  11.8s
[CV 1/2] END C=1.0, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 1/2] END C=1.0, max_iter=100, penalty=l1, solver=saga;, score=0.794 total time=   7.5s
[CV 2/2] END C=1.0, max_iter=100, penalty=l1, solver=saga;, score=0.846 total time=   5.9s
[CV 1/2] END C=1.0,

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=1.0, max_iter=100, penalty=l2, solver=lbfgs;, score=0.794 total time=   1.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=1.0, max_iter=100, penalty=l2, solver=lbfgs;, score=0.845 total time=   1.7s
[CV 1/2] END C=1.0, max_iter=100, penalty=l2, solver=liblinear;, score=0.794 total time=   1.4s
[CV 2/2] END C=1.0, max_iter=100, penalty=l2, solver=liblinear;, score=0.845 total time=   1.7s
[CV 1/2] END C=1.0, max_iter=100, penalty=l2, solver=sag;, score=0.794 total time=   2.5s
[CV 2/2] END C=1.0, max_iter=100, penalty=l2, solver=sag;, score=0.845 total time=   2.3s
[CV 1/2] END C=1.0, max_iter=100, penalty=l2, solver=saga;, score=0.794 total time=   2.4s
[CV 2/2] END C=1.0, max_iter=100, penalty=l2, solver=saga;, score=0.845 total time=   2.4s
[CV 1/2] END C=1.0, max_iter=100, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=100, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=1.0, max_iter=100, penalty=elasticnet, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=100, penalty=elasticnet



[CV 1/2] END C=1.0, max_iter=100, penalty=none, solver=newton-cg;, score=0.794 total time=  10.4s




[CV 2/2] END C=1.0, max_iter=100, penalty=none, solver=newton-cg;, score=0.846 total time=  11.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=1.0, max_iter=100, penalty=none, solver=lbfgs;, score=0.794 total time=   1.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=1.0, max_iter=100, penalty=none, solver=lbfgs;, score=0.845 total time=   1.6s
[CV 1/2] END C=1.0, max_iter=100, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=100, penalty=none, solver=liblinear;, score=nan total time=   0.0s




[CV 1/2] END C=1.0, max_iter=100, penalty=none, solver=sag;, score=0.794 total time=   9.5s




[CV 2/2] END C=1.0, max_iter=100, penalty=none, solver=sag;, score=0.846 total time=   9.3s




[CV 1/2] END C=1.0, max_iter=100, penalty=none, solver=saga;, score=0.794 total time=   9.8s




[CV 2/2] END C=1.0, max_iter=100, penalty=none, solver=saga;, score=0.846 total time=   8.9s
[CV 1/2] END C=1.0, max_iter=150, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=150, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=1.0, max_iter=150, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=150, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/2] END C=1.0, max_iter=150, penalty=l1, solver=liblinear;, score=0.794 total time=   9.8s
[CV 2/2] END C=1.0, max_iter=150, penalty=l1, solver=liblinear;, score=0.846 total time=  11.4s
[CV 1/2] END C=1.0, max_iter=150, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=150, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 1/2] END C=1.0, max_iter=150, penalty=l1, solver=saga;, score=0.794 total time=   7.8s
[CV 2/2] END C=1.0, max_iter=150, penalty=l1, solver=saga;, score=0.846 total ti

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=1.0, max_iter=150, penalty=l2, solver=lbfgs;, score=0.794 total time=   2.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=1.0, max_iter=150, penalty=l2, solver=lbfgs;, score=0.845 total time=   2.5s
[CV 1/2] END C=1.0, max_iter=150, penalty=l2, solver=liblinear;, score=0.794 total time=   1.1s
[CV 2/2] END C=1.0, max_iter=150, penalty=l2, solver=liblinear;, score=0.845 total time=   1.3s
[CV 1/2] END C=1.0, max_iter=150, penalty=l2, solver=sag;, score=0.794 total time=   2.4s
[CV 2/2] END C=1.0, max_iter=150, penalty=l2, solver=sag;, score=0.845 total time=   3.3s
[CV 1/2] END C=1.0, max_iter=150, penalty=l2, solver=saga;, score=0.794 total time=   2.4s
[CV 2/2] END C=1.0, max_iter=150, penalty=l2, solver=saga;, score=0.845 total time=   2.5s
[CV 1/2] END C=1.0, max_iter=150, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=150, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=1.0, max_iter=150, penalty=elasticnet, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=150, penalty=elasticnet



[CV 1/2] END C=1.0, max_iter=150, penalty=none, solver=newton-cg;, score=0.794 total time=  10.1s




[CV 2/2] END C=1.0, max_iter=150, penalty=none, solver=newton-cg;, score=0.846 total time=  11.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=1.0, max_iter=150, penalty=none, solver=lbfgs;, score=0.794 total time=   2.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=1.0, max_iter=150, penalty=none, solver=lbfgs;, score=0.846 total time=   2.4s
[CV 1/2] END C=1.0, max_iter=150, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=150, penalty=none, solver=liblinear;, score=nan total time=   0.0s




[CV 1/2] END C=1.0, max_iter=150, penalty=none, solver=sag;, score=0.794 total time=  13.7s




[CV 2/2] END C=1.0, max_iter=150, penalty=none, solver=sag;, score=0.846 total time=  13.8s




[CV 1/2] END C=1.0, max_iter=150, penalty=none, solver=saga;, score=0.794 total time=  14.8s




[CV 2/2] END C=1.0, max_iter=150, penalty=none, solver=saga;, score=0.846 total time=  15.1s
[CV 1/2] END C=1.0, max_iter=180, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=180, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=1.0, max_iter=180, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=180, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/2] END C=1.0, max_iter=180, penalty=l1, solver=liblinear;, score=0.794 total time=  10.9s
[CV 2/2] END C=1.0, max_iter=180, penalty=l1, solver=liblinear;, score=0.846 total time=  12.2s
[CV 1/2] END C=1.0, max_iter=180, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=180, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 1/2] END C=1.0, max_iter=180, penalty=l1, solver=saga;, score=0.794 total time=   7.0s
[CV 2/2] END C=1.0, max_iter=180, penalty=l1, solver=saga;, score=0.846 total ti



[CV 1/2] END C=1.0, max_iter=180, penalty=none, solver=newton-cg;, score=0.794 total time=   8.9s




[CV 2/2] END C=1.0, max_iter=180, penalty=none, solver=newton-cg;, score=0.846 total time=  11.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=1.0, max_iter=180, penalty=none, solver=lbfgs;, score=0.794 total time=   3.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=1.0, max_iter=180, penalty=none, solver=lbfgs;, score=0.846 total time=   3.2s
[CV 1/2] END C=1.0, max_iter=180, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/2] END C=1.0, max_iter=180, penalty=none, solver=liblinear;, score=nan total time=   0.0s




[CV 1/2] END C=1.0, max_iter=180, penalty=none, solver=sag;, score=0.794 total time=  16.4s




[CV 2/2] END C=1.0, max_iter=180, penalty=none, solver=sag;, score=0.846 total time=  16.5s




[CV 1/2] END C=1.0, max_iter=180, penalty=none, solver=saga;, score=0.794 total time=  17.8s




[CV 2/2] END C=1.0, max_iter=180, penalty=none, solver=saga;, score=0.846 total time=  17.1s
[CV 1/2] END C=1.5, max_iter=100, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=100, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=100, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=100, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=100, penalty=l1, solver=liblinear;, score=0.794 total time=  18.2s
[CV 2/2] END C=1.5, max_iter=100, penalty=l1, solver=liblinear;, score=0.846 total time=  11.1s
[CV 1/2] END C=1.5, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=100, penalty=l1, solver=saga;, score=0.794 total time=   9.8s
[CV 2/2] END C=1.5, max_iter=100, penalty=l1, solver=saga;, score=0.846 total ti

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=1.5, max_iter=100, penalty=l2, solver=lbfgs;, score=0.795 total time=   1.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=1.5, max_iter=100, penalty=l2, solver=lbfgs;, score=0.846 total time=   1.7s
[CV 1/2] END C=1.5, max_iter=100, penalty=l2, solver=liblinear;, score=0.794 total time=   1.2s
[CV 2/2] END C=1.5, max_iter=100, penalty=l2, solver=liblinear;, score=0.846 total time=   1.2s
[CV 1/2] END C=1.5, max_iter=100, penalty=l2, solver=sag;, score=0.794 total time=   2.3s
[CV 2/2] END C=1.5, max_iter=100, penalty=l2, solver=sag;, score=0.846 total time=   3.0s
[CV 1/2] END C=1.5, max_iter=100, penalty=l2, solver=saga;, score=0.794 total time=   2.7s
[CV 2/2] END C=1.5, max_iter=100, penalty=l2, solver=saga;, score=0.846 total time=   3.3s
[CV 1/2] END C=1.5, max_iter=100, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=100, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=100, penalty=elasticnet, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=100, penalty=elasticnet



[CV 1/2] END C=1.5, max_iter=100, penalty=none, solver=newton-cg;, score=0.794 total time=  10.2s




[CV 2/2] END C=1.5, max_iter=100, penalty=none, solver=newton-cg;, score=0.846 total time=  11.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=1.5, max_iter=100, penalty=none, solver=lbfgs;, score=0.794 total time=   1.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=1.5, max_iter=100, penalty=none, solver=lbfgs;, score=0.845 total time=   1.7s
[CV 1/2] END C=1.5, max_iter=100, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=100, penalty=none, solver=liblinear;, score=nan total time=   0.0s




[CV 1/2] END C=1.5, max_iter=100, penalty=none, solver=sag;, score=0.794 total time=  10.3s




[CV 2/2] END C=1.5, max_iter=100, penalty=none, solver=sag;, score=0.846 total time=  10.3s




[CV 1/2] END C=1.5, max_iter=100, penalty=none, solver=saga;, score=0.794 total time=  10.4s




[CV 2/2] END C=1.5, max_iter=100, penalty=none, solver=saga;, score=0.846 total time=  10.3s
[CV 1/2] END C=1.5, max_iter=150, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=150, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=150, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=150, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=150, penalty=l1, solver=liblinear;, score=0.794 total time=   9.2s
[CV 2/2] END C=1.5, max_iter=150, penalty=l1, solver=liblinear;, score=0.846 total time=  12.0s
[CV 1/2] END C=1.5, max_iter=150, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=150, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=150, penalty=l1, solver=saga;, score=0.794 total time=  10.7s
[CV 2/2] END C=1.5, max_iter=150, penalty=l1, solver=saga;, score=0.846 total ti

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=1.5, max_iter=150, penalty=l2, solver=lbfgs;, score=0.794 total time=   2.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=1.5, max_iter=150, penalty=l2, solver=lbfgs;, score=0.846 total time=   2.9s
[CV 1/2] END C=1.5, max_iter=150, penalty=l2, solver=liblinear;, score=0.794 total time=   1.6s
[CV 2/2] END C=1.5, max_iter=150, penalty=l2, solver=liblinear;, score=0.846 total time=   1.3s
[CV 1/2] END C=1.5, max_iter=150, penalty=l2, solver=sag;, score=0.794 total time=   2.4s
[CV 2/2] END C=1.5, max_iter=150, penalty=l2, solver=sag;, score=0.846 total time=   2.6s
[CV 1/2] END C=1.5, max_iter=150, penalty=l2, solver=saga;, score=0.794 total time=   2.4s
[CV 2/2] END C=1.5, max_iter=150, penalty=l2, solver=saga;, score=0.846 total time=   4.4s
[CV 1/2] END C=1.5, max_iter=150, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=150, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=150, penalty=elasticnet, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=150, penalty=elasticnet



[CV 1/2] END C=1.5, max_iter=150, penalty=none, solver=newton-cg;, score=0.794 total time=   8.9s




[CV 2/2] END C=1.5, max_iter=150, penalty=none, solver=newton-cg;, score=0.846 total time=  11.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=1.5, max_iter=150, penalty=none, solver=lbfgs;, score=0.794 total time=   2.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=1.5, max_iter=150, penalty=none, solver=lbfgs;, score=0.846 total time=   3.3s
[CV 1/2] END C=1.5, max_iter=150, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=150, penalty=none, solver=liblinear;, score=nan total time=   0.0s




[CV 1/2] END C=1.5, max_iter=150, penalty=none, solver=sag;, score=0.794 total time=  13.6s




[CV 2/2] END C=1.5, max_iter=150, penalty=none, solver=sag;, score=0.846 total time=  13.7s




[CV 1/2] END C=1.5, max_iter=150, penalty=none, solver=saga;, score=0.794 total time=  14.3s




[CV 2/2] END C=1.5, max_iter=150, penalty=none, solver=saga;, score=0.846 total time=  14.4s
[CV 1/2] END C=1.5, max_iter=180, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=180, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=180, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=180, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=180, penalty=l1, solver=liblinear;, score=0.794 total time=   9.2s
[CV 2/2] END C=1.5, max_iter=180, penalty=l1, solver=liblinear;, score=0.846 total time=  11.0s
[CV 1/2] END C=1.5, max_iter=180, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=180, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=180, penalty=l1, solver=saga;, score=0.794 total time=  10.5s
[CV 2/2] END C=1.5, max_iter=180, penalty=l1, solver=saga;, score=0.846 total ti

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=1.5, max_iter=180, penalty=l2, solver=lbfgs;, score=0.794 total time=   2.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=1.5, max_iter=180, penalty=l2, solver=lbfgs;, score=0.846 total time=   3.0s
[CV 1/2] END C=1.5, max_iter=180, penalty=l2, solver=liblinear;, score=0.794 total time=   1.3s
[CV 2/2] END C=1.5, max_iter=180, penalty=l2, solver=liblinear;, score=0.846 total time=   1.6s
[CV 1/2] END C=1.5, max_iter=180, penalty=l2, solver=sag;, score=0.794 total time=   2.6s
[CV 2/2] END C=1.5, max_iter=180, penalty=l2, solver=sag;, score=0.846 total time=   2.9s
[CV 1/2] END C=1.5, max_iter=180, penalty=l2, solver=saga;, score=0.794 total time=   2.5s
[CV 2/2] END C=1.5, max_iter=180, penalty=l2, solver=saga;, score=0.846 total time=   3.6s
[CV 1/2] END C=1.5, max_iter=180, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.1s
[CV 2/2] END C=1.5, max_iter=180, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=1.5, max_iter=180, penalty=elasticnet, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=180, penalty=elasticnet



[CV 1/2] END C=1.5, max_iter=180, penalty=none, solver=newton-cg;, score=0.794 total time=   9.7s




[CV 2/2] END C=1.5, max_iter=180, penalty=none, solver=newton-cg;, score=0.846 total time=  11.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=1.5, max_iter=180, penalty=none, solver=lbfgs;, score=0.794 total time=   2.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=1.5, max_iter=180, penalty=none, solver=lbfgs;, score=0.846 total time=   4.2s
[CV 1/2] END C=1.5, max_iter=180, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/2] END C=1.5, max_iter=180, penalty=none, solver=liblinear;, score=nan total time=   0.0s




[CV 1/2] END C=1.5, max_iter=180, penalty=none, solver=sag;, score=0.794 total time=  16.3s




[CV 2/2] END C=1.5, max_iter=180, penalty=none, solver=sag;, score=0.846 total time=  16.3s




[CV 1/2] END C=1.5, max_iter=180, penalty=none, solver=saga;, score=0.794 total time=  17.8s




[CV 2/2] END C=1.5, max_iter=180, penalty=none, solver=saga;, score=0.846 total time=  16.9s
[CV 1/2] END C=0.5, max_iter=100, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=100, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=0.5, max_iter=100, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=100, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/2] END C=0.5, max_iter=100, penalty=l1, solver=liblinear;, score=0.794 total time=   8.7s
[CV 2/2] END C=0.5, max_iter=100, penalty=l1, solver=liblinear;, score=0.846 total time=  15.2s
[CV 1/2] END C=0.5, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 1/2] END C=0.5, max_iter=100, penalty=l1, solver=saga;, score=0.794 total time=   3.6s
[CV 2/2] END C=0.5, max_iter=100, penalty=l1, solver=saga;, score=0.846 total ti

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=0.5, max_iter=100, penalty=l2, solver=lbfgs;, score=0.794 total time=   1.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=0.5, max_iter=100, penalty=l2, solver=lbfgs;, score=0.845 total time=   1.7s
[CV 1/2] END C=0.5, max_iter=100, penalty=l2, solver=liblinear;, score=0.794 total time=   1.5s
[CV 2/2] END C=0.5, max_iter=100, penalty=l2, solver=liblinear;, score=0.845 total time=   1.9s
[CV 1/2] END C=0.5, max_iter=100, penalty=l2, solver=sag;, score=0.794 total time=   2.2s
[CV 2/2] END C=0.5, max_iter=100, penalty=l2, solver=sag;, score=0.845 total time=   2.0s
[CV 1/2] END C=0.5, max_iter=100, penalty=l2, solver=saga;, score=0.794 total time=   2.2s
[CV 2/2] END C=0.5, max_iter=100, penalty=l2, solver=saga;, score=0.845 total time=   1.8s
[CV 1/2] END C=0.5, max_iter=100, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=100, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=0.5, max_iter=100, penalty=elasticnet, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=100, penalty=elasticnet



[CV 1/2] END C=0.5, max_iter=100, penalty=none, solver=newton-cg;, score=0.794 total time=   9.9s




[CV 2/2] END C=0.5, max_iter=100, penalty=none, solver=newton-cg;, score=0.846 total time=  11.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=0.5, max_iter=100, penalty=none, solver=lbfgs;, score=0.794 total time=   1.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=0.5, max_iter=100, penalty=none, solver=lbfgs;, score=0.845 total time=   1.7s
[CV 1/2] END C=0.5, max_iter=100, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=100, penalty=none, solver=liblinear;, score=nan total time=   0.0s




[CV 1/2] END C=0.5, max_iter=100, penalty=none, solver=sag;, score=0.794 total time=   9.5s




[CV 2/2] END C=0.5, max_iter=100, penalty=none, solver=sag;, score=0.846 total time=   9.3s




[CV 1/2] END C=0.5, max_iter=100, penalty=none, solver=saga;, score=0.794 total time=   9.1s




[CV 2/2] END C=0.5, max_iter=100, penalty=none, solver=saga;, score=0.846 total time=   9.9s
[CV 1/2] END C=0.5, max_iter=150, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=150, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=0.5, max_iter=150, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=150, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/2] END C=0.5, max_iter=150, penalty=l1, solver=liblinear;, score=0.794 total time=  12.5s
[CV 2/2] END C=0.5, max_iter=150, penalty=l1, solver=liblinear;, score=0.846 total time=  19.7s
[CV 1/2] END C=0.5, max_iter=150, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=150, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 1/2] END C=0.5, max_iter=150, penalty=l1, solver=saga;, score=0.794 total time=   3.8s
[CV 2/2] END C=0.5, max_iter=150, penalty=l1, solver=saga;, score=0.846 total ti

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=0.5, max_iter=150, penalty=l2, solver=lbfgs;, score=0.794 total time=   2.4s
[CV 2/2] END C=0.5, max_iter=150, penalty=l2, solver=lbfgs;, score=0.845 total time=   2.4s
[CV 1/2] END C=0.5, max_iter=150, penalty=l2, solver=liblinear;, score=0.794 total time=   1.1s
[CV 2/2] END C=0.5, max_iter=150, penalty=l2, solver=liblinear;, score=0.845 total time=   1.3s
[CV 1/2] END C=0.5, max_iter=150, penalty=l2, solver=sag;, score=0.794 total time=   3.0s
[CV 2/2] END C=0.5, max_iter=150, penalty=l2, solver=sag;, score=0.845 total time=   2.1s
[CV 1/2] END C=0.5, max_iter=150, penalty=l2, solver=saga;, score=0.794 total time=   2.2s
[CV 2/2] END C=0.5, max_iter=150, penalty=l2, solver=saga;, score=0.845 total time=   1.9s
[CV 1/2] END C=0.5, max_iter=150, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=150, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=0.5, max_iter=150, penalty=elasticnet, solv



[CV 1/2] END C=0.5, max_iter=150, penalty=none, solver=newton-cg;, score=0.794 total time=  10.3s




[CV 2/2] END C=0.5, max_iter=150, penalty=none, solver=newton-cg;, score=0.846 total time=  11.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=0.5, max_iter=150, penalty=none, solver=lbfgs;, score=0.794 total time=   2.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=0.5, max_iter=150, penalty=none, solver=lbfgs;, score=0.846 total time=   2.5s
[CV 1/2] END C=0.5, max_iter=150, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=150, penalty=none, solver=liblinear;, score=nan total time=   0.0s




[CV 1/2] END C=0.5, max_iter=150, penalty=none, solver=sag;, score=0.794 total time=  14.1s




[CV 2/2] END C=0.5, max_iter=150, penalty=none, solver=sag;, score=0.846 total time=  14.4s




[CV 1/2] END C=0.5, max_iter=150, penalty=none, solver=saga;, score=0.794 total time=  14.9s




[CV 2/2] END C=0.5, max_iter=150, penalty=none, solver=saga;, score=0.846 total time=  14.5s
[CV 1/2] END C=0.5, max_iter=180, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=180, penalty=l1, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/2] END C=0.5, max_iter=180, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=180, penalty=l1, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/2] END C=0.5, max_iter=180, penalty=l1, solver=liblinear;, score=0.794 total time=   6.5s
[CV 2/2] END C=0.5, max_iter=180, penalty=l1, solver=liblinear;, score=0.846 total time=  11.3s
[CV 1/2] END C=0.5, max_iter=180, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=180, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 1/2] END C=0.5, max_iter=180, penalty=l1, solver=saga;, score=0.794 total time=   5.7s
[CV 2/2] END C=0.5, max_iter=180, penalty=l1, solver=saga;, score=0.846 total ti



[CV 1/2] END C=0.5, max_iter=180, penalty=none, solver=newton-cg;, score=0.794 total time=  10.3s




[CV 2/2] END C=0.5, max_iter=180, penalty=none, solver=newton-cg;, score=0.846 total time=  11.1s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/2] END C=0.5, max_iter=180, penalty=none, solver=lbfgs;, score=0.794 total time=   3.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/2] END C=0.5, max_iter=180, penalty=none, solver=lbfgs;, score=0.846 total time=   3.0s
[CV 1/2] END C=0.5, max_iter=180, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/2] END C=0.5, max_iter=180, penalty=none, solver=liblinear;, score=nan total time=   0.0s




[CV 1/2] END C=0.5, max_iter=180, penalty=none, solver=sag;, score=0.794 total time=  16.5s




[CV 2/2] END C=0.5, max_iter=180, penalty=none, solver=sag;, score=0.846 total time=  17.6s




[CV 1/2] END C=0.5, max_iter=180, penalty=none, solver=saga;, score=0.794 total time=  18.1s


162 fits failed out of a total of 360.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
18 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 1162, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 54, in _check_solver
    raise ValueError(
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

--------------------------------

[CV 2/2] END C=0.5, max_iter=180, penalty=none, solver=saga;, score=0.846 total time=  18.0s


In [58]:
model3.best_estimator_

In [None]:
model1_be = LogisticRegression(C=1.5)
model1_be.fit(X_train_resampled,y_train_resampled)

In [None]:
model2_be = LogisticRegression(max_iter=180, penalty='none')
model2_be.fit(X_train_resampled,y_train_resampled)

In [None]:
model3_be = LogisticRegression(penalty='none', solver='newton-cg')
model3_be.fit(X_train_resampled,y_train_resampled)

In [60]:
### Results of M1 model

y_pred_m1 = model1_be.predict(X_test)
y_train_predict_m1 = model1_be.predict(X_train_resampled)
print("--"*5,"Results of M1 model","--"*5)
print("Accuracy Score of Model1_train : ", accuracy_score(y_train_resampled,y_train_predict_m1))
print("Accuracy Score of Model1_test : ", accuracy_score(y_test,y_pred_m1))
print('--'*5,"Training Data",'--'*5)
print(classification_report(y_test, y_pred_m1))
print(confusion_matrix(y_test, y_pred_m1))
print('--'*5,"Test Data",'--'*5)
print(classification_report(y_train_resampled, y_train_predict_m1))
print(confusion_matrix(y_train_resampled, y_train_predict_m1))

---------- Results of M1 model ----------
Accuracy Score of Model1_train :  0.8365681492751029
Accuracy Score of Model1_test :  0.5601353990365838
---------- Training Data ----------
              precision    recall  f1-score   support

           0       1.00      0.56      0.72     38259
           1       0.01      0.61      0.01       146

    accuracy                           0.56     38405
   macro avg       0.50      0.58      0.36     38405
weighted avg       0.99      0.56      0.71     38405

[[21423 16836]
 [   57    89]]
---------- Test Data ----------
              precision    recall  f1-score   support

           0       0.85      0.81      0.83     89392
           1       0.82      0.86      0.84     89392

    accuracy                           0.84    178784
   macro avg       0.84      0.84      0.84    178784
weighted avg       0.84      0.84      0.84    178784

[[72756 16636]
 [12583 76809]]


In [48]:
### Results of M2 model

y_pred_m2 = model2_be.predict(X_test)
y_train_predict_m2 = model2_be.predict(X_train_resampled)
print("--"*5,"Results of M2 model","--"*5)
print("Accuracy Score of Model2_train : ", accuracy_score(y_train_resampled,y_train_predict_m2))
print("Accuracy Score of Model2_test : ", accuracy_score(y_test,y_pred_m2))
print('--'*5,"Training Data",'--'*5)
print(classification_report(y_test, y_pred_m2))
print(confusion_matrix(y_test, y_pred_m2))
print('--'*5,"Test Data",'--'*5)
print(classification_report(y_train_resampled, y_train_predict_m2))
print(confusion_matrix(y_train_resampled, y_train_predict_m2))

---------- Results of M2 model ----------
Accuracy Score of Model2_train :  0.8368534097010918
Accuracy Score of Model2_test :  0.5601093607603177
---------- Training Data ----------
              precision    recall  f1-score   support

           0       1.00      0.56      0.72     38259
           1       0.01      0.62      0.01       146

    accuracy                           0.56     38405
   macro avg       0.50      0.59      0.36     38405
weighted avg       0.99      0.56      0.71     38405

[[21421 16838]
 [   56    90]]
---------- Test Data ----------
              precision    recall  f1-score   support

           0       0.85      0.81      0.83     89392
           1       0.82      0.86      0.84     89392

    accuracy                           0.84    178784
   macro avg       0.84      0.84      0.84    178784
weighted avg       0.84      0.84      0.84    178784

[[72783 16609]
 [12559 76833]]


In [62]:
### Results of M3 model

y_pred_m3 = model3_be.predict(X_test)
y_train_predict_m3 = model3_be.predict(X_train_resampled)
print("--"*5,"Results of M3 model","--"*5)
print("Accuracy Score of Model3_train : ", accuracy_score(y_train_resampled,y_train_predict_m3))
print("Accuracy Score of Model3_test : ", accuracy_score(y_test,y_pred_m3))
print('--'*5,"Training Data",'--'*5)
print(classification_report(y_test, y_pred_m3))
print(confusion_matrix(y_test, y_pred_m3))
print('--'*5,"Test Data",'--'*5)
print(classification_report(y_train_resampled, y_train_predict_m3))
print(confusion_matrix(y_train_resampled, y_train_predict_m3))

---------- Results of M3 model ----------
Accuracy Score of Model3_train :  0.836875783067836
Accuracy Score of Model3_test :  0.559953131102721
---------- Training Data ----------
              precision    recall  f1-score   support

           0       1.00      0.56      0.72     38259
           1       0.01      0.62      0.01       146

    accuracy                           0.56     38405
   macro avg       0.50      0.59      0.36     38405
weighted avg       0.99      0.56      0.71     38405

[[21415 16844]
 [   56    90]]
---------- Test Data ----------
              precision    recall  f1-score   support

           0       0.85      0.81      0.83     89392
           1       0.82      0.86      0.84     89392

    accuracy                           0.84    178784
   macro avg       0.84      0.84      0.84    178784
weighted avg       0.84      0.84      0.84    178784

[[72784 16608]
 [12556 76836]]


In [None]:
#### Applying SVM
from  sklearn import svm
model4 = svm.SVC(kernel="poly", degree =1, decision_function_shape='ovo',gamma = 'auto')
model4.fit(X_train_resampled,y_train_resampled)

In [None]:
### Results of M4 model

y_pred_m4 = model4.predict(X_test)
y_train_predict_m4 = model4.predict(X_train_resampled)
print("--"*5,"Results of M4 model","--"*5)
print("Accuracy Score of Model4_train : ", accuracy_score(y_train_resampled,y_train_predict_m4))
print("Accuracy Score of Model4_test : ", accuracy_score(y_test,y_pred_m4))
print('--'*5,"Training Data",'--'*5)
print(classification_report(y_test, y_pred_m4))
print(confusion_matrix(y_test, y_pred_m4))
print('--'*5,"Test Data",'--'*5)
print(classification_report(y_train_resampled, y_train_predict_m4))
print(confusion_matrix(y_train_resampled, y_train_predict_m4))

In [None]:
from sklearn.model_selection import cross_val_score
score = cross_val_score(model1,X_test,y_test,cv=10)

In [None]:
score

In [None]:
## Apply RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier
model5 = RandomForestClassifier()
model5.fit(X_train_resampled, y_train_resampled)

In [None]:
### Results of M5 model

y_pred_m5 = model5.predict(X_test)
y_train_predict_m5 = model5.predict(X_train_resampled)
print("--"*5,"Results of M5 model","--"*5)
print("Accuracy Score of Model5_train : ", accuracy_score(y_train_resampled,y_train_predict_m5))
print("Accuracy Score of Model5_test : ", accuracy_score(y_test,y_pred_m5))
print('--'*5,"Training Data",'--'*5)
print(classification_report(y_test, y_pred_m5))
print(confusion_matrix(y_test, y_pred_m5))
print('--'*5,"Test Data",'--'*5)
print(classification_report(y_train_resampled, y_train_predict_m5))
print(confusion_matrix(y_train_resampled, y_train_predict_m5))

In [None]:
## Apply LGBM Classifier
lgb_model = LGBMClassifier(n_estimators=100, learning_rate=0.1, max_depth=6, random_state=42, n_jobs=1)
lgb_model.fit(X_train_resampled, y_train_resampled)

In [None]:
y_pred = lgb_model.predict(X_test)
y_train_predict = lgb_model.predict(X_train_resampled)

print("Accuracy Score of Model1_train : ", accuracy_score(y_train_resampled,y_train_predict))
print("Accuracy Score of Model1_test : ", accuracy_score(y_test,y_pred))

In [None]:
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print('--'*50)
print(classification_report(y_train_resampled, y_train_predict))
print(confusion_matrix(y_train_resampled, y_train_predict))

In [None]:
from sklearn.ensemble import AdaBoostClassifier
ada_classifier=AdaBoostClassifier()
ada_classifier.fit(X_train_resampled, y_train_resampled)

In [None]:
y_pred = ada_classifier.predict(X_test)
y_train_predict = ada_classifier.predict(X_train_resampled)

In [None]:
print("Accuracy Score of Model1_train : ", accuracy_score(y_train_resampled,y_train_predict))
print("Accuracy Score of Model1_test : ", accuracy_score(y_test,y_pred))

In [None]:
from sklearn.neighbors import KNeighborsClassifier
knn_classifier=KNeighborsClassifier()
knn_classifier.fit(X_train, y_train)

In [None]:
y_pred = knn_classifier.predict(X_test)
y_train_predict = knn_classifier.predict(X_train_resampled)

In [None]:
print("Accuracy Score of Model1_train : ", accuracy_score(y_train_resampled,y_train_predict))
print("Accuracy Score of Model1_test : ", accuracy_score(y_test,y_pred))

In [None]:
import xgboost
xgb_m=xgboost.XGBClassifier()
xgb_m.fit(X_train_resampled, y_train_resampled)

In [None]:
y_pred = xgb_m.predict(X_test)
y_train_predict = xgb_m.predict(X_train_resampled)

In [None]:
print("Accuracy Score of Model1_train : ", accuracy_score(y_train_resampled,y_train_predict))
print("Accuracy Score of Model1_test : ", accuracy_score(y_test,y_pred))

In [None]:
params={
 "learning_rate"    : [0.05, 0.10, 0.15, 0.20, 0.25, 0.30 ] ,
 "max_depth"        : [ 3, 4, 5, 6, 8, 10, 12, 15],
 "min_child_weight" : [ 1, 3, 5, 7 ],
 "gamma"            : [ 0.0, 0.1, 0.2 , 0.3, 0.4 ],
 "colsample_bytree" : [ 0.3, 0.4, 0.5 , 0.7 ]
}

In [None]:
## Hyperparameter optimization using RandomizedSearchCV
from sklearn.model_selection import RandomizedSearchCV
random_search=RandomizedSearchCV(xgb_m,param_distributions=params,n_iter=5,scoring='accuracy',n_jobs=-1,cv=5,verbose=3)
random_search.fit(X_train_resampled,y_train_resampled)

In [None]:
y_pred = random_search.predict(X_test)
y_train_predict = random_search.predict(X_train_resampled)

print("Accuracy Score of Model1_train : ", accuracy_score(y_train_resampled,y_train_predict))
print("Accuracy Score of Model1_test : ", accuracy_score(y_test,y_pred))

In [None]:
base_models = [
    ('catboost', CatBoostClassifier(
        iterations=100,
        learning_rate=0.1,
        depth=6,
        random_state=42
    )),
    ('xgboost', xgboost.XGBClassifier(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=6,
        random_state=42
    )),
    ('lightgbm', LGBMClassifier(
        n_estimators=100,
        learning_rate=0.1,
        random_state=42,
        n_jobs =1
    )),
    ('Logistic',LogisticRegression(
        penalty='none', solver='newton-cg'
    ))
]

In [None]:
meta_model = CatBoostClassifier(
    iterations=100,
    learning_rate=0.1,
    depth=6,
    random_state=42
)

In [None]:
from sklearn.ensemble import StackingClassifier
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model)

In [None]:
stacking_model.fit(X_train_resampled, y_train_resampled)

0:	learn: 0.5617382	total: 51ms	remaining: 5.05s
1:	learn: 0.4666516	total: 99.4ms	remaining: 4.87s
2:	learn: 0.4019319	total: 143ms	remaining: 4.63s
3:	learn: 0.3571307	total: 185ms	remaining: 4.44s
4:	learn: 0.3224676	total: 229ms	remaining: 4.34s
5:	learn: 0.2969781	total: 279ms	remaining: 4.38s
6:	learn: 0.2755221	total: 355ms	remaining: 4.72s
7:	learn: 0.2479807	total: 403ms	remaining: 4.63s
8:	learn: 0.2309756	total: 444ms	remaining: 4.49s
9:	learn: 0.2187999	total: 488ms	remaining: 4.4s
10:	learn: 0.2032517	total: 529ms	remaining: 4.28s
11:	learn: 0.1930926	total: 568ms	remaining: 4.17s
12:	learn: 0.1834596	total: 606ms	remaining: 4.05s
13:	learn: 0.1745252	total: 638ms	remaining: 3.92s
14:	learn: 0.1677816	total: 695ms	remaining: 3.94s
15:	learn: 0.1609010	total: 754ms	remaining: 3.96s
16:	learn: 0.1547764	total: 800ms	remaining: 3.9s
17:	learn: 0.1503973	total: 838ms	remaining: 3.82s
18:	learn: 0.1456393	total: 878ms	remaining: 3.74s
19:	learn: 0.1419347	total: 927ms	remaining



[LightGBM] [Info] Number of positive: 89392, number of negative: 89392
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1851
[LightGBM] [Info] Number of data points in the train set: 178784, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




0:	learn: 0.5263964	total: 52.4ms	remaining: 5.19s
1:	learn: 0.4169071	total: 89.5ms	remaining: 4.39s
2:	learn: 0.3465447	total: 124ms	remaining: 4.02s
3:	learn: 0.2894939	total: 161ms	remaining: 3.87s
4:	learn: 0.2549382	total: 197ms	remaining: 3.75s
5:	learn: 0.2257771	total: 234ms	remaining: 3.66s
6:	learn: 0.2059680	total: 271ms	remaining: 3.6s
7:	learn: 0.1881374	total: 306ms	remaining: 3.52s
8:	learn: 0.1779084	total: 344ms	remaining: 3.48s
9:	learn: 0.1669918	total: 407ms	remaining: 3.67s
10:	learn: 0.1532261	total: 444ms	remaining: 3.59s
11:	learn: 0.1445467	total: 478ms	remaining: 3.51s
12:	learn: 0.1372710	total: 512ms	remaining: 3.43s
13:	learn: 0.1309692	total: 548ms	remaining: 3.37s
14:	learn: 0.1235238	total: 584ms	remaining: 3.31s
15:	learn: 0.1181587	total: 627ms	remaining: 3.29s
16:	learn: 0.1121372	total: 662ms	remaining: 3.23s
17:	learn: 0.1082778	total: 697ms	remaining: 3.17s
18:	learn: 0.1038058	total: 732ms	remaining: 3.12s
19:	learn: 0.0994076	total: 766ms	remain

64:	learn: 0.0585488	total: 2.48s	remaining: 1.33s
65:	learn: 0.0563922	total: 2.51s	remaining: 1.29s
66:	learn: 0.0556716	total: 2.55s	remaining: 1.26s
67:	learn: 0.0549495	total: 2.62s	remaining: 1.23s
68:	learn: 0.0540040	total: 2.66s	remaining: 1.19s
69:	learn: 0.0535683	total: 2.7s	remaining: 1.16s
70:	learn: 0.0529319	total: 2.73s	remaining: 1.11s
71:	learn: 0.0524170	total: 2.77s	remaining: 1.08s
72:	learn: 0.0520889	total: 2.8s	remaining: 1.04s
73:	learn: 0.0516722	total: 2.84s	remaining: 997ms
74:	learn: 0.0513550	total: 2.87s	remaining: 957ms
75:	learn: 0.0510368	total: 2.92s	remaining: 921ms
76:	learn: 0.0507177	total: 2.95s	remaining: 881ms
77:	learn: 0.0502513	total: 2.98s	remaining: 841ms
78:	learn: 0.0497587	total: 3.02s	remaining: 802ms
79:	learn: 0.0493739	total: 3.05s	remaining: 763ms
80:	learn: 0.0489743	total: 3.09s	remaining: 724ms
81:	learn: 0.0484752	total: 3.12s	remaining: 685ms
82:	learn: 0.0476398	total: 3.16s	remaining: 646ms
83:	learn: 0.0471576	total: 3.21s

31:	learn: 0.1085740	total: 1.28s	remaining: 2.71s
32:	learn: 0.1054041	total: 1.32s	remaining: 2.69s
33:	learn: 0.1012448	total: 1.36s	remaining: 2.64s
34:	learn: 0.0996823	total: 1.39s	remaining: 2.59s
35:	learn: 0.0973463	total: 1.43s	remaining: 2.55s
36:	learn: 0.0957952	total: 1.47s	remaining: 2.5s
37:	learn: 0.0940349	total: 1.51s	remaining: 2.46s
38:	learn: 0.0922334	total: 1.54s	remaining: 2.41s
39:	learn: 0.0902531	total: 1.58s	remaining: 2.37s
40:	learn: 0.0883961	total: 1.62s	remaining: 2.33s
41:	learn: 0.0871011	total: 1.66s	remaining: 2.29s
42:	learn: 0.0857308	total: 1.69s	remaining: 2.24s
43:	learn: 0.0844851	total: 1.73s	remaining: 2.2s
44:	learn: 0.0830916	total: 1.76s	remaining: 2.15s
45:	learn: 0.0815318	total: 1.8s	remaining: 2.11s
46:	learn: 0.0807083	total: 1.83s	remaining: 2.06s
47:	learn: 0.0794048	total: 1.87s	remaining: 2.02s
48:	learn: 0.0780138	total: 1.9s	remaining: 1.98s
49:	learn: 0.0768372	total: 1.94s	remaining: 1.94s
50:	learn: 0.0757096	total: 1.97s	r

94:	learn: 0.0466143	total: 3.63s	remaining: 191ms
95:	learn: 0.0461649	total: 3.66s	remaining: 153ms
96:	learn: 0.0459347	total: 3.7s	remaining: 114ms
97:	learn: 0.0456282	total: 3.74s	remaining: 76.4ms
98:	learn: 0.0453239	total: 3.78s	remaining: 38.2ms
99:	learn: 0.0450078	total: 3.83s	remaining: 0us




[LightGBM] [Info] Number of positive: 71514, number of negative: 71513
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1852
[LightGBM] [Info] Number of data points in the train set: 143027, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500003 -> initscore=0.000014
[LightGBM] [Info] Start training from score 0.000014
[LightGBM] [Info] Number of positive: 71514, number of negative: 71513
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1851
[LightGBM] [Info] Number of data points in the train set: 143027, number of used features: 41
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500003 -> initscore=0.000014
[LightGBM] [Info] Start training from score 0.000014
[LightGBM] [Info] Number of positive: 71513, number of negative: 71514
You can set `force_row_



0:	learn: 0.6045113	total: 23.5ms	remaining: 2.32s
1:	learn: 0.5309393	total: 44.4ms	remaining: 2.18s
2:	learn: 0.4702016	total: 64.9ms	remaining: 2.1s
3:	learn: 0.4176977	total: 86.6ms	remaining: 2.08s
4:	learn: 0.3734107	total: 108ms	remaining: 2.06s
5:	learn: 0.3352420	total: 129ms	remaining: 2.03s
6:	learn: 0.3024606	total: 149ms	remaining: 1.98s
7:	learn: 0.2732398	total: 173ms	remaining: 1.99s
8:	learn: 0.2475498	total: 194ms	remaining: 1.96s
9:	learn: 0.2252782	total: 214ms	remaining: 1.92s
10:	learn: 0.2054021	total: 237ms	remaining: 1.91s
11:	learn: 0.1878945	total: 259ms	remaining: 1.9s
12:	learn: 0.1723287	total: 279ms	remaining: 1.87s
13:	learn: 0.1587864	total: 300ms	remaining: 1.84s
14:	learn: 0.1463082	total: 320ms	remaining: 1.81s
15:	learn: 0.1356432	total: 341ms	remaining: 1.79s
16:	learn: 0.1259917	total: 365ms	remaining: 1.78s
17:	learn: 0.1173335	total: 386ms	remaining: 1.76s
18:	learn: 0.1097472	total: 407ms	remaining: 1.73s
19:	learn: 0.1028233	total: 431ms	remai

StackingClassifier(estimators=[('catboost',
                                <catboost.core.CatBoostClassifier object at 0x000001F4599E9130>),
                               ('xgboost',
                                XGBClassifier(base_score=None, booster=None,
                                              colsample_bylevel=None,
                                              colsample_bynode=None,
                                              colsample_bytree=None,
                                              enable_categorical=False,
                                              gamma=None, gpu_id=None,
                                              importance_type=None,
                                              interaction_constraints=None,
                                              learning_rate=0.1,
                                              max_delt...
                                              predictor=None, random_state=42,
                                         

In [None]:
y_pred = stacking_model.predict(X_test)
y_train_predict = stacking_model.predict(X_train_resampled)

print("Accuracy Score of Model1_train : ", accuracy_score(y_train_resampled,y_train_predict))
print("Accuracy Score of Model1_test : ", accuracy_score(y_test,y_pred))

In [None]:
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print('--'*50)
print(classification_report(y_train_resampled, y_train_predict))
print(confusion_matrix(y_train_resampled, y_train_predict))

In [None]:
result = stacking_model.predict(X_test)
print(f'{sum(result)} defaults')

66.0 defaults


In [None]:
results = pd.DataFrame()
results['ID'] = test.ID
results['My prediction'] = result
results['Real result'] = y_test
results.head()

In [None]:
share_of_wrong_predicted = results[['My prediction', 'Real result']].duplicated().sum()
print(f'{share_of_wrong_predicted} from {results.shape[0]} observations were predicted right')

38401 from 38405 observations were predicted right


In [None]:
results.to_csv('res.csv')