A KFold split partitions the data into as many folds as you designate. StratifiedKFold additionally ensures that each training and validation fold contains approximately the same percentage of each class (see the sklearn documentation for more). In the older `sklearn.cross_validation` API, StratifiedKFold took two arguments: the array of labels (for binary classification this would be an array of 1's and 0's) and the number of folds. They have designated the number of folds as 1./eval_size where eval_size = 0.10, so that is a 10-fold validation. In the `sklearn.model_selection` API used below, the number of folds is passed as `n_splits` and the labels are supplied later, when the splits are generated; the code below sets `n_splits=25`, i.e. a 25-fold validation.

In [None]:
from sklearn.model_selection import StratifiedKFold
# Label column used as the classification target.
target = df['Loan_Status']
# Hold out 25% of the rows as a test set; random_state=0 makes the split repeatable.
# NOTE(review): df1 is presumably the prepared feature table built earlier —
# confirm it has the same rows as df and does not contain Loan_Status.
X_train, X_test, y_train, y_test = train_test_split(df1, target, test_size=0.25, random_state=0)
print(y_train)

# 25 shuffled, stratified folds with a fixed seed; used later as the `cv`
# argument of cross_val_score.
kf = StratifiedKFold(n_splits=25, shuffle=True, random_state=0)
print(kf)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn import metrics
import itertools

# Fit a logistic-regression classifier on the training split.
modelLR = LogisticRegression(fit_intercept=True, max_iter=200, warm_start=True)
modelLR.fit(X_train, y_train)

# Predictions on the held-out test split, reused by the reports below.
predicted = modelLR.predict(X_test)

# Mean accuracy across the folds produced by `kf` on the training split.
score_val = cross_val_score(
    modelLR, X_train, y_train, scoring='accuracy', cv=kf
).mean()

print('cross_val_score {}\n'.format(score_val))
print('classification_report\n', metrics.classification_report(y_test, predicted))
print('accuracy_score {}\n'.format(metrics.accuracy_score(y_test, predicted)))


def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it on the current matplotlib figure.

    Parameters
    ----------
    cm : 2-D array-like
        Confusion matrix. Rows are labelled as true labels and columns as
        predicted labels on the plot.
    classes : sequence
        Tick labels for both axes; should contain one entry per row/column
        of ``cm``.
    normalize : bool, optional
        If True, each row is divided by its row sum before printing and
        plotting. Rows whose sum is zero are left as zeros instead of
        producing NaN.
    title : str, optional
        Title placed above the image.
    cmap : matplotlib colormap, optional
        Colormap passed to ``plt.imshow``.

    Returns
    -------
    None
        The function only prints and draws; call ``plt.show()`` afterwards
        to display the figure.
    """
    cm = np.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero so that a class with
        # no true samples yields a row of 0.0 rather than NaN (which would
        # also turn the colour threshold below into NaN).
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels set above are taken into
    # account and not clipped.
    plt.tight_layout()


# Local import keeps this cell self-contained; sklearn is already a
# dependency of this notebook.
from sklearn.utils.multiclass import unique_labels

# Distinct class labels, in sorted order, that occur in the true or predicted
# test labels. The original code passed the whole `target` column as the
# class names, which produced one tick per data row instead of one per class.
class_names = unique_labels(y_test, predicted)

# Compute the confusion matrix with an explicit label order so its rows and
# columns line up with the tick labels drawn below.
cm = metrics.confusion_matrix(y_test, predicted, labels=class_names)

# Plot raw counts
plt.figure()
plot_confusion_matrix(cm, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cm, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')

plt.show()
