In [1]:
# Import necessary libraries
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# --- Data -------------------------------------------------------------------
# Fetch the breast cancer dataset in pandas form and keep the combined frame.
data = load_breast_cancer(as_frame=True)
df = data.frame

# Every column except the last is used as a feature (X); the last column is
# used as the target (Y).
X, Y = df.iloc[:, :-1], df.iloc[:, -1]

# --- Model and evaluation setup ---------------------------------------------
# Logistic Regression classifier using the 'liblinear' solver
Lr = LogisticRegression(solver='liblinear')

# k-fold splitter; k is the number of folds
k = 5
k_fold = KFold(n_splits=k)

# Per-fold accuracy values are collected here
acc_scores = []

# Evaluate the classifier once per fold: fit on the fold's training rows,
# predict on its held-out rows, and record the resulting accuracy.
for training_index, testing_index in k_fold.split(X):
    # Positional row selection for this fold's train/test partitions
    X_train, X_test = X.iloc[training_index], X.iloc[testing_index]
    Y_train, Y_test = Y.iloc[training_index], Y.iloc[testing_index]

    # Fit on the training partition, then predict the held-out partition
    Lr.fit(X_train, Y_train)
    Y_pred = Lr.predict(X_test)

    # accuracy_score takes (y_true, y_pred) in that order. Accuracy itself is
    # symmetric, but keeping the documented order avoids a silent bug if this
    # metric is later replaced by an asymmetric one (precision, recall, ...).
    acc = accuracy_score(Y_test, Y_pred)
    acc_scores.append(acc)

# Average over the scores actually collected rather than the configured k, so
# the mean stays correct even if the two ever get out of sync.
mean_acc_score = sum(acc_scores) / len(acc_scores)

# Print the accuracy scores for each fold and the mean accuracy score
print("Accuracy score of each fold: ", acc_scores)
print("Mean accuracy score: ", mean_acc_score)

Accuracy score of each fold:  [0.9122807017543859, 0.9473684210526315, 0.9736842105263158, 0.9736842105263158, 0.9557522123893806]
Mean accuracy score:  0.952553951249806
