In [6]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
import numpy as np
iris = load_iris()


def k_fold_cross_validation(X, y, k, model):
    """
    Perform k-fold cross-validation.

    Parameters:
    - X: Features (NumPy array)
    - y: Labels (NumPy array)
    - k: Number of folds
    - model: Your machine learning model (function or class with fit and predict methods)

    Returns:
    - average_accuracy: Average accuracy over all folds
    """

    # Shuffle the data
    data = np.column_stack((X, y))
    np.random.shuffle(data)

    # Split data into k folds
    folds = np.array_split(data, k)

    accuracies = []

    for i in range(k):
        # Use ith fold as the validation set
        validation_set = folds[i]
        validation_X = validation_set[:, :-1]
        validation_y = validation_set[:, -1]

        # Use the remaining folds for training
        training_sets = np.concatenate([fold for j, fold in enumerate(folds) if j != i])
        training_X = training_sets[:, :-1]
        training_y = training_sets[:, -1]

        # Train the model
        model.fit(training_X, training_y)

        # Predict on the validation set
        predictions = model.predict(validation_X)

        # Calculate accuracy
        accuracy = np.mean(predictions == validation_y)
        accuracies.append(accuracy)

    return accuracies

X = np.random.rand(100, 5)
y = np.random.randint(0, 2, 100)

# Example usage of k-fold cross-validation
k = 5
model = LogisticRegression(max_iter=1000)
accuracies = k_fold_cross_validation(iris.data, iris.target, k, model)
print('Average Accuracy: {}'.format(accuracies))


Average Accuracy: [1.0, 0.9333333333333333, 0.9666666666666667, 0.9333333333333333, 0.9666666666666667]


In [2]:
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

iris = load_iris()
logreg = LogisticRegression(max_iter=1000)

# my note: you can adjust the "cv" parameter to determine how many folds to create, it defaults to 3
scores = cross_val_score(logreg, iris.data, iris.target)
print("Cross-validation scores: {}".format(scores))

Cross-validation scores: [0.96666667 1.         0.93333333 0.96666667 1.        ]
