In [2]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Single seed shared by data generation and fold shuffling so that every run
# produces the same dataset, the same splits and therefore the same errors.
# Without it, both make_classification and KFold(shuffle=True) draw fresh
# randomness on each execution and the reported numbers are not reproducible.
RANDOM_SEED = 42

# Synthetic classification dataset: 200 samples with 5 features each.
features, labels = make_classification(
    n_samples=200, n_features=5, random_state=RANDOM_SEED
)

num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=RANDOM_SEED)

# Per-fold misclassification rates (error = 1 - accuracy).
train_errors = []
test_errors = []

for train_idx, test_idx in kf.split(features):
    # Index arrays from the splitter select this fold's train/test rows.
    X_train, X_test = features[train_idx], features[test_idx]
    y_train, y_test = labels[train_idx], labels[test_idx]

    # A fresh model per fold so nothing learned on one split leaks into the next.
    model = LogisticRegression()
    model.fit(X_train, y_train)

    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)

    train_accuracy = accuracy_score(y_train, train_predictions)
    test_accuracy = accuracy_score(y_test, test_predictions)

    train_errors.append(1 - train_accuracy)
    test_errors.append(1 - test_accuracy)

# Average the per-fold errors to get the cross-validated estimates.
mean_train_error = np.mean(train_errors)
mean_test_error = np.mean(test_errors)

print(f"Train Errors (Per Fold): {train_errors}")
print(f"Test Errors (Per Fold): {test_errors}")
print(f"Mean Train Error: {mean_train_error:.4f}")
print(f"Mean Test Error: {mean_test_error:.4f}")


Train Errors (Per Fold): [0.11250000000000004, 0.11250000000000004, 0.13124999999999998, 0.07499999999999996, 0.11250000000000004]
Test Errors (Per Fold): [0.07499999999999996, 0.07499999999999996, 0.050000000000000044, 0.25, 0.15000000000000002]
Mean Train Error: 0.1088
Mean Test Error: 0.1200
