In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Import the cross-validation tool
from sklearn.model_selection import cross_val_score

In [3]:
# Fetch the iris dataset in pandas form and bind the feature table and the
# class labels to separate names for the cells that follow.
iris = load_iris(as_frame=True)
x = iris.data
y = iris.target

print(f"Total samples: {len(x)}")

Total samples: 150


In [4]:
# Build the (still unfitted) classifier; nothing is trained in this cell.
logreg = LogisticRegression(
    max_iter=200,  # cap on solver iterations
)

print("Logistic Regression model instantiated.")

Logistic Regression model instantiated.


In [7]:
# 5-fold cross-validation over the complete dataset. cross_val_score takes
# the full feature table and target, handles the train/test splitting
# internally, and returns one accuracy value per fold.
cv_scores = cross_val_score(logreg, x, y, scoring="accuracy", cv=5)

print("Individual Cross-Validation Scores (5 Folds):")
print(cv_scores)

Individual Cross-Validation Scores (5 Folds):
[0.96666667 1.         0.93333333 0.96666667 1.        ]


In [8]:
# Summarise the per-fold scores: the mean is the headline accuracy estimate,
# and the standard deviation (NumPy's default ddof=0) shows how much the
# score moves from one fold to the next.
mean_cv_score = cv_scores.mean()
std_cv_score = cv_scores.std()

print(f"\nMean CV Accuracy: {mean_cv_score:.4f}")
print(f"Standard Deviation of CV Scores: {std_cv_score:.4f}")


Mean CV Accuracy: 0.9733
Standard Deviation of CV Scores: 0.0249


In [9]:
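## Mean CV accuracy is high (~0.97): on average the model classifies held-out samples correctly about 97% of the time.
## Standard deviation is low (~0.025): accuracy varies little from fold to fold, so the estimate is stable (repeatable) across different train/test splits.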