In [None]:
import numpy as np
from sklearn.model_selection import cross_val_score, KFold
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Load the iris data: X is the feature matrix, y the class labels.
data = load_iris()
X, y = data.data, data.target

# Models to compare.  The tree gets a fixed seed, matching the seeded KFold
# below, so that re-running this cell reproduces the same numbers.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(max_depth=5, random_state=42),
}

# Cross-validation setup: 5 shuffled folds with a fixed seed, so every model
# is evaluated on exactly the same train/test splits.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Evaluate each model with a single pass over the folds.  Train and test
# accuracy are both recorded per fold, so the summary accuracy and the
# train/test error diagnostics come from the same fits instead of
# cross-validating every model twice on identical splits.
for model_name, model in models.items():
    fold_accuracies = []
    train_errors = []
    test_errors = []

    for train_idx, test_idx in kf.split(X):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        model.fit(X_train, y_train)

        # score() on a classifier is mean accuracy, so error = 1 - score.
        train_error = 1 - model.score(X_train, y_train)
        test_accuracy = model.score(X_test, y_test)
        test_error = 1 - test_accuracy

        fold_accuracies.append(test_accuracy)
        train_errors.append(train_error)
        test_errors.append(test_error)

    # Per-fold test accuracy, kept as an array under the name `scores`.
    scores = np.array(fold_accuracies)

    print(f"{model_name}:")
    print(f"  Mean Accuracy: {scores.mean():.4f}")
    print(f"  Standard Deviation: {scores.std():.4f}\n")

    # Bias and Variance Analysis
    # NOTE: these are rough proxies, not a formal bias/variance decomposition:
    # mean training error stands in for bias, mean held-out error for variance.
    print(f"  Bias (Training Error Mean): {np.mean(train_errors):.4f}")
    print(f"  Variance (Test Error Mean): {np.mean(test_errors):.4f}")
    # np.std is a standard deviation, so the label says so (it was previously
    # printed as "Variance of Errors").
    print(f"  Std Dev of Test Errors: {np.std(test_errors):.4f}\n")


Logistic Regression:
  Mean Accuracy: 0.9733
  Standard Deviation: 0.0249

  Bias (Training Error Mean): 0.0250
  Variance (Test Error Mean): 0.0267
  Variance of Errors: 0.0249

Decision Tree:
  Mean Accuracy: 0.9533
  Standard Deviation: 0.0267

  Bias (Training Error Mean): 0.0033
  Variance (Test Error Mean): 0.0467
  Variance of Errors: 0.0267

