In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, KFold, LeaveOneOut, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
# Fetch the "boston" dataset (version 1) from OpenML as pandas objects and
# separate the feature table (X) from the target column (y).
boston = fetch_openml(name="boston", version=1, as_frame=True)
X, y = boston.data, boston.target

In [3]:
# Standardize every feature column (StandardScaler: zero mean, unit variance).
# NOTE(review): the scaler is fit on *all* rows of X, i.e. before the
# train/test split and the cross-validation folds used further down, so the
# mean/std it learns also reflect rows that are later held out.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [4]:
# Initialize model
# Standardization is bundled with the regressor so that every fit of `model`
# (the hold-out fit and each cross-validation fold) learns the scaling
# statistics from that fit's training rows only, rather than from the whole
# dataset. Passing the already-standardized X_scaled through this pipeline is
# harmless: the columns are simply re-centred/re-scaled on the training rows.
# For ordinary least squares the reported errors should not change; the point
# is that the evaluation stays leak-free if the estimator is later swapped
# for a scale-sensitive one (e.g. a regularized model).
# The fitted regressor is reachable as model[-1] (e.g. model[-1].coef_).
model = make_pipeline(StandardScaler(), LinearRegression())

In [5]:
# Hold-out validation: set 20% of the rows aside (fixed seed for
# repeatability), fit on the remaining 80%, and measure the mean squared
# error on the rows the model was not fitted on.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)
y_pred = model.fit(X_train, y_train).predict(X_test)
holdout_mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Hold-Out MSE: {holdout_mse:.4f}")

Hold-Out MSE: 24.2911


In [6]:
# 10-fold cross-validation with a seeded shuffle. The scorer returns the
# *negated* MSE for each fold, so the sign is flipped back after averaging
# to report an ordinary (positive) MSE.
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
kfold_scores = cross_val_score(
    estimator=model,
    X=X_scaled,
    y=y,
    scoring='neg_mean_squared_error',
    cv=kfold,
)
kfold_mse = -kfold_scores.mean()
print(f"10-Fold CV MSE: {kfold_mse:.4f}")

10-Fold CV MSE: 23.3642


In [7]:
# Leave-one-out cross-validation: one split per sample, each sample serving
# as the single-row test set once. Scores are negated MSEs, so the average
# is sign-flipped to report a positive MSE.
loocv = LeaveOneOut()
loocv_scores = cross_val_score(
    estimator=model,
    X=X_scaled,
    y=y,
    scoring='neg_mean_squared_error',
    cv=loocv,
)
loocv_mse = -loocv_scores.mean()
print(f"LOOCV MSE: {loocv_mse:.4f}")

LOOCV MSE: 23.7257


In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np

# Load the Iris dataset
# NOTE: this cell rebinds X, y and model, names that the regression cells
# earlier in the notebook also use; re-running those cells afterwards would
# pick up the Iris target instead of the regression target.
iris = load_iris()
X = iris.data
y = iris.target

# Define the Stratified K-Fold cross-validator (5 folds, seeded shuffle)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold accuracy scores, in fold order
accuracies = []

# enumerate(..., start=1) supplies the 1-based fold number directly, so no
# separately maintained counter is needed.
for fold, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    print(f"\nFold {fold}")

    # Split the data
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train a fresh model on this fold's training rows only
    model = LogisticRegression(max_iter=200)
    model.fit(X_train, y_train)

    # Predict and evaluate on this fold's test rows
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

    print(f"Train size: {len(train_index)}, Test size: {len(test_index)}")
    print(f"Accuracy: {accuracy:.2f}")

# Mean accuracy
mean_accuracy = np.mean(accuracies)
print(f"\n📊 Mean Accuracy over {skf.n_splits} folds: {mean_accuracy:.4f}")



Fold 1
Train size: 120, Test size: 30
Accuracy: 1.00

Fold 2
Train size: 120, Test size: 30
Accuracy: 0.97

Fold 3
Train size: 120, Test size: 30
Accuracy: 0.93

Fold 4
Train size: 120, Test size: 30
Accuracy: 1.00

Fold 5
Train size: 120, Test size: 30
Accuracy: 0.93

📊 Mean Accuracy over 5 folds: 0.9667
