Leave-One-Out Cross-Validation (LOOCV) is a special case of k-fold cross-validation in which *k* equals the number of data points *n*. In each iteration the model is trained on *n-1* samples and tested on the single remaining sample, and this is repeated once for every data point. LOOCV gives a nearly unbiased estimate of model performance, although that estimate can have high variance, and because it requires fitting the model *n* times it is computationally expensive for large datasets.

In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import mean_squared_error

# Toy housing data: each row of X is [size, bedrooms]; y holds the matching price.
X = np.array([
    [1500, 3],
    [1200, 2],
    [1800, 4],
    [1100, 2],
    [2000, 5],
])
y = np.array([300000, 250000, 350000, 220000, 400000])

# The same estimator object is refit on every fold produced by the splitter.
model = LinearRegression()
loo = LeaveOneOut()

# Per-fold squared errors, appended in the order the splitter yields the folds.
mse_scores = []

for fit_rows, held_out_row in loo.split(X):
    # Fit on every sample except the one held out for this fold.
    model.fit(X[fit_rows], y[fit_rows])

    # Score the held-out sample; with a single test point the MSE is simply
    # that point's squared prediction error.
    held_out_prediction = model.predict(X[held_out_row])
    mse_scores.append(mean_squared_error(y[held_out_row], held_out_prediction))

# The LOOCV estimate is the mean of the per-fold errors.
average_mse = np.mean(mse_scores)

print(f"MSE for each fold: {mse_scores}")
print(f"Average MSE across all folds: {average_mse:.2f}")

MSE for each fold: [3.3881317890172014e-21, 182250000.0, 182250000.0, 900000000.0000035, 900000000.0000035]
Average MSE across all folds: 432900000.00


In [2]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import mean_squared_error

# Pull the feature matrix and the target vector out of the bundled diabetes dataset.
data = load_diabetes()
X, y = data.data, data.target

# The same estimator object is refit on every fold produced by the splitter.
model = LinearRegression()
loo = LeaveOneOut()

# Per-fold squared errors, appended in the order the splitter yields the folds.
mse_scores = []

for fit_rows, held_out_row in loo.split(X):
    # Fit on every sample except the one held out for this fold.
    model.fit(X[fit_rows], y[fit_rows])

    # Score the held-out sample; with a single test point the MSE is simply
    # that point's squared prediction error.
    held_out_prediction = model.predict(X[held_out_row])
    mse_scores.append(mean_squared_error(y[held_out_row], held_out_prediction))

# The LOOCV estimate is the mean of the per-fold errors.
average_mse = np.mean(mse_scores)

print(f"Average MSE across all folds: {average_mse:.2f}")

Average MSE across all folds: 3001.75
