# Leave-One-Out Cross Validation (LOOCV)

In [1]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import LeaveOneOut, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, make_scorer
import numpy as np

In [2]:
# Fetch the diabetes dataset object; its `.data` and `.target` attributes are
# unpacked into X and y in the next cell.
data = load_diabetes()

In [3]:
# Split the loaded dataset object into the feature matrix and the regression
# target in a single unpacking step.
X, y = data.data, data.target

In [4]:
# LinearRegression with all library defaults; this is the estimator passed to
# cross_val_score further down.
model = LinearRegression()
# Bare trailing expression so the notebook shows the estimator as cell output.
model

In [5]:
# Leave-one-out splitter, used as the `cv` argument of cross_val_score below.
loo = LeaveOneOut()
# Bare trailing expression so the notebook echoes the splitter (LeaveOneOut()).
loo

LeaveOneOut()

In [6]:
# Run leave-one-out cross-validation. The requested scorer is the *negated*
# mean squared error, so the values echoed below are all negative.
scores = cross_val_score(
    model,
    X,
    y,
    cv=loo,
    scoring='neg_mean_squared_error',
)
scores

array([-3.14794770e+03, -5.02299665e+01, -1.35042062e+03, -1.58815516e+03,
       -4.38697661e+01, -9.11699733e+01, -4.24824502e+03, -3.33559806e+03,
       -2.48957190e+03, -9.76194482e+03, -1.66855156e+01, -7.68407579e+02,
       -4.14895063e+03, -4.22060805e+02, -2.31972334e+02, -4.22803841e+01,
       -2.18981772e+03, -1.55648697e+03, -2.65046078e+03, -1.97060676e+03,
       -2.83484225e+03, -1.41962489e+03, -2.12459524e+03, -7.03852993e+01,
       -3.53231928e+02, -3.05527315e+03, -1.68097280e+03, -9.21798795e+03,
       -3.99838388e+00, -1.04949698e+04, -9.08667363e+02, -1.12902660e+02,
       -7.28011691e+03, -7.27888365e+02, -1.84745380e+02, -2.28733361e+02,
       -3.38746745e+03, -1.43795767e+04, -1.30290668e+02, -2.29802830e+03,
       -2.99448445e+03, -3.87339145e+02, -7.37138863e+03, -2.17566968e+02,
       -1.50755348e+03, -5.35929606e+03, -2.32211410e+03, -1.11355631e+03,
       -3.59967517e+00, -2.37852472e+03, -8.83324664e+00, -3.16095842e+03,
       -5.78275140e+03, -

In [8]:
# The scorer reports negated MSE, so negate once more to recover ordinary
# (non-negative) squared errors.
mse_scores = np.negative(scores)
mse_scores

array([3.14794770e+03, 5.02299665e+01, 1.35042062e+03, 1.58815516e+03,
       4.38697661e+01, 9.11699733e+01, 4.24824502e+03, 3.33559806e+03,
       2.48957190e+03, 9.76194482e+03, 1.66855156e+01, 7.68407579e+02,
       4.14895063e+03, 4.22060805e+02, 2.31972334e+02, 4.22803841e+01,
       2.18981772e+03, 1.55648697e+03, 2.65046078e+03, 1.97060676e+03,
       2.83484225e+03, 1.41962489e+03, 2.12459524e+03, 7.03852993e+01,
       3.53231928e+02, 3.05527315e+03, 1.68097280e+03, 9.21798795e+03,
       3.99838388e+00, 1.04949698e+04, 9.08667363e+02, 1.12902660e+02,
       7.28011691e+03, 7.27888365e+02, 1.84745380e+02, 2.28733361e+02,
       3.38746745e+03, 1.43795767e+04, 1.30290668e+02, 2.29802830e+03,
       2.99448445e+03, 3.87339145e+02, 7.37138863e+03, 2.17566968e+02,
       1.50755348e+03, 5.35929606e+03, 2.32211410e+03, 1.11355631e+03,
       3.59967517e+00, 2.37852472e+03, 8.83324664e+00, 3.16095842e+03,
       5.78275140e+03, 3.02113065e+03, 1.89721568e+03, 3.20673279e+03,
      

In [9]:
# Summary of the LOOCV experiment: how many splits were evaluated, a peek at
# the first few per-split errors, and the error averaged over all splits.
print("1. Leave-One-Out Cross Validation (Linear Regression)")
for label, value in (
    ("Number of splits:", loo.get_n_splits(X)),
    ("First 5 MSE scores:", mse_scores[:5]),
    ("Mean MSE:", mse_scores.mean()),
):
    print(label, value)

1. Leave-One-Out Cross Validation (Linear Regression)
Number of splits: 442
First 5 MSE scores: [3147.94770214   50.22996655 1350.4206212  1588.15515661   43.86976605]
Mean MSE: 3001.752846999431


# Leave-P-Out Cross Validation (P=2)

In [11]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import LeavePOut, cross_val_score
from sklearn.linear_model import LinearRegression
import numpy as np

In [12]:
data = load_diabetes()
# smaller sample to reduce time: only the first 20 rows are kept, which is why
# the summary further down reports 190 leave-2-out splits.
X, y = data.data[:20], data.target[:20]

In [13]:
# Fresh LinearRegression with library defaults for the Leave-P-Out run.
model = LinearRegression()

In [14]:
# Leave-2-out splitter: p=2 rows are held out per split (190 splits on the
# 20-row sample, per the summary printed below).
lpo = LeavePOut(p=2)

In [15]:
# Evaluate the model on every leave-2-out split; the scorer returns negated
# MSE values, which a later cell flips back to positive.
scores = cross_val_score(
    model,
    X,
    y,
    cv=lpo,
    scoring='neg_mean_squared_error',
)

In [16]:
# Undo the scorer's sign flip so the values read as plain MSE.
mse_scores = np.negative(scores)

In [17]:
# Summary of the Leave-P-Out experiment: number of held-out combinations, the
# first few per-split errors, and the error averaged over all splits.
print("2. Leave-P-Out Cross Validation (P=2) with Linear Regression")
for label, value in (
    ("Number of combinations (splits):", lpo.get_n_splits(X)),
    ("First 5 MSE scores:", mse_scores[:5]),
    ("Mean MSE:", mse_scores.mean()),
):
    print(label, value)

2. Leave-P-Out Cross Validation (P=2) with Linear Regression
Number of combinations (splits): 190
First 5 MSE scores: [2531.4490979  2867.82286139  417.59255548 1113.85108795 2112.13480627]
Mean MSE: 2241.7855237836043


# K-Fold Cross Validation

In [18]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression
import numpy as np

In [19]:
# Reload the full diabetes dataset: the Leave-P-Out section had cut X and y
# down to their first 20 rows, and K-Fold should see every row again.
data = load_diabetes()
X, y = data.data, data.target

In [20]:
# Fresh LinearRegression with library defaults for the K-Fold run.
model = LinearRegression()

In [21]:
# Five shuffled folds; the fixed random_state pins the shuffle so the fold
# assignment is the same on every run.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [22]:
# One negated-MSE score per fold; a later cell flips the sign back.
scores = cross_val_score(
    model,
    X,
    y,
    cv=kf,
    scoring='neg_mean_squared_error',
)

In [23]:
# Undo the scorer's sign flip so the values read as plain per-fold MSE.
mse_scores = np.negative(scores)

In [24]:
# Summary of the K-Fold experiment: the error of each fold followed by the
# error averaged over the folds.
print("3. K-Fold Cross Validation (K=5) with Linear Regression")
for label, value in (
    ("MSE for each fold:", mse_scores),
    ("Mean MSE:", mse_scores.mean()),
):
    print(label, value)

3. K-Fold Cross Validation (K=5) with Linear Regression
MSE for each fold: [2900.19362849 2662.63760862 3312.30588884 2797.88355256 3403.88779293]
Mean MSE: 3015.381694287271


# Stratified K-Fold Cross Validation

In [25]:
from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.linear_model import LogisticRegression

In [26]:
# Switch datasets for the classification example: X and y are rebound to the
# iris features and targets, replacing the diabetes arrays used above.
iris = load_iris()
X, y = iris.data, iris.target

In [27]:
# Classifier evaluated on the iris data in the cells below.
# NOTE(review): max_iter=1000 is presumably there so the solver converges
# without warnings — confirm it is still needed.
model = LogisticRegression(max_iter=1000)

In [28]:
# Five shuffled, stratified folds; the fixed random_state pins the shuffle so
# the fold assignment is the same on every run.
skf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [29]:
# Score each stratified fold by classification accuracy. The metric is named
# explicitly (instead of relying on the estimator's default scorer) so this
# call matches the regression sections, which all spell out `scoring`, and so
# the "Accuracy" labels in the summary cell are backed by the code itself.
scores = cross_val_score(model, X, y, cv=skf, scoring='accuracy')

In [30]:
# Summary of the stratified K-Fold experiment: the score of each fold followed
# by the score averaged over the folds.
print("4. Stratified K-Fold Cross Validation (K=5) - Classification")
for label, value in (
    ("Accuracy for each fold:", scores),
    ("Mean Accuracy:", scores.mean()),
):
    print(label, value)

4. Stratified K-Fold Cross Validation (K=5) - Classification
Accuracy for each fold: [1.         0.96666667 0.93333333 1.         0.93333333]
Mean Accuracy: 0.9666666666666668
