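It splits your data into K folds, trains on K-1 of them and tests on the remaining fold, and repeats this K times so that every fold serves as the test set exactly once. It returns one score per fold; you then report the mean ± standard deviation of those scores as a more robust estimate than a single train/test split would give.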

In [2]:
!pip install numpy scikit-learn



In [3]:
import numpy as np 
from sklearn.datasets import fetch_california_housing 
from sklearn.model_selection import cross_val_score, KFold 
from sklearn.preprocessing import StandardScaler, PolynomialFeatures 
from sklearn.linear_model import Ridge 
from sklearn.pipeline import make_pipeline

In [4]:
# --- Data --------------------------------------------------------------------
# return_X_y=True hands back the (features, target) pair directly.
X, y = fetch_california_housing(return_X_y=True)

# --- CV split ----------------------------------------------------------------
# Five shuffled folds; the fixed random_state pins the fold assignment.
cv = KFold(shuffle=True, n_splits=5, random_state=42)

# --- Model pipeline ----------------------------------------------------------
# Steps run in order: scaling -> degree-2 polynomial expansion -> ridge regression.
# NOTE(review): with_mean=False disables centering in the scaler; confirm this
# is intentional for this dataset.
scale_step = StandardScaler(with_mean=False)
expand_step = PolynomialFeatures(include_bias=False, degree=2)
ridge_step = Ridge(random_state=0, alpha=1.0)
model = make_pipeline(scale_step, expand_step, ridge_step)

# --- Scoring -----------------------------------------------------------------
# The scorer reports negated MSE, so flip the sign before taking the square
# root to obtain one RMSE value per fold.
scores = cross_val_score(
    model,
    X,
    y,
    scoring="neg_mean_squared_error",
    cv=cv,
)
rmse = np.sqrt(np.negative(scores))

# Per-fold values first, then the mean ± std summary across folds.
print(f"RMSE per fold: {rmse}")
fold_mean, fold_std = rmse.mean(), rmse.std()
print(f"CV RMSE: {fold_mean:.3f} ± {fold_std:.3f}")

RMSE per fold: [0.67485366 0.67017485 0.66291769 3.79696243 0.68243907]
CV RMSE: 1.297 ± 1.250
