In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

In [9]:
# 2. Load dataset
# The CSV is semicolon-delimited, hence the explicit separator.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, sep=";")

# Target is the "quality" column; every other column is used as a feature.
y = df["quality"]
X = df.drop(columns="quality")

In [10]:
# 3. Pipeline: polynomial features (degree 4) -> standardisation -> linear regression
# The degree is named once so the comment and the code cannot drift apart.
# NOTE(review): with the 11 wine features a degree-4 expansion yields 1364 columns,
# close to the 1599 rows reported by the CV cell — expect slow fits and heavy
# over-fitting; confirm that degree 4 (rather than 2) is really intended.
POLY_DEGREE = 4

model = Pipeline(steps=[
    # include_bias=False: no constant column is generated by the expansion;
    # LinearRegression fits its own intercept by default.
    ("poly", PolynomialFeatures(degree=POLY_DEGREE, include_bias=False)),
    ("scaler", StandardScaler()),
    ("lr", LinearRegression()),
])

In [11]:
# Leave-One-Out Cross-Validation
# One complete pipeline fit per row of X, so this cell can take a long time to run.
# Note: `model` is refitted in place and ends up trained on the final training split.
loo = LeaveOneOut()
n_samples = len(X)

mse_list, rmse_list = [], []          # per-sample squared error and its square root
y_pred_loo = np.empty(n_samples)      # held-out prediction for every row, in row order

print("Non-Linear Polynomial (Degree = 4)  —  Leave-One-Out CV")
for i, (train_idx, test_idx) in enumerate(loo.split(X), start=1):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # 1. Fit the pipeline on the training subset
    model.fit(X_train, y_train)

    # 2. Predict on the single held-out sample and remember the prediction
    y_pred = model.predict(X_test)
    y_pred_loo[test_idx] = y_pred

    # 3. Per-sample error. With one test row, MSE is the squared error and
    #    sqrt(MSE) is simply the absolute error.
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    mse_list.append(mse)
    rmse_list.append(rmse)

    # Optional progress logging: first step, every 200th step, and the last step
    if i % 200 == 0 or i in (1, n_samples):
        print(f"Step {i}/{n_samples}  |  sample MSE = {mse:.4f}")

# R² is undefined for a single-sample test set (SST = 0), so per-fold values are
# all NaN and their mean is meaningless. The list is kept only so that code
# expecting `r2_list` still finds it; the reported R² is computed once over all
# held-out predictions instead.
r2_list = [np.nan] * n_samples

# 4. Aggregate results
loo_mse = np.mean(mse_list)
loo_rmse = np.sqrt(loo_mse)           # sqrt of the mean, not mean of per-sample sqrt (that would be MAE)
loo_r2 = r2_score(y, y_pred_loo)

print("-----------------------------------------------------------")
print(f"Leave-One-Out pooled R²    : {loo_r2:.4f}")
print(f"Leave-One-Out average MSE  : {loo_mse:.4f}")
print(f"Leave-One-Out overall RMSE : {loo_rmse:.4f}\n")



Non-Linear Polynomial (Degree=4)  —  Leave-One-Out CV
Step 1/1599  |  Sample MSE = 0.0027
Step 200/1599  |  Sample MSE = 43.2509


KeyboardInterrupt: 

In [None]:
# Compute full-fit metrics
# The pipeline is refitted on every row and scored on those same rows, so these
# are training-set (in-sample) figures rather than an estimate of generalisation.
y_pred_full = model.fit(X, y).predict(X)

mse_full  = mean_squared_error(y, y_pred_full)
rmse_full = np.sqrt(mse_full)
r2_full   = r2_score(y, y_pred_full)

print(
    f"Full-fit R²   : {r2_full:.4f}",
    f"Full-fit MSE  : {mse_full:.4f}",
    f"Full-fit RMSE : {rmse_full:.4f}",
    sep="\n",
)


In [None]:
# Predict new sample
# Keys must match the feature column names of X exactly (the CSV headers use
# spaces and "pH"); the check below fails loudly if any feature is missing.
new_sample = {
    "fixed acidity"        : 7.4,
    "volatile acidity"     : 0.70,
    "citric acid"          : 0.00,
    "residual sugar"       : 1.9,
    "chlorides"            : 0.076,
    "free sulfur dioxide"  : 11.0,
    "total sulfur dioxide" : 34.0,
    "density"              : 0.9978,
    "pH"                   : 3.51,
    "sulphates"            : 0.56,
    "alcohol"              : 9.4
}

# Validate against the training features instead of relying on a `feature_cols`
# variable that is not defined anywhere in this notebook.
missing = [col for col in X.columns if col not in new_sample]
if missing:
    raise KeyError(f"new_sample is missing feature(s): {missing}")

# One-row DataFrame in the training column order, so the pipeline receives the
# same feature names and ordering it was fitted with.
X_new = pd.DataFrame([new_sample], columns=X.columns)

pred_quality = model.predict(X_new)[0] # predict

print(f"Predicted wine quality: {pred_quality:.2f} (scale 0–10)")

Predicted wine quality: 5.18 (scale 0–10)


