# Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor

# Load data, train-test split

In [2]:
# Read the wine-quality CSV from the notebook's working directory.
df = pd.read_csv("winequality-red.csv")

# "quality" is the regression target; every remaining column is used as a feature.
y = df["quality"]
X = df.drop(columns="quality")

# Hold out 20% of the rows for the final evaluation. The fixed seed makes the
# split reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10, test_size=0.2)

# Grid Search
We tune hyperparameters by evaluating every Random Forest configuration in the grid with 5-fold cross-validation and selecting the one with the best (highest) negative mean squared error score, i.e. the lowest cross-validated MSE.

In [None]:
# Hyperparameter grid to search: 2 * 2 * 2 * 2 * 1 = 16 candidate configurations.
param_grid = {
    "n_estimators": [100, 1000],
    "max_depth": [None, 1],
    "min_samples_split": [3, 4],
    "min_samples_leaf": [3, 4],
    "max_features": ["sqrt"],
}

# Base estimator that the search clones for every candidate.
# It is kept single-threaded (n_jobs=1) on purpose: the grid search below already
# spreads the candidate/fold fits over all cores, and letting every one of those
# workers start its own all-core pool oversubscribes the CPU and slows the search.
# n_jobs only changes how the work is scheduled; with the fixed random_state the
# fitted forests, and therefore the selected model, stay the same.
rf = RandomForestRegressor(random_state=10, n_jobs=1)

# Exhaustive search over param_grid, scored by negative MSE (higher is better)
# under 5-fold cross-validation, with the fits run in parallel on all cores.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
)

# Fit every candidate on the training split only; the test split stays untouched
# until the final evaluation.
grid_search.fit(X_train, y_train)

# Results

In [None]:
# Report the hyperparameter combination the grid search selected.
print("Best parameters:", grid_search.best_params_)

# Predict on the held-out test split with the selected estimator.
best_rf = grid_search.best_estimator_
y_pred_best = best_rf.predict(X_test)

# Compute the test-set metrics first, then print them together.
rmse = np.sqrt(mean_squared_error(y_test, y_pred_best))  # square root puts the error back in target units
mae = mean_absolute_error(y_test, y_pred_best)
r2 = r2_score(y_test, y_pred_best)

print("Results:")
print("RMSE:", rmse)
print("MAE:", mae)
print("R²:", r2)

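RMSE = 0.62 → The typical size of the model’s prediction error is about 0.6 points on the 0–10 quality scale. Because RMSE squares the errors before averaging, large misses count more than small ones, which is why it is higher than the MAE below (errors are relatively small, but not perfect).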

MAE = 0.46 → The model is typically less than half a quality point wrong, which indicates decent but not highly precise predictions.

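R² = 0.46 → The model explains about 46% of the variance in wine quality on the test set. The remaining 54% is not captured, either because it depends on factors missing from these features, because the quality ratings themselves are noisy, or because the relationships are too complex for the model.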