# Wine Quality — XGBoost Regression (one command per code cell)
# Gradient boosted trees via XGBoost library. Install xgboost first in your environment.


# Installation note (commented)

In [None]:
# !pip install xgboost  # install XGBoost if not available (commented to avoid running here)


# Imports

In [None]:
import pandas as pd  # data handling


In [None]:
import numpy as np  # numeric helpers


In [None]:
from sklearn.model_selection import train_test_split, cross_validate  # split and CV wrappers


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score  # metrics


In [None]:
import xgboost as xgb  # XGBoost library (requires installation)


# Load dataset

In [None]:
# Wine variant to analyse; the CSV file name is derived from this value.
DATASET = "red"  # accepted values: 'red' or 'white'


In [None]:
# Absolute location of the wine-quality CSV that matches DATASET.
CSV_PATH = "/Users/sb/Documents/Study-Material/Machine-Learning/winequality-{}.csv".format(DATASET)


In [None]:
# The file is semicolon-delimited, so the separator must be given explicitly.
df = pd.read_csv(CSV_PATH, delimiter=';')


# Split

In [None]:
# Feature matrix: every column except the 'quality' target.
X = df.drop('quality', axis=1)


In [None]:
# Regression target: the 'quality' column.
y = df.loc[:, 'quality']


In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Model

In [None]:
# XGBoost regressor with commonly used settings, spelled out as a keyword mapping.
xgb_reg = xgb.XGBRegressor(**{
    'n_estimators': 500,        # number of boosting rounds
    'learning_rate': 0.05,      # shrinkage applied to each round
    'max_depth': 6,             # maximum depth of each tree
    'subsample': 0.8,           # fraction of rows sampled per tree
    'colsample_bytree': 0.8,    # fraction of columns sampled per tree
    'reg_lambda': 1.0,          # L2 regularisation weight
    'random_state': 42,         # seed for reproducibility
    'tree_method': 'hist',      # histogram-based tree construction
})


In [None]:
# Train on the training split only; the test split stays unseen until evaluation.
xgb_reg.fit(X=X_train, y=y_train)


In [None]:
# Predicted quality for every row of the held-out test split.
pred = xgb_reg.predict(X=X_test)


# Evaluate

In [None]:
# Mean absolute error on the test split.
mae = mean_absolute_error(y_true=y_test, y_pred=pred)


In [None]:
# RMSE on the test split, computed as the square root of the MSE.
# The `squared=False` shortcut was deprecated in scikit-learn 1.4 and later
# removed, so it raises TypeError on current releases; taking the root
# explicitly works on every scikit-learn version.
rmse = float(np.sqrt(mean_squared_error(y_test, pred)))  # RMSE


In [None]:
# Coefficient of determination (R^2) on the test split.
r2 = r2_score(y_true=y_test, y_pred=pred)


In [None]:
# Show the three hold-out metrics together as the cell output.
dict(MAE=mae, RMSE=rmse, R2=r2)


# Cross-validation (wrapper using scikit-learn)

In [None]:
# 5-fold cross-validation over the full X, y, scoring R^2 and negated RMSE.
cv = cross_validate(
    estimator=xgb_reg,
    X=X,
    y=y,
    cv=5,
    scoring=('r2', 'neg_root_mean_squared_error'),
)


In [None]:
# Mean and standard deviation across folds for each test score in the CV result.
{
    score_name: (fold_scores.mean(), fold_scores.std())
    for score_name, fold_scores in cv.items()
    if score_name.startswith('test_')
}


# Feature importance (gain-based)

In [None]:
# Pair each feature name with its importance and list them from most to least important.
dict(
    sorted(
        dict(zip(X.columns, xgb_reg.feature_importances_)).items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
)


# MLflow Tracking — log params, metrics, and model (ensure mlflow is installed)

In [None]:
import mlflow  # experiment tracking


In [None]:
import mlflow.sklearn  # sklearn/XGB wrapper logging


In [None]:
# Select the MLflow experiment used for this algorithm's runs.
mlflow.set_experiment(experiment_name='XGBoostRegression-WineQuality')


In [None]:
# Open a run named after the dataset variant; it stays active until mlflow.end_run().
mlflow.start_run(run_name=str(DATASET))


In [None]:
# Record the model's hyperparameters, read back from the estimator itself
# (None is logged for any attribute the estimator does not expose).
mlflow.log_params({
    param_name: getattr(xgb_reg, param_name, None)
    for param_name in (
        'n_estimators',
        'learning_rate',
        'max_depth',
        'subsample',
        'colsample_bytree',
        'reg_lambda',
        'tree_method',
    )
})


In [None]:
# Log the hold-out mean absolute error to the active run.
mlflow.log_metric(key='MAE', value=mae)


In [None]:
# Log the hold-out root mean squared error to the active run.
mlflow.log_metric(key='RMSE', value=rmse)


In [None]:
# Log the hold-out R^2 score to the active run.
mlflow.log_metric(key='R2', value=r2)


In [None]:
# Store the fitted estimator as a run artifact under 'model'.
mlflow.sklearn.log_model(sk_model=xgb_reg, artifact_path='model')


In [None]:
# Close the run opened by mlflow.start_run() so later logging does not attach to it.
mlflow.end_run()  # end run


# Persistence — save to disk and reload

In [None]:
import os  # filesystem utils


In [None]:
import joblib  # model persistence


In [None]:
from datetime import datetime  # timestamp for filenames


In [None]:
# Create the model output directory (relative to the working directory) if it is missing.
os.makedirs(name='Machine-Learning/models/XGBoostRegression', exist_ok=True)


In [None]:
# Timestamped file name so repeated runs never overwrite an earlier saved model.
MODEL_PATH = (
    f"Machine-Learning/models/XGBoostRegression/xgboost_wine_{DATASET}_"
    f"{datetime.now():%Y%m%d_%H%M%S}.joblib"
)


In [None]:
# Serialise the fitted estimator to MODEL_PATH.
joblib.dump(value=xgb_reg, filename=MODEL_PATH)


In [None]:
# Read the estimator back from disk to confirm the saved file is usable.
loaded = joblib.load(filename=MODEL_PATH)


In [None]:
# Sanity check: the reloaded model's prediction for the first test row, as a plain float.
loaded.predict(X_test.iloc[[0]]).item(0)


# Streamlit App — minimal predictor UI

In [None]:
# Source text of a minimal Streamlit predictor app, generated from this session's
# MODEL_PATH (default value of the path input) and the feature column names.
#
# - The em dash and wine-glass emoji are written as escape sequences (\u2014 and
#   \U0001F377) that the generated file's own string literals decode. The previous
#   text was mojibake, and keeping the generated file pure ASCII makes it
#   independent of the encoding used to write it.
# - Both values are embedded with %r so they appear as valid Python literals even
#   if the path contains quotes or backslashes.
# - The template is filled with %-formatting, so it must not contain any other
#   literal percent sign.
APP_CODE = """
import joblib, pandas as pd, streamlit as st
st.set_page_config(page_title='Wine \\u2014 XGBoost Regression', page_icon='\\U0001F377')
st.title('\\U0001F377 Wine Quality \\u2014 XGBoost Regression')
MODEL_PATH = st.text_input('Model path', value=%r)
# NOTE: joblib.load unpickles the file, so only point this at model files you trust.
if MODEL_PATH and st.button('Load model'):
    try:
        st.session_state['model'] = joblib.load(MODEL_PATH)
        st.success('Model loaded')
    except Exception as e:
        st.error(f'Load failed: {e}')
if 'model' in st.session_state:
    cols = %r
    vals = {c: st.number_input(c, value=0.0) for c in cols}
    if st.button('Predict quality'):
        df = pd.DataFrame([vals])
        try:
            y = st.session_state['model'].predict(df)
            st.metric('Predicted quality', f"{float(y[0]):.3f}")
        except Exception as e:
            st.error(f'Predict failed: {e}')
""" % (MODEL_PATH, list(X.columns))


In [None]:
# Write the generated Streamlit app next to the notebook.
# The context manager guarantees the file is flushed and closed, and the explicit
# UTF-8 encoding keeps any non-ASCII text in the app source from depending on
# the platform's default encoding.
with open('app_streamlit_wine.py', 'w', encoding='utf-8') as app_file:
    app_file.write(APP_CODE)


In [None]:
# Prediction of the in-memory model for the first test row, as a plain float.
xgb_reg.predict(X_test.iloc[[0]]).item(0)
