# Wine Quality — Decision Tree Regression (one command per code cell)
# Non-parametric tree model; captures nonlinear feature interactions.


# Imports

In [None]:
import pandas as pd  # data handling


In [None]:
import numpy as np  # numeric helpers


In [None]:
from sklearn.model_selection import train_test_split, cross_validate  # split and CV


In [None]:
from sklearn.tree import DecisionTreeRegressor  # decision tree regressor


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score  # metrics


# Load dataset

In [None]:
DATASET = 'white'  # 'red' or 'white'


In [None]:
CSV_PATH = f"/Users/sb/Documents/Study-Material/Machine-Learning/winequality-{DATASET}.csv"  # dataset path


In [None]:
df = pd.read_csv(CSV_PATH, sep=';')  # load CSV


# Split

In [None]:
X = df.drop(columns=['quality'])  # features


In [None]:
y = df['quality']  # target


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # split


# Model

In [None]:
tree = DecisionTreeRegressor(random_state=42, max_depth=None, min_samples_leaf=3)  # basic tree with small leaf size


In [None]:
tree.fit(X_train, y_train)  # fit model


In [None]:
pred = tree.predict(X_test)  # predictions


# Evaluate

In [None]:
mae = mean_absolute_error(y_test, pred)  # MAE


In [None]:
rmse = mean_squared_error(y_test, pred, squared=False)  # RMSE


In [None]:
r2 = r2_score(y_test, pred)  # R^2


In [None]:
{'MAE': mae, 'RMSE': rmse, 'R2': r2}  # metrics


# Cross-validation

In [None]:
cv = cross_validate(tree, X, y, cv=5, scoring=('r2','neg_root_mean_squared_error'))  # 5-fold CV


In [None]:
{k: (np.mean(v), np.std(v)) for k, v in cv.items() if k.startswith('test_')}  # CV summary


# MLflow Tracking — log params, metrics, and model

In [None]:
import mlflow  # experiment tracking


In [None]:
import mlflow.sklearn  # sklearn flavor logging


In [None]:
mlflow.set_experiment('DecisionTreeRegression-WineQuality')  # experiment per algorithm


In [None]:
mlflow.start_run(run_name=f"{DATASET}")  # start run


In [None]:
mlflow.log_params({'max_depth': getattr(tree, 'max_depth', None), 'min_samples_leaf': getattr(tree, 'min_samples_leaf', None), 'random_state': getattr(tree, 'random_state', None)})  # params


In [None]:
mlflow.log_metric('MAE', mae)  # MAE


In [None]:
mlflow.log_metric('RMSE', rmse)  # RMSE


In [None]:
mlflow.log_metric('R2', r2)  # R2


In [None]:
mlflow.sklearn.log_model(tree, artifact_path='model')  # log model


In [None]:
mlflow.end_run()  # end run


# Persistence — save to disk and reload

In [None]:
import os  # fs utils


In [None]:
import joblib  # persistence


In [None]:
from datetime import datetime  # timestamp


In [None]:
os.makedirs('Machine-Learning/models/DecisionTreeRegression', exist_ok=True)  # ensure dir


In [None]:
MODEL_PATH = f"Machine-Learning/models/DecisionTreeRegression/decision_tree_wine_{DATASET}_" + datetime.now().strftime('%Y%m%d_%H%M%S') + ".joblib"  # path


In [None]:
joblib.dump(tree, MODEL_PATH)  # save


In [None]:
loaded = joblib.load(MODEL_PATH)  # load


In [None]:
float(loaded.predict(X_test.iloc[[0]])[0])  # quick check


# Streamlit App — minimal predictor UI

In [None]:
# Source code of the generated Streamlit app. The two %-placeholders are filled
# at the bottom with the saved model path and the list of feature column names.
# The path is inserted with %r so it becomes a correctly quoted and escaped
# Python literal whatever characters it contains. Do not add a literal percent
# sign to this template without doubling it, because the whole string is
# %-formatted.
APP_CODE = """
import joblib
import pandas as pd
import streamlit as st

st.set_page_config(page_title='Wine — Decision Tree', page_icon='🍷')
st.title('🍷 Wine Quality — Decision Tree Regression')
MODEL_PATH = st.text_input('Model path', value=%r)
if MODEL_PATH and st.button('Load model'):
    try:
        # SECURITY: joblib.load unpickles the file and can run arbitrary code.
        # Only load model files that come from a trusted source.
        st.session_state['model'] = joblib.load(MODEL_PATH)
        st.success('Model loaded')
    except Exception as e:
        st.error(f'Load failed: {e}')
if 'model' in st.session_state:
    cols = %s
    vals = {c: st.number_input(c, value=0.0) for c in cols}
    if st.button('Predict quality'):
        # Pin the column order to the order used when the model was trained.
        df = pd.DataFrame([vals], columns=cols)
        try:
            y = st.session_state['model'].predict(df)
            st.metric('Predicted quality', f"{float(y[0]):.3f}")
        except Exception as e:
            st.error(f'Predict failed: {e}')
""" % (MODEL_PATH, list(X.columns))


In [None]:
# Write the generated app into the current working directory. The context
# manager closes the handle as soon as the write finishes, and the encoding is
# pinned to UTF-8 because the app source contains non-ASCII characters that a
# platform-default encoding may be unable to write.
with open('app_streamlit_wine.py', 'w', encoding='utf-8') as app_file:
    app_file.write(APP_CODE)


In [None]:
tree.predict(X_test.iloc[[0]]).item(0)  # in-memory model's prediction for the first test row, as a plain float
