# Wine Quality — Polynomial Linear Regression (one command per code cell)
# Steps: load → checks → split → pipeline(PolynomialFeatures+LinearRegression) → evaluate → CV → persistence (commented) → inference.


# Imports

In [None]:
import pandas as pd  # data handling


In [None]:
import numpy as np  # numeric helpers


In [None]:
from sklearn.model_selection import train_test_split, cross_validate  # split + CV


In [None]:
from sklearn.preprocessing import PolynomialFeatures  # polynomial feature generator


In [None]:
from sklearn.linear_model import LinearRegression  # linear regressor (used after poly expansion)


In [None]:
from sklearn.pipeline import Pipeline  # chain preprocessing and model


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score  # metrics


# Load dataset selector

In [None]:
DATASET = 'red'  # choose 'red' or 'white'


In [None]:
CSV_PATH = f"/Users/sb/Documents/Study-Material/Machine-Learning/winequality-{DATASET}.csv"  # dataset path


In [None]:
df = pd.read_csv(CSV_PATH, sep=';')  # read CSV with semicolon separator


# Split

In [None]:
X = df.drop(columns=['quality'])  # features


In [None]:
y = df['quality']  # target


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # hold-out split


# Pipeline: PolynomialFeatures + LinearRegression

In [None]:
poly_deg = 2  # degree of polynomial features (change to 3 for higher-order)


In [None]:
pipe = Pipeline([('poly', PolynomialFeatures(degree=poly_deg, include_bias=False)), ('lin', LinearRegression())])  # build pipeline


In [None]:
pipe.fit(X_train, y_train)  # fit pipeline on training data


In [None]:
pred = pipe.predict(X_test)  # predict on test set


# Evaluate

In [None]:
mae = mean_absolute_error(y_test, pred)  # MAE


In [None]:
rmse = mean_squared_error(y_test, pred, squared=False)  # RMSE


In [None]:
r2 = r2_score(y_test, pred)  # R^2


In [None]:
{'MAE': mae, 'RMSE': rmse, 'R2': r2}  # metrics summary


# Cross-validation

In [None]:
cv = cross_validate(pipe, X, y, cv=5, scoring=('r2','neg_root_mean_squared_error'))  # 5-fold CV


In [None]:
{k: (np.mean(v), np.std(v)) for k, v in cv.items() if k.startswith('test_')}  # mean¬±std of CV scores


# MLflow Tracking — log params, metrics, and model

In [None]:
import mlflow  # experiment tracking


In [None]:
import mlflow.sklearn  # sklearn flavor


In [None]:
mlflow.set_experiment('PolynomialRegression-WineQuality')  # experiment per algorithm


In [None]:
mlflow.start_run(run_name=f"{DATASET}")  # start run


In [None]:
mlflow.log_params({'degree': poly_deg})  # log polynomial degree


In [None]:
mlflow.log_metric('MAE', mae)  # MAE


In [None]:
mlflow.log_metric('RMSE', rmse)  # RMSE


In [None]:
mlflow.log_metric('R2', r2)  # R2


In [None]:
mlflow.sklearn.log_model(pipe, artifact_path='model')  # log pipeline


In [None]:
mlflow.end_run()  # end run


# Persistence — save to disk and reload

In [None]:
import os  # fs utils


In [None]:
import joblib  # persistence


In [None]:
from datetime import datetime  # timestamp


In [None]:
os.makedirs('Machine-Learning/models/PolynomialRegression', exist_ok=True)  # ensure dir


In [None]:
MODEL_PATH = f"Machine-Learning/models/PolynomialRegression/polynomial_regression_deg{poly_deg}_wine_{DATASET}_" + datetime.now().strftime('%Y%m%d_%H%M%S') + ".joblib"  # path


In [None]:
joblib.dump(pipe, MODEL_PATH)  # save pipeline


In [None]:
loaded = joblib.load(MODEL_PATH)  # load back


In [None]:
float(loaded.predict(X_test.iloc[[0]])[0])  # quick check


# Streamlit App — minimal predictor UI

In [None]:
# Source of a small Streamlit app, rendered with %-formatting:
#   1st placeholder -> default model path, 2nd placeholder -> feature column names.
# Both are inserted with %r so the generated file always contains valid Python
# literals (a quote or backslash in MODEL_PATH cannot break the app source).
# The template must not contain any other literal '%' characters.
# NOTE(security): the generated app calls joblib.load() on a user-typed path;
# joblib files are pickles, so only trusted model files should be loaded.
APP_CODE = """
import joblib, pandas as pd, streamlit as st
st.set_page_config(page_title='Wine — Polynomial Regression', page_icon='🍷')
st.title('🍷 Wine Quality — Polynomial Regression')
MODEL_PATH = st.text_input('Model path', value=%r)
if MODEL_PATH and st.button('Load model'):
    try:
        st.session_state['model'] = joblib.load(MODEL_PATH)
        st.success('Model loaded')
    except Exception as e:
        st.error(f'Load failed: {e}')
if 'model' in st.session_state:
    cols = %r
    vals = {c: st.number_input(c, value=0.0) for c in cols}
    if st.button('Predict quality'):
        df = pd.DataFrame([vals])
        try:
            y = st.session_state['model'].predict(df)
            st.metric('Predicted quality', f"{float(y[0]):.3f}")
        except Exception as e:
            st.error(f'Predict failed: {e}')
""" % (MODEL_PATH, list(X.columns))


In [None]:
# Write the app source explicitly as UTF-8 (it contains non-ASCII characters,
# which the platform default encoding may not be able to represent) and let
# the context manager close the file handle deterministically.
with open('app_streamlit_wine.py', 'w', encoding='utf-8') as app_file:
    app_file.write(APP_CODE)


# AWS S3 Upload (commented)

In [None]:
# import boto3


In [None]:
# s3 = boto3.client('s3')


In [None]:
# bucket = os.environ.get('S3_BUCKET','your-bucket')


In [None]:
# key = 'wine-models/PolynomialRegression/' + os.path.basename(MODEL_PATH)


In [None]:
# s3.upload_file(MODEL_PATH, bucket, key)


In [None]:
pipe.predict(X_test.iloc[[0]]).item()  # inference on one row with the in-memory pipeline, as a plain Python float
