# Wine Quality — Linear Regression (one command per code cell)
# Steps: load data → quick checks → split → simple linear model → evaluate → CV → MLflow tracking → persistence → Streamlit app → S3 upload (commented) → inference.


# Imports

In [None]:
import pandas as pd  # tabular data handling


In [None]:
import numpy as np  # numerical helpers


In [None]:
from sklearn.model_selection import train_test_split  # train/test split


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score  # evaluation metrics


In [None]:
from sklearn.linear_model import LinearRegression  # ordinary least squares regressor


# Load dataset (choose red or white)

In [None]:
DATASET = 'red'  # set to 'red' or 'white' to pick which wine dataset to use


In [None]:
CSV_PATH = f"/Users/sb/Documents/Study-Material/Machine-Learning/winequality-{DATASET}.csv"  # absolute path to selected dataset


In [None]:
df = pd.read_csv(CSV_PATH, sep=';')  # load CSV with semicolon separator


# Quick checks

In [None]:
df.head()  # show the first rows (pandas default: 5) to confirm the file parsed into separate columns


In [None]:
df.info()  # print column names, dtypes and non-null counts to spot missing values


In [None]:
df.describe()  # summary statistics (count, mean, std, min, quartiles, max) for the numeric columns


# Train/test split

In [None]:
X = df.drop(columns=['quality'])  # features matrix (all but target)


In [None]:
y = df['quality']  # target vector (wine quality score)


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # hold-out split


# Model: LinearRegression (no scaling required for OLS)

In [None]:
linreg = LinearRegression()  # instantiate linear regressor


In [None]:
linreg.fit(X_train, y_train)  # fit model on training data


In [None]:
y_pred = linreg.predict(X_test)  # predict on test set


# Evaluation (MAE, RMSE, R^2)

In [None]:
mae = mean_absolute_error(y_test, y_pred)  # mean absolute error


In [None]:
rmse = mean_squared_error(y_test, y_pred, squared=False)  # root mean squared error


In [None]:
r2 = r2_score(y_test, y_pred)  # coefficient of determination


In [None]:
{'MAE': mae, 'RMSE': rmse, 'R2': r2}  # collect metrics in a dict for display


# Cross-validation (quick view)

In [None]:
from sklearn.model_selection import cross_validate  # cross-validation helper


In [None]:
cv_res = cross_validate(linreg, X, y, cv=5, scoring=('r2','neg_root_mean_squared_error'))  # 5-fold CV with common scores


In [None]:
{k: (np.mean(v), np.std(v)) for k, v in cv_res.items() if k.startswith('test_')}  # show mean¬±std for test scores


# MLflow Tracking — log params, metrics, and model (install mlflow if needed)
# - To view UI: run `mlflow ui` in a terminal and open the shown URL.
# - If mlflow isn't installed, uncomment: `# !pip install mlflow`

In [None]:
import mlflow  # experiment tracking (install if missing)


In [None]:
import mlflow.sklearn  # sklearn flavor helpers for logging models


In [None]:
mlflow.set_experiment('LinearRegression-WineQuality')  # create/select experiment for this algorithm


In [None]:
mlflow.start_run(run_name=f"{DATASET}")  # begin a new MLflow run scoped to the dataset


In [None]:
mlflow.log_params({'fit_intercept': getattr(linreg, 'fit_intercept', None), 'copy_X': getattr(linreg, 'copy_X', None)})  # log core hyperparameters


In [None]:
mlflow.log_metric('MAE', mae)  # log MAE


In [None]:
mlflow.log_metric('RMSE', rmse)  # log RMSE


In [None]:
mlflow.log_metric('R2', r2)  # log R^2


In [None]:
mlflow.sklearn.log_model(linreg, artifact_path='model')  # log fitted model artifact


In [None]:
mlflow.end_run()  # end the MLflow run


# Persistence — save to disk and reload for inference
# - Models are stored under Machine-Learning/models/LinearRegression/

In [None]:
import os  # filesystem utilities


In [None]:
import joblib  # model persistence utility


In [None]:
from datetime import datetime  # timestamp for unique filenames


In [None]:
os.makedirs('Machine-Learning/models/LinearRegression', exist_ok=True)  # ensure output directory exists


In [None]:
MODEL_PATH = f"Machine-Learning/models/LinearRegression/linear_regression_wine_{DATASET}_" + datetime.now().strftime('%Y%m%d_%H%M%S') + ".joblib"  # build timestamped model path


In [None]:
joblib.dump(linreg, MODEL_PATH)  # save trained model to disk


In [None]:
loaded = joblib.load(MODEL_PATH)  # load the saved model


In [None]:
float(loaded.predict(X_test.iloc[[0]])[0])  # verify loaded model prediction


# Streamlit App — write a minimal app to load the saved model and predict
# - Run locally: `streamlit run app_streamlit_wine.py`
# - For Streamlit Cloud, create secrets and a requirements.txt (see commented cell below)

In [None]:
# Source text of the generated Streamlit app.
# - The template is filled in with %-formatting, so any literal percent sign
#   added to it later must be written as %%.
# - %r embeds MODEL_PATH and the feature-column list as valid Python literals,
#   so paths containing quotes or backslashes cannot break the generated file.
# - NOTE(review): the app calls joblib.load on a user-supplied path; joblib
#   files are pickles and can execute code on load, so only point the app at
#   model files you trust.
APP_CODE = """
import joblib
import pandas as pd
import streamlit as st

st.set_page_config(page_title='Wine Quality — LinearRegression', page_icon='🍷')
st.title('🍷 Wine Quality — Linear Regression Predictor')

MODEL_PATH = st.text_input('Model path', value=%r)

if MODEL_PATH and st.button('Load model'):
    try:
        st.session_state['model'] = joblib.load(MODEL_PATH)
        st.success('Model loaded')
    except Exception as e:
        st.error(f'Failed to load model: {e}')

if 'model' in st.session_state:
    st.subheader('Input features')
    # Default feature names for wine quality dataset (order matches CSV columns except target)
    cols = %r
    vals = {}
    for c in cols:
        vals[c] = st.number_input(c, value=0.0)
    if st.button('Predict quality'):
        df = pd.DataFrame([vals])
        try:
            y = st.session_state['model'].predict(df)
            st.metric('Predicted quality', f"{float(y[0]):.3f}")
        except Exception as e:
            st.error(f'Prediction failed: {e}')
""" % (MODEL_PATH, list(X.columns))  # fill in the saved model path and the training feature names


In [None]:
# Write the generated app next to the notebook. The context manager closes the
# file deterministically, and the explicit UTF-8 encoding is required because
# the template contains non-ASCII characters that a platform-default codec
# (e.g. cp1252 on Windows) may be unable to encode.
with open('app_streamlit_wine.py', 'w', encoding='utf-8') as app_file:
    app_file.write(APP_CODE)


# (Optional) Streamlit Cloud requirements (commented — edit as needed)

In [None]:
# # with open('requirements.txt','w') as f: f.write("\n".join(['pandas','numpy','scikit-learn','joblib','streamlit']))


# AWS Upload (S3) — commented stubs to push the model artifact
# - Set AWS creds via env or config; uncomment to use.

In [None]:
# import boto3  # AWS SDK for Python


In [None]:
# s3 = boto3.client('s3')  # create S3 client


In [None]:
# bucket = os.environ.get('S3_BUCKET', 'your-bucket-name')  # target bucket name


In [None]:
# key = 'wine-models/LinearRegression/' + os.path.basename(MODEL_PATH)  # S3 object key


In [None]:
# s3.upload_file(MODEL_PATH, bucket, key)  # upload model file to S3


In [None]:
# print(f'Uploaded to s3://{bucket}/{key}')  # confirm upload destination


# Inference example (single row)

In [None]:
sample_row = X_test.iloc[[0]]  # pick one sample row from test set


In [None]:
float(linreg.predict(sample_row)[0])  # predict quality for the sample row
