In [None]:
import numpy as np
import pandas as pd

# Load the red-wine quality dataset from the UCI repository.
# The file is semicolon-delimited, hence the explicit `sep`.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
data = pd.read_csv(url, sep=';')

# Display the first few rows. Ending the cell with a bare expression uses the
# notebook's rich table rendering instead of the line-wrapped plain text that
# print() produces for a wide frame.
data.head()


   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1      9.8        5  
2      9.8        5 

Gradient Descent Variants (Batch, Stochastic, Mini-Batch)

In [None]:
def batch_gradient_descent(X, y, learning_rate=0.01, epochs=1000):
    """Fit linear-regression weights with full-batch gradient descent on the MSE.

    NOTE(review): the training cell of this notebook calls
    ``batch_gradient_descent``, but no definition of it is visible anywhere in
    the notebook, so a fresh "Restart & Run All" stops with a NameError. This
    implementation uses the same update rule as
    ``batch_gradient_descent_with_early_stopping`` defined further down;
    confirm it matches the cell that originally lived here.

    Args:
        X: 2-D array-like of shape (m, n); include a column of ones if an
            intercept is wanted.
        y: array-like with m target values (1-D or a single column).
        learning_rate: step size applied to every gradient.
        epochs: number of full passes over the data.

    Returns:
        1-D NumPy array of n fitted weights.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()
    m, n = X.shape
    theta = np.zeros(n)
    for _ in range(epochs):
        gradients = (2 / m) * X.T.dot(X.dot(theta) - y)
        theta -= learning_rate * gradients
    return theta


def stochastic_gradient_descent(X, y, learning_rate=0.01, epochs=1000, random_state=None):
    """Fit linear-regression weights with stochastic gradient descent.

    Each epoch performs m single-sample updates; the sample for every update is
    drawn uniformly at random with replacement.

    Args:
        X: 2-D array-like of shape (m, n); include a column of ones if an
            intercept is wanted.
        y: array-like with m target values (1-D or a single column).
        learning_rate: step size applied to every gradient.
        epochs: number of passes (each pass makes m updates).
        random_state: optional integer seed. When None (the default) samples
            are drawn from NumPy's global RNG, exactly as before, so an
            earlier ``np.random.seed(...)`` call still controls the run.

    Returns:
        1-D NumPy array of n fitted weights.
    """
    X = np.asarray(X, dtype=float)
    # Flatten the targets so a column vector does not turn the gradient into an
    # (n, 1) array, which cannot be subtracted in place from the (n,) weights.
    y = np.asarray(y, dtype=float).ravel()
    m, n = X.shape
    # The np.random module and RandomState expose the same randint() API, so
    # the unseeded path keeps consuming the global stream.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    theta = np.zeros(n)
    for _ in range(epochs):
        for _ in range(m):
            idx = rng.randint(m)
            x_i, y_i = X[idx], y[idx]
            # Gradient of the squared error for one sample: 2 * residual * x_i.
            gradients = 2 * (x_i.dot(theta) - y_i) * x_i
            theta -= learning_rate * gradients
    return theta


In [None]:
def mini_batch_gradient_descent(X, y, learning_rate=0.01, epochs=1000, batch_size=32, random_state=None):
    """Fit linear-regression weights with mini-batch gradient descent.

    Every epoch reshuffles the rows and then walks through them in consecutive
    slices of ``batch_size`` rows (the last slice may be shorter), applying one
    mean-squared-error gradient step per slice.

    Args:
        X: 2-D array-like of shape (m, n); include a column of ones if an
            intercept is wanted.
        y: array-like with m target values (1-D or a single column).
        learning_rate: step size applied to every gradient.
        epochs: number of shuffled passes over the data.
        batch_size: number of rows per update.
        random_state: optional integer seed. When None (the default) the
            shuffles come from NumPy's global RNG, exactly as before, so an
            earlier ``np.random.seed(...)`` call still controls the run.

    Returns:
        1-D NumPy array of n fitted weights.
    """
    X = np.asarray(X, dtype=float)
    # Flatten the targets so a column vector does not turn the gradient into an
    # (n, 1) array, which cannot be subtracted in place from the (n,) weights.
    y = np.asarray(y, dtype=float).ravel()
    m, n = X.shape
    # The np.random module and RandomState expose the same permutation() API,
    # so the unseeded path keeps consuming the global stream.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    theta = np.zeros(n)
    for _ in range(epochs):
        order = rng.permutation(m)
        X_shuffled, y_shuffled = X[order], y[order]
        for start in range(0, m, batch_size):
            X_batch = X_shuffled[start:start + batch_size]
            y_batch = y_shuffled[start:start + batch_size]
            # Normalise by the actual slice length so a short final batch is
            # averaged correctly.
            gradients = (2 / X_batch.shape[0]) * X_batch.T.dot(X_batch.dot(theta) - y_batch)
            theta -= learning_rate * gradients
    return theta



In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Target is the 'quality' column; every other column is a predictor
y = data['quality'].to_numpy()
X = data.drop(columns='quality').to_numpy()

# Hold out 20% of the rows for testing (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features: statistics are learned from the training rows
# only and then applied unchanged to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Add a bias term (intercept) to the features
X_train_b = np.c_[np.ones((X_train_scaled.shape[0], 1)), X_train_scaled]
X_test_b = np.c_[np.ones((X_test_scaled.shape[0], 1)), X_test_scaled]

# The stochastic and mini-batch trainers draw their samples/shuffles from
# NumPy's global random generator; seed it so the fitted weights (and the MSEs
# reported below) are the same on every run of the notebook.
np.random.seed(42)

# Train using Batch Gradient Descent
theta_batch = batch_gradient_descent(X_train_b, y_train)

# Train using Stochastic Gradient Descent
theta_sgd = stochastic_gradient_descent(X_train_b, y_train)

# Train using Mini-Batch Gradient Descent
theta_mini_batch = mini_batch_gradient_descent(X_train_b, y_train)


In [None]:
from sklearn.metrics import mean_squared_error

# Predictions: apply each learned weight vector to the bias-augmented test matrix
y_pred_batch, y_pred_sgd, y_pred_mini_batch = (
    X_test_b.dot(weights) for weights in (theta_batch, theta_sgd, theta_mini_batch)
)

# Evaluation: test-set mean squared error for each optimiser
mse_batch, mse_sgd, mse_mini_batch = (
    mean_squared_error(y_test, predicted)
    for predicted in (y_pred_batch, y_pred_sgd, y_pred_mini_batch)
)

print(f'Batch GD MSE: {mse_batch}')
print(f'SGD MSE: {mse_sgd}')
print(f'Mini-Batch GD MSE: {mse_mini_batch}')


Batch GD MSE: 0.38983888020776714
SGD MSE: 0.46067829320966336
Mini-Batch GD MSE: 0.39268572510975985


Preprocessing

In [None]:
# Count the missing entries in every column
print(data.isna().sum())


fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64


Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit an ordinary linear regression on the unscaled training features
lin_reg = LinearRegression().fit(X_train, y_train)

# Score it on the held-out test split
y_pred = lin_reg.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"Linear Regression - MSE: {mse:.4f}")
print(f"Linear Regression - R2 Score: {r2:.4f}")


Linear Regression - MSE: 0.3900
Linear Regression - R2 Score: 0.4032


In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the raw features into degree-2 polynomial features; the expansion is
# fitted on the training rows and re-used for the test rows
poly = PolynomialFeatures(degree=2)  # You can try degree=2 or 3
poly.fit(X_train)
X_train_poly = poly.transform(X_train)
X_test_poly = poly.transform(X_test)

# A plain linear model on the expanded features
poly_reg = LinearRegression().fit(X_train_poly, y_train)

# Score on the held-out test split
y_pred_poly = poly_reg.predict(X_test_poly)
mse_poly, r2_poly = mean_squared_error(y_test, y_pred_poly), r2_score(y_test, y_pred_poly)

print(f"Polynomial Regression (Degree 2) - MSE: {mse_poly:.4f}")
print(f"Polynomial Regression (Degree 2) - R2 Score: {r2_poly:.4f}")


Polynomial Regression (Degree 2) - MSE: 0.3819
Polynomial Regression (Degree 2) - R2 Score: 0.4157


Add Regularization (L1 and L2)

L2 Regularization (Ridge)

In [None]:
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Initialize Ridge Regression.
# The L2 penalty acts on the coefficient magnitudes, so it only treats the
# features even-handedly when they share a common scale. The wine features
# span very different ranges (compare density with total sulfur dioxide), so
# standardisation is bundled with the estimator. The pipeline still takes the
# raw feature matrix in fit/predict and is saved as one self-contained object.
ridge_reg = make_pipeline(
    StandardScaler(),
    Ridge(alpha=1.0),  # alpha is the regularization strength
)

# Train
ridge_reg.fit(X_train, y_train)

# Predict
y_pred_ridge = ridge_reg.predict(X_test)

# Evaluate
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

print(f"Ridge Regression - MSE: {mse_ridge:.4f}")
print(f"Ridge Regression - R2 Score: {r2_ridge:.4f}")


Ridge Regression - MSE: 0.3929
Ridge Regression - R2 Score: 0.3987


L1 Regularization (Lasso)

In [None]:
from sklearn.linear_model import Lasso
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Initialize Lasso Regression.
# The L1 penalty shrinks coefficients by magnitude, so on unscaled data the
# features it zeroes out are decided largely by their measurement units.
# Standardising inside a pipeline makes the penalty comparable across
# features. The pipeline still takes the raw feature matrix in fit/predict
# and is saved as one self-contained object.
lasso_reg = make_pipeline(
    StandardScaler(),
    Lasso(alpha=0.1),  # alpha controls sparsity
)

# Train
lasso_reg.fit(X_train, y_train)

# Predict
y_pred_lasso = lasso_reg.predict(X_test)

# Evaluate
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)

print(f"Lasso Regression - MSE: {mse_lasso:.4f}")
print(f"Lasso Regression - R2 Score: {r2_lasso:.4f}")


Lasso Regression - MSE: 0.4987
Lasso Regression - R2 Score: 0.2369


In [None]:
def batch_gradient_descent_with_early_stopping(X_train, y_train, X_val, y_val, learning_rate=0.01, epochs=1000, patience=10, verbose=True):
    """Full-batch gradient descent that stops once validation loss stalls.

    After every update the mean squared error on the validation set is
    computed. Training stops when that loss has failed to improve for
    ``patience`` consecutive epochs, or after ``epochs`` updates. The weights
    that achieved the lowest validation loss are returned, not the weights
    from the last update (which have already drifted for ``patience`` epochs
    past the optimum when early stopping triggers).

    Args:
        X_train: 2-D array of shape (m, n) used for the gradient steps.
        y_train: 1-D array of m training targets.
        X_val: 2-D array with n columns used only to monitor the loss.
        y_val: 1-D array of validation targets.
        learning_rate: step size applied to every gradient.
        epochs: maximum number of updates.
        patience: number of consecutive non-improving epochs tolerated.
        verbose: when True (default) print the validation loss every epoch and
            a message when stopping early; pass False to keep output quiet.

    Returns:
        1-D NumPy array of n weights with the best validation loss seen
        (all zeros if no update was ever performed).
    """
    m, n = X_train.shape
    theta = np.zeros(n)
    # theta is updated in place, so snapshots must be copies.
    best_theta = theta.copy()
    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(epochs):
        # Training step
        gradients = (2/m) * X_train.T.dot(X_train.dot(theta) - y_train)
        theta -= learning_rate * gradients

        # Validation loss
        val_loss = np.mean((X_val.dot(theta) - y_val) ** 2)

        if verbose:
            print(f"Epoch {epoch}: Validation Loss = {val_loss:.4f}")

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_theta = theta.copy()
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            if verbose:
                print(f"Early stopping at epoch {epoch}")
            break

    return best_theta


In [None]:
# Already split X into train and test, now split train into train/validation.
# The split is taken from X_train_b (standardised features plus the bias
# column): batch_gradient_descent_with_early_stopping fits no separate
# intercept and uses a fixed learning rate, so it needs the same scaled,
# bias-augmented matrix that the other gradient-descent trainers receive.
from sklearn.model_selection import train_test_split

X_train_final, X_val, y_train_final, y_val = train_test_split(X_train_b, y_train, test_size=0.2, random_state=42)


In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Create a dictionary of all your models and predictions
models = {
    "Linear Regression": (y_test, y_pred),
    "Polynomial Regression (Degree 2)": (y_test, y_pred_poly),
    "Ridge Regression (L2)": (y_test, y_pred_ridge),
    "Lasso Regression (L1)": (y_test, y_pred_lasso),
    # y_pred_batch comes from the plain batch_gradient_descent run; the
    # early-stopping trainer is never called, so it must not be named here.
    "Batch Gradient Descent": (y_test, y_pred_batch)
}

# Print comparison.
# The loop uses its own variable names: reusing y_pred / mse / r2 would
# overwrite the linear-regression results held in those globals, and a second
# run of this cell would then report the wrong numbers for the first row.
print("\n📊 Model Performance Comparison:\n")
for model_name, (targets, predictions) in models.items():
    model_mse = mean_squared_error(targets, predictions)
    model_r2 = r2_score(targets, predictions)
    print(f"{model_name:<40} MSE: {model_mse:.4f} | R2 Score: {model_r2:.4f}")



📊 Model Performance Comparison:

Linear Regression                        MSE: 0.3900 | R2 Score: 0.4032
Polynomial Regression (Degree 2)         MSE: 0.3819 | R2 Score: 0.4157
Ridge Regression (L2)                    MSE: 0.3929 | R2 Score: 0.3987
Lasso Regression (L1)                    MSE: 0.4987 | R2 Score: 0.2369
Batch Gradient Descent (Early Stopping)  MSE: 0.3898 | R2 Score: 0.4035


In [None]:
pip install huggingface_hub



In [None]:
from huggingface_hub import login
# Authenticate this session with the Hugging Face Hub before uploading.
# In the recorded run this call rendered an interactive widget (the VBox
# output below) rather than reading a token from the code.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import os
import shutil
import joblib
import numpy as np

# Step 1: Create a clean folder
folder_name = "wine_models"

# Start from an empty folder so artefacts left over from an earlier run are
# not uploaded alongside the current ones
if os.path.exists(folder_name):
    shutil.rmtree(folder_name)
os.makedirs(folder_name)

# Step 2: Save the models inside the new folder
joblib.dump(lin_reg, os.path.join(folder_name, "linear_regression_model.pkl"))
joblib.dump(poly_reg, os.path.join(folder_name, "polynomial_regression_model.pkl"))
joblib.dump(ridge_reg, os.path.join(folder_name, "ridge_regression_model.pkl"))
joblib.dump(lasso_reg, os.path.join(folder_name, "lasso_regression_model.pkl"))

# Save the fitted preprocessing objects as well. poly_reg was trained on the
# output of `poly`, and the gradient-descent thetas were trained on the output
# of `scaler` (with a leading column of ones), so without these two files the
# saved artefacts cannot be applied to new raw samples.
joblib.dump(scaler, os.path.join(folder_name, "standard_scaler.pkl"))
joblib.dump(poly, os.path.join(folder_name, "polynomial_features.pkl"))

# Save the custom gradient descent thetas
np.save(os.path.join(folder_name, "theta_batch.npy"), theta_batch)
np.save(os.path.join(folder_name, "theta_sgd.npy"), theta_sgd)
np.save(os.path.join(folder_name, "theta_mini_batch.npy"), theta_mini_batch)




In [None]:
from huggingface_hub import upload_folder

# Define your repository ID
repo_id = "professorsab/wine-quality-regression-models"  # Replace with your username and repo name

# Upload the folder the models were written to by the save cell. The path is
# resolved from `folder_name` against the current working directory instead of
# a hard-coded absolute location, so the notebook also works when it is not
# run from /content.
folder_path = os.path.abspath(folder_name)

upload_folder(
    folder_path=folder_path,
    path_in_repo="",  # Upload to the root of the repository
    repo_id=repo_id,
    repo_type="model",  # Specify the repository type
)




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


lasso_regression_model.pkl:   0%|          | 0.00/720 [00:00<?, ?B/s]

linear_regression_model.pkl:   0%|          | 0.00/721 [00:00<?, ?B/s]

theta_batch.npy:   0%|          | 0.00/224 [00:00<?, ?B/s]

Upload 7 LFS files:   0%|          | 0/7 [00:00<?, ?it/s]

polynomial_regression_model.pkl:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

ridge_regression_model.pkl:   0%|          | 0.00/641 [00:00<?, ?B/s]

theta_mini_batch.npy:   0%|          | 0.00/224 [00:00<?, ?B/s]

theta_sgd.npy:   0%|          | 0.00/224 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/professorsab/wine-quality-regression-models/commit/7578acbf4ceae692b2cc45eafd4b783f520b18c0', commit_message='Upload folder using huggingface_hub', commit_description='', oid='7578acbf4ceae692b2cc45eafd4b783f520b18c0', pr_url=None, repo_url=RepoUrl('https://huggingface.co/professorsab/wine-quality-regression-models', endpoint='https://huggingface.co', repo_type='model', repo_id='professorsab/wine-quality-regression-models'), pr_revision=None, pr_num=None)