<a href="https://colab.research.google.com/github/lokesh9297/Wine-Quality-Prediction-using-Linear-Regression-SVD-From-Scratch-/blob/main/Simple_Linear_Regression_and_SVD_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# Download the red-wine quality table from the UCI repository (semicolon-separated file).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, sep=";")

# Every column except "quality" is a feature; "quality" is the regression
# target, reshaped into a single-column 2-D array.
X = df.drop("quality", axis=1)
y = df["quality"].to_numpy().reshape(-1, 1)

In [2]:
# Standardize every feature column: subtract the column mean, then divide by
# the column standard deviation.
X_norm = X.sub(X.mean()).div(X.std())
# Plain NumPy copy of the standardized features, used by all later cells.
x = X_norm.to_numpy()
x.shape

(1599, 11)

In [3]:
# Initialize parameters: one weight per feature column of x (all ones) and a
# zero bias. The weight count is read from x instead of being hard-coded, so
# this cell keeps working if the feature set changes.
w = np.ones((x.shape[1], 1))
b = 0
w.shape

(11, 1)

In [4]:
def pred(x, w, b):
    """Return the linear-model predictions ``x @ w + b`` as a float column.

    Parameters
    ----------
    x : array-like, shape (m, n)
        One sample per row.
    w : array-like, shape (n, 1) or (n,)
        Weight vector.
    b : scalar
        Bias added to every prediction.

    Returns
    -------
    numpy.ndarray, shape (m, 1)
        Predicted value for each row of ``x``.
    """
    m = len(x)
    if m == 0:
        # Nothing to predict; keep the (0, 1) result shape.
        return np.zeros([0, 1])
    # One matrix product replaces the per-row Python loop.
    scores = np.dot(np.asarray(x, dtype=float), w) + b
    # Force a float column regardless of whether w was (n,) or (n, 1).
    return np.asarray(scores, dtype=float).reshape(m, 1)

In [5]:
def cost_function(y_pred, y):
    """Return the half mean-squared-error cost, ``sum((y_pred - y)**2) / (2*m)``.

    Parameters
    ----------
    y_pred : array-like
        Model predictions.
    y : array-like
        Target values; ``m`` is taken as ``len(y)``.

    Returns
    -------
    float
        The cost value.
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    # A (m,) array against a (m, 1) array would broadcast to an (m, m) matrix
    # and silently inflate the cost; when both hold the same number of values,
    # line the predictions up with the targets element by element.
    if y_pred.shape != y.shape and y_pred.size == y.size:
        y_pred = y_pred.reshape(y.shape)
    m = len(y)
    return (1 / (2 * m)) * np.sum((y_pred - y) ** 2)


In [6]:
def compute_gradient(x, y, w, b):
    """Return the gradients ``(dw, db)`` of the half-MSE cost.

    The residual ``pred(x, w, b) - y`` is projected back onto the features to
    get the weight gradient and summed to get the bias gradient; both are
    scaled by ``1 / len(y)``.
    """
    n_samples = len(y)
    residual = pred(x, w, b) - y
    grad_w = (1 / n_samples) * np.dot(x.T, residual)
    grad_b = (1 / n_samples) * np.sum(residual)
    return grad_w, grad_b

In [7]:
def gradient_descent(x, y, w, b, learning_rate, epochs):
    """Fit the linear model by batch gradient descent.

    Parameters
    ----------
    x, y : arrays
        Features and targets, passed through to ``pred`` / ``compute_gradient``.
    w : array-like
        Initial weights. The caller's array is NOT modified; a float copy is
        updated instead.
    b : scalar
        Initial bias.
    learning_rate : float
        Step size applied to each gradient.
    epochs : int
        Number of update steps.

    Returns
    -------
    (w, b)
        The updated weights and bias.

    Prints the cost every 100 epochs, evaluated on the parameters as they are
    at the start of that epoch (before the epoch's update).
    """
    # Work on a private float copy: in-place updates on the caller's array
    # would leak state back to the caller and fail outright on integer arrays.
    w = np.array(w, dtype=float)
    for i in range(epochs):
        if i % 100 == 0:  # Print cost every 100 iterations
            # Predictions are only needed for logging; compute_gradient does
            # its own forward pass, so skip the extra one on other epochs.
            y_pred = pred(x, w, b)
            print(f"Epoch {i}, Cost: {cost_function(y_pred, y):.4f}")
        dw, db = compute_gradient(x, y, w, b)
        w -= learning_rate * dw
        b = b - learning_rate * db
    return w, b


In [8]:
# Training hyperparameters: gradient step size and number of update steps.
learning_rate = 0.01
epochs = 1001  # range(1001) reaches epoch 1000, the last one that gets logged
w, b = gradient_descent(x, y, w, b, learning_rate=learning_rate, epochs=epochs)


Epoch 0, Cost: 22.8770
Epoch 100, Cost: 2.9735
Epoch 200, Cost: 0.6769
Epoch 300, Cost: 0.3151
Epoch 400, Cost: 0.2424
Epoch 500, Cost: 0.2226
Epoch 600, Cost: 0.2156
Epoch 700, Cost: 0.2127
Epoch 800, Cost: 0.2113
Epoch 900, Cost: 0.2106
Epoch 1000, Cost: 0.2102


In [9]:
# Predict with the trained parameters and show the first five values as a flat array.
y_pred = pred(x, w, b)
print("Predicted Quality (First 5 samples):", y_pred[:5].ravel())


Predicted Quality (First 5 samples): [5.04454286 5.08562657 5.16483032 5.77411226 5.04454286]


In [10]:
# Mean squared error of the gradient-descent fit on the data it was trained on.
mse = ((y - y_pred) ** 2).mean()
print(f"Final MSE: {mse:.4f}")


Final MSE: 0.4203


In [11]:
def r2_score(y, y_pred):
    """Return the coefficient of determination, ``1 - SS_res / SS_tot``.

    Parameters
    ----------
    y : array-like
        Observed target values.
    y_pred : array-like
        Predicted values.

    Returns
    -------
    float
        R² of the predictions. If ``y`` is constant, ``SS_tot`` is zero and
        the division does not produce a finite score.
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    # A (m,) array against a (m, 1) array would broadcast to an (m, m) matrix
    # and corrupt SS_res; when both hold the same number of values, line the
    # predictions up with the targets element by element.
    if y_pred.shape != y.shape and y_pred.size == y.size:
        y_pred = y_pred.reshape(y.shape)
    ss_res = np.sum((y - y_pred) ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    return 1 - (ss_res / ss_tot)

# R² of the gradient-descent predictions against the observed quality scores.
r2 = r2_score(y=y, y_pred=y_pred)
print(f"R² Score: {r2:.4f}")

R² Score: 0.3551


# Linear regression using SVD

In [12]:
import numpy as np
from numpy.linalg import norm, inv, svd

# Build the least-squares system A @ X ≈ B. A holds the standardized features
# plus a trailing column of ones, so the last coefficient acts as the intercept.
# Shapes are read from x instead of being hard-coded.
n_samples, n_features = x.shape
A = np.ones([n_samples, n_features + 1])
A[:, :n_features] = x
B = y

# Placeholder for the coefficient vector (one entry per column of A).
# NOTE(review): this rebinds `X`, which earlier held the feature DataFrame, so
# re-running the normalization cell after this one will not see the DataFrame.
X = np.zeros([A.shape[1], 1])

In [13]:
# Solve A @ X ≈ B through the pseudo-inverse A+ = V @ S+ @ U.T.
# The reduced SVD is sufficient: the extra columns of a full U would only ever
# be multiplied by the zero padding of S+, and the full U is n_samples x n_samples.
U, S, VT = svd(A, full_matrices=False)
UT = U.T
V = VT.T

# Invert only the singular values above a rank tolerance; the rest stay at
# zero so a rank-deficient A does not produce inf/nan coefficients.
tol = S.max() * max(A.shape) * np.finfo(S.dtype).eps
S_inv = np.zeros_like(S)
keep = S > tol
S_inv[keep] = 1.0 / S[keep]
Sk = np.diag(S_inv)

Ap = V @ Sk @ UT

# Least-squares coefficients (feature weights followed by the intercept).
X = np.dot(Ap, B)




In [14]:
# Mean squared error of the SVD least-squares fit on the same data.
mse = ((B - A @ X) ** 2).mean()
print(f"Final MSE: {mse:.4f}")


Final MSE: 0.4168


In [15]:
# r2_score is already defined in the gradient-descent section above; reuse it
# here instead of redefining an identical copy that would shadow it.
r2 = r2_score(B, A @ X)

print(f"R² Score: {r2:.4f}")

R² Score: 0.3606
