<a href="https://colab.research.google.com/github/lokesh9297/Wine-Quality-Prediction-using-Linear-Regression-SVD-From-Scratch-/blob/main/SVD_Based_Regression_with_train_test_split.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from numpy.linalg import norm, inv, svd

# Fetch the red-wine quality table from the UCI repository (semicolon-separated CSV)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, sep=";")

# Target: the "quality" column, reshaped into an (n_samples, 1) column vector
y = df["quality"].to_numpy().reshape(-1, 1)
# Features: every remaining column, kept as a DataFrame
X = df.drop(columns=["quality"])

In [2]:
# Standardize every feature column: subtract the column mean, then divide by
# the column standard deviation (pandas' .std() uses the sample estimate, ddof=1).
col_mean = X.mean()
col_std = X.std()
X_norm = X.sub(col_mean).div(col_std)

# Plain NumPy view of the standardized features for the linear algebra below
x = X_norm.to_numpy()
x.shape

(1599, 11)

In [3]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
# NOTE(review): `x` was standardized with the mean/std of the full dataset before
# this split, so the test rows contributed to the scaling statistics.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Design matrices: the features with a constant column of ones appended on the
# right, so the last fitted coefficient acts as the intercept.
A_train = np.concatenate([X_train, np.ones((len(X_train), 1))], axis=1)
A_test = np.concatenate([X_test, np.ones((len(X_test), 1))], axis=1)

# Regression targets for each split
B_train, B_test = y_train, y_test

In [5]:
# Least-squares fit of  A_train @ X ≈ B_train  through the SVD pseudo-inverse:
#   A = U Σ Vᵀ   =>   A⁺ = V Σ⁺ Uᵀ   =>   X = A⁺ B
# Full SVD: U is (m, m), S holds the min(m, n) singular values, VT is (n, n).
U, S, VT = svd(A_train)

# Σ⁺ has the transposed shape of A_train. Take the dimensions from the matrix
# itself so the cell keeps working when the split size or feature count changes.
n_rows, n_cols = A_train.shape
S_inv = np.zeros((n_cols, n_rows))

# Invert only the singular values that are numerically non-zero. Inverting a
# (near-)zero value would put inf / huge entries into the pseudo-inverse when
# A_train is rank deficient. The cutoff follows the usual
# max(m, n) * eps * largest-singular-value rule.
cutoff = S.max() * max(n_rows, n_cols) * np.finfo(S.dtype).eps
kept = np.flatnonzero(S > cutoff)
S_inv[kept, kept] = 1.0 / S[kept]

V = VT.T
Ap_train = V @ S_inv @ U.T

# Coefficient vector (one weight per feature, intercept last).
# NOTE(review): this rebinds `X`, which earlier held the feature DataFrame; the
# normalization cell cannot be re-run after this one without reloading the data.
# The name is kept because the prediction cell reads `X`.
X = Ap_train @ B_train

In [6]:
# Apply the fitted coefficient vector X to both design matrices
y_train_pred = np.matmul(A_train, X)
y_test_pred = np.matmul(A_test, X)

In [7]:
# Mean squared error per split: the average of the squared residuals
mse1 = np.square(y_train - y_train_pred).mean()
mse2 = np.square(y_test - y_test_pred).mean()
print(f"Final MSE on training data: {mse1:.4f}")
print(f"Final MSE on test data: {mse2:.4f}")


Final MSE on training data: 0.4242
Final MSE on test data: 0.3900


In [8]:
def r2_score(y, y_pred):
    """Coefficient of determination, R² = 1 - SS_res / SS_tot.

    Both inputs are flattened before comparison, so an (n, 1) column vector
    and a flat (n,) vector are compared element by element instead of being
    broadcast into an (n, n) matrix.

    Parameters
    ----------
    y : array-like
        Observed target values.
    y_pred : array-like
        Predicted values; must contain the same number of elements as ``y``.

    Returns
    -------
    float
        R² score. When ``y`` is constant (SS_tot == 0) the ratio is undefined;
        1.0 is returned for a perfect prediction and 0.0 otherwise.

    Raises
    ------
    ValueError
        If the inputs are empty or differ in number of elements.
    """
    y_true = np.asarray(y, dtype=float).ravel()
    y_hat = np.asarray(y_pred, dtype=float).ravel()

    if y_true.size == 0:
        raise ValueError("r2_score requires at least one sample")
    if y_true.shape != y_hat.shape:
        raise ValueError(
            f"y and y_pred must have the same number of elements, "
            f"got {y_true.size} and {y_hat.size}"
        )

    ss_res = np.sum((y_true - y_hat) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)

    # Constant target: avoid dividing by zero and return a finite score.
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0
    return 1 - (ss_res / ss_tot)

# Score the fit on both splits, pairing each target vector with its predictions
r21, r22 = (
    r2_score(actual, predicted)
    for actual, predicted in ((y_train, y_train_pred), (y_test, y_test_pred))
)

print(f"R² Score on training data: {r21:.4f}")
print(f"R² Score on test data: {r22:.4f}")

R² Score on training data: 0.3480
R² Score on test data: 0.4032
