<a href="https://colab.research.google.com/github/lokesh9297/Irish-Flower-Classification/blob/main/Simple_Linear_Regression_with_train_test_split.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, delimiter=";")

# Separate features and target
X = df.drop(columns=["quality"])
y = df["quality"].values.reshape(-1, 1)

In [2]:
#Normalize
X_norm = (X - X.mean())/X.std()
x=X_norm.to_numpy()
x.shape

(1599, 11)

In [3]:
# Train-test split (80% train, 20% test)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [4]:
#Initiallize parameters
w = np.ones((11,1))
b= 0
w.shape

(11, 1)

In [5]:
def pred(x,w,b):
  y_pred =np.zeros([len(x),1])
  for i in range(len(x)):
    y_pred[i]= np.dot(x[i,:],w)+b
  return y_pred

In [6]:
def cost_function(y_pred,y):
  m= len(y)
  return (1/(2*m))*np.sum((y_pred-y)**2)


In [7]:
def compute_gradient(x,y,w,b):
  m= len(y)
  y_pred = pred(x,w,b)
  dw = (1/m)*np.dot(x.T,(y_pred-y))
  db = (1/m)*np.sum(y_pred-y)
  return dw,db

In [8]:
def gradient_descent(x,y,w,b,learning_rate,epochs):
    for i in range(epochs):
        y_pred = pred(x,w,b)
        dw, db = compute_gradient(x,y,w,b)
        w -= learning_rate * dw
        b -= learning_rate * db
        if i % 100 == 0:  # Print cost every 100 iterations
          print(f"Epoch {i}, Cost: {cost_function(y_pred, y):.4f}")
    return w, b


In [9]:
learning_rate = 0.01
epochs = 1001
w, b = gradient_descent(X_train, y_train, w, b, learning_rate, epochs)


Epoch 0, Cost: 22.4487
Epoch 100, Cost: 3.0095
Epoch 200, Cost: 0.7019
Epoch 300, Cost: 0.3270
Epoch 400, Cost: 0.2493
Epoch 500, Cost: 0.2276
Epoch 600, Cost: 0.2199
Epoch 700, Cost: 0.2167
Epoch 800, Cost: 0.2152
Epoch 900, Cost: 0.2144
Epoch 1000, Cost: 0.2140


In [10]:
y_test_pred = pred(X_test, w, b)
y_train_pred = pred(X_train, w, b)

In [11]:
mse1 = np.mean((y_train- y_train_pred)**2)
mse2 = np.mean((y_test- y_test_pred)**2)
print(f"Final MSE on training data: {mse1:.4f}")
print(f"Final MSE on test data: {mse2:.4f}")


Final MSE on training data: 0.4279
Final MSE on test data: 0.3931


In [12]:
def r2_score(y, y_pred):
    ss_res = np.sum((y - y_pred) ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    return 1 - (ss_res / ss_tot)

r21 = r2_score(y_train, y_train_pred)
r22 = r2_score(y_test, y_test_pred)

print(f"R² Score on training data: {r21:.4f}")
print(f"R² Score on test data: {r22:.4f}")

R² Score on training data: 0.3422
R² Score on test data: 0.3984
