<a href="https://colab.research.google.com/github/poojabisht10/Deep-Learning/blob/main/UCS761_Lab5_Linear_Regression_From_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Dataset loading

In [33]:
# We are loading the Abalone dataset to study the relationship between physical measurements and the age of abalone.
# This dataset is suitable because age is a continuous value and can be predicted using regression.

import numpy as np
import pandas as pd

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"

columns = [
    "Sex", "Length", "Diameter", "Height",
    "Whole_weight", "Shucked_weight",
    "Viscera_weight", "Shell_weight", "Rings"
]

data = pd.read_csv(url, header=None, names=columns)

# Print basic dataset info
print("Number of rows:", data.shape[0])
print("Column names:", list(data.columns))
print("\nFirst 5 rows:")
print(data.head())

Number of rows: 4177
Column names: ['Sex', 'Length', 'Diameter', 'Height', 'Whole_weight', 'Shucked_weight', 'Viscera_weight', 'Shell_weight', 'Rings']

First 5 rows:
  Sex  Length  Diameter  Height  Whole_weight  Shucked_weight  Viscera_weight  \
0   M   0.455     0.365   0.095        0.5140          0.2245          0.1010   
1   M   0.350     0.265   0.090        0.2255          0.0995          0.0485   
2   F   0.530     0.420   0.135        0.6770          0.2565          0.1415   
3   M   0.440     0.365   0.125        0.5160          0.2155          0.1140   
4   I   0.330     0.255   0.080        0.2050          0.0895          0.0395   

   Shell_weight  Rings  
0         0.150     15  
1         0.070      7  
2         0.210      9  
3         0.155     10  
4         0.055      7  


In [34]:
# what is input:
# The physical measurements of abalone such as length,diameter, and shell weight.
# what is output:
# The age of the abalone.
# why output is numeric:
# Age is measured in years, which is a numeric and continuous value.

Convert Target

In [35]:
# According to problem statement:
# Age = Rings + 1.5

data["Age"] = data["Rings"] + 1.5

y = data["Age"].values.reshape(-1, 1)

Choose feature

In [36]:
# Selecting exactly 3 numeric features
# We avoid 'Sex' because it is categorical

X = data[["Length", "Diameter", "Shell_weight"]].values

In [37]:
# We select exactly three numeric features as required.
# These features are chosen because they directly relate
# to the physical growth of abalone.

# Feature 1: Length
#   As abalone grows older, its overall length increases.

# Feature 2: Diameter
#   Diameter gives an idea of body thickness, which also
#   increases with age.

# Feature 3: Shell_weight
#   The shell becomes heavier as the abalone ages,
#   making it a strong indicator of age.

Train–Test Split

In [38]:
# Manual train-test split
N = X.shape[0]
split_index = int(0.8 * N)

X_train = X[:split_index]
X_test  = X[split_index:]

y_train = y[:split_index]
y_test  = y[split_index:]

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (3341, 3)
y_train shape: (3341, 1)
X_test shape: (836, 3)
y_test shape: (836, 1)


Normalize Inputs

In [39]:
# Compute mean and std ONLY from training data
mean = X_train.mean(axis=0)
std  = X_train.std(axis=0)

# Normalize train and test using training statistics
X_train_norm = (X_train - mean) / std
X_test_norm  = (X_test - mean) / std

In [40]:
#why normalization is needed for learning:
#Different features have different units and scales.
#Without normalization, features with large values dominate the learning process.
#Normalization helps gradient descent converge faster and more stably.

Define the Model (Forward Pass)

In [41]:
def forward(X, w, b):

    #Computes y_hat = Xw + b

    y_hat = np.dot(X, w) + b

    # Print shapes once for verification
    print("X shape:", X.shape)
    print("w shape:", w.shape)
    print("b shape:", b.shape)
    print("y_hat shape:", y_hat.shape)

    return y_hat

In [42]:
# parameters are:
#   w (weights) and b (bias)
# number of parameters:
#   For d=3 → 3 weights + 1 bias = 4 parameters

Loss Function (MSE)

In [43]:
def mse(y, y_hat):
    #Mean Squared Error loss

    loss = np.mean((y - y_hat) ** 2)
    return loss

In [44]:
# why square:
# Squaring ensures all errors are positive and differentiable.
# what mistakes are expensive:
# Large errors are penalized much more than small ones.

In [45]:
# what gradient means in words:
# Gradient tells us the direction and rate at which loss increases.
# why subtracting gradient reduces loss:
# Moving opposite to gradient moves us downhill on the loss surface.

Learning Rule (Gradient Descent)

In [46]:
def grad_w(X, y, y_hat):

    #Gradient of loss w.r.t weights

    N = X.shape[0]
    dW = (2/N) * np.dot(X.T, (y_hat - y))
    return dW


def grad_b(y, y_hat):
    """
    Gradient of loss w.r.t bias
    """
    db = 2 * np.mean(y_hat - y)
    return db

In [47]:
# Checkpoint:
# meaning of large gradient:
# Model is far from optimal; loss changes sharply.
# effect of too-large learning rate:
# Model may overshoot minimum and diverge.

Training Loop

In [48]:
# Initialize parameters
np.random.seed(42)
w = np.random.randn(3, 1) * 0.01
b = np.zeros((1,))

learning_rate = 0.01
epochs = 1000

loss_history = []

for epoch in range(epochs):

    # 1) Forward pass
    y_hat = np.dot(X_train_norm, w) + b

    # 2) Compute loss
    loss = mse(y_train, y_hat)
    loss_history.append(loss)

    # 3) Gradients
    dW = grad_w(X_train_norm, y_train, y_hat)
    db = grad_b(y_train, y_hat)

    # 4) Update
    w -= learning_rate * dW
    b -= learning_rate * db

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

Epoch 0, Loss: 144.2597
Epoch 100, Loss: 9.1999
Epoch 200, Loss: 6.7905
Epoch 300, Loss: 6.6844
Epoch 400, Loss: 6.6434
Epoch 500, Loss: 6.6182
Epoch 600, Loss: 6.6020
Epoch 700, Loss: 6.5913
Epoch 800, Loss: 6.5838
Epoch 900, Loss: 6.5783


In [49]:
# Initial expectation:
# Loss should decrease slowly but steadily.
# Revised expectation after training:
# Loss decreases smoothly without instability.

Evaluation

In [50]:
# Predictions on test set
y_test_hat = np.dot(X_test_norm, w) + b

# Test MSE
test_mse = mse(y_test, y_test_hat)

# Test MAE
test_mae = np.mean(np.abs(y_test - y_test_hat))

print("Test MSE:", test_mse)
print("Test MAE:", test_mae)

Test MSE: 5.117594287711015
Test MAE: 1.7243548318088096


Example Predictions

In [51]:
print("\nSample Predictions:")
for i in range(5):
    print(
        f"True age: {y_test[i][0]:.2f}, "
        f"Predicted age: {y_test_hat[i][0]:.2f}, "
        f"Absolute error: {abs(y_test[i][0] - y_test_hat[i][0]):.2f}"
    )


Sample Predictions:
True age: 13.50, Predicted age: 10.79, Absolute error: 2.71
True age: 15.50, Predicted age: 9.65, Absolute error: 5.85
True age: 14.50, Predicted age: 10.06, Absolute error: 4.44
True age: 14.50, Predicted age: 11.12, Absolute error: 3.38
True age: 13.50, Predicted age: 11.43, Absolute error: 2.07


In [53]:
# systematic errors:
#   The model consistently underpredicts the age of older abalones.
# observed bias:
#   This bias occurs because the relationship between physical measurements and age is nonlinear, while our model is linear.