# Assignment 4 — Linear Regression Neuron from Scratch
Predicting **abalone age** (Rings + 1.5) from physical measurements using a single neuron with gradient descent.

## 1. Install & Import

In [1]:
!pip install ucimlrepo -q

In [2]:
import numpy as np
import pandas as pd
import random
from ucimlrepo import fetch_ucirepo

## 2. Load the Abalone Dataset

In [3]:
# Download the Abalone dataset (UCI ML Repository id=1)
abalone_repo = fetch_ucirepo(id=1)
X_raw, y_raw = abalone_repo.data.features, abalone_repo.data.targets

# Place features and target side by side in one DataFrame
df = pd.concat((X_raw, y_raw), axis='columns')
print('Shape:', df.shape)
df.head()

Shape: (4177, 9)


Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [4]:
# Quick look at the column names and the number of rows
print('Columns:', list(df.columns))
print('Rows:   ', df.shape[0])

Columns: ['Sex', 'Length', 'Diameter', 'Height', 'Whole_weight', 'Shucked_weight', 'Viscera_weight', 'Shell_weight', 'Rings']
Rows:    4177


**Dataset Q&A:**

1. **Input features:** `Sex`, `Length`, `Diameter`, `Height`, `Whole_weight`, `Shucked_weight`, `Viscera_weight`, `Shell_weight`
2. **Output:** Age of the abalone (numeric), derived from the dataset's `Rings` column as Age = Rings + 1.5
3. **Why numeric output?** → Age is a continuous value, making this a regression problem.

> We use only `Length`, `Diameter`, and `Whole_weight` as inputs to keep the model simple.

## 3. Prepare Features & Target

In [5]:
# Target: abalone age, derived from the ring count (Age = Rings + 1.5)
age = df['Rings'] + 1.5

# Restrict the inputs to the 3 chosen measurements and attach the target column
feature_cols = ['Length', 'Diameter', 'Whole_weight']
df_model = df[feature_cols].assign(Age=age)

print('Final dataset shape:', df_model.shape)
df_model.head()

Final dataset shape: (4177, 4)


Unnamed: 0,Length,Diameter,Whole_weight,Age
0,0.455,0.365,0.514,16.5
1,0.35,0.265,0.2255,8.5
2,0.53,0.42,0.677,10.5
3,0.44,0.365,0.516,11.5
4,0.33,0.255,0.205,8.5


In [6]:
# Pull the model inputs and the target out of the DataFrame as NumPy arrays
X = df_model[feature_cols].to_numpy()   # shape: (n_samples, 3)
y = df_model['Age'].to_numpy()          # shape: (n_samples,)

print('X shape:', X.shape)
print('y shape:', y.shape)

X shape: (4177, 3)
y shape: (4177,)


## 4. Train-Test Split

In [7]:
def train_test_split_custom(X, y, test_size=0.3, random_seed=None):
    """Shuffle the samples and split them into train and test sets.

    Parameters
    ----------
    X : array-like
        Features; the first axis indexes samples.
    y : array-like
        Targets; must have the same number of entries as ``X``.
    test_size : float, default 0.3
        Fraction of samples placed in the test set, in ``[0, 1]``.
    random_seed : int or None, default None
        Seed for the shuffle. With a seed, the permutation is the one
        ``random.seed(seed); random.shuffle(...)`` would produce, but the
        global ``random`` state is left untouched. With ``None``, the global
        ``random`` generator is used as-is.

    Returns
    -------
    X_train, X_test, y_train, y_test : np.ndarray
        Either split may be empty (first axis of length 0).

    Raises
    ------
    ValueError
        If ``test_size`` is outside ``[0, 1]`` or ``X`` and ``y`` differ in
        length.
    """
    if not 0.0 <= test_size <= 1.0:
        raise ValueError(f"test_size must be in [0, 1], got {test_size!r}")

    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = len(X)
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples, got {n_samples} and {len(y)}"
        )

    # Shuffle row indices rather than (x, y) pairs: the permutation is the same
    # (shuffle depends only on the length), and indexing works for empty splits.
    order = list(range(n_samples))
    # A private generator keeps seeding from leaking into the global RNG state.
    shuffler = random.Random(random_seed) if random_seed is not None else random
    shuffler.shuffle(order)
    order = np.array(order, dtype=np.intp)

    split_idx = int(n_samples * (1 - test_size))
    train_idx, test_idx = order[:split_idx], order[split_idx:]

    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]


# 80/20 split with a fixed seed for the shuffle
X_train, X_test, y_train, y_test = train_test_split_custom(
    X, y, test_size=0.2, random_seed=42
)

# Targets become (n, 1) column vectors so they line up with the predictions
y_train = np.reshape(y_train, (-1, 1))
y_test  = np.reshape(y_test, (-1, 1))

print('X_train:', X_train.shape, ' y_train:', y_train.shape)
print('X_test: ', X_test.shape,  ' y_test: ', y_test.shape)

X_train: (3341, 3)  y_train: (3341, 1)
X_test:  (836, 3)  y_test:  (836, 1)


## 5. Feature Scaling
Standard Scaler: $z = \frac{x - \mu}{\sigma}$

> Fit only on training data to avoid data leakage.

In [8]:
class StandardScaler:
    """Standardize features to zero mean and unit variance.

    Statistics are computed per column (axis 0) by ``fit`` and reused by
    ``transform`` and ``inverse_transform``. Columns with zero standard
    deviation are divided by 1 instead of 0, so constant features map to 0
    rather than NaN/inf.
    """

    def __init__(self):
        self.mean_ = None  # per-feature mean, set by fit()
        self.std_  = None  # per-feature standard deviation (np.std default), set by fit()

    def _safe_std(self):
        """Return ``std_`` with zeros replaced by 1 to avoid division by zero."""
        return np.where(self.std_ == 0, 1.0, self.std_)

    def fit(self, X):
        """Compute the per-feature mean and standard deviation of ``X``.

        Returns the scaler itself so calls can be chained.
        """
        X = np.asarray(X)
        self.mean_ = X.mean(axis=0)
        self.std_  = X.std(axis=0)
        return self

    def transform(self, X):
        """Return ``(X - mean_) / std_`` using the statistics stored by ``fit``."""
        return (np.asarray(X) - self.mean_) / self._safe_std()

    def fit_transform(self, X):
        """Fit on ``X`` and return the standardized ``X``."""
        return self.fit(X).transform(X)

    def inverse_transform(self, X_scaled):
        """Map standardized values back to the original feature scale."""
        return np.asarray(X_scaled) * self._safe_std() + self.mean_


# Learn mean/std from the training split only, then apply them to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled  = scaler.transform(X_test)

## 6. Neuron Functions

In [9]:
def forward(X, weights, bias):
    """Compute the neuron's linear output ``X·w + b`` (no activation applied)."""
    weighted_sum = np.dot(X, weights)
    return weighted_sum + bias


def mean_squared_error(y_true, y_pred):
    """Mean squared error between targets and predictions.

    Parameters
    ----------
    y_true, y_pred : array-like
        Targets and predictions. If they hold the same number of elements but
        differ in dimensionality (e.g. ``(n,)`` vs ``(n, 1)``), both are
        flattened first; otherwise ``y_true - y_pred`` would broadcast to an
        ``(n, n)`` matrix and the loss would be silently wrong.

    Returns
    -------
    float
        Mean of the squared element-wise differences.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.ndim != y_pred.ndim and y_true.size == y_pred.size:
        y_true, y_pred = y_true.ravel(), y_pred.ravel()
    return np.mean((y_true - y_pred) ** 2)


def compute_grad_weights(X, y_true, y_pred):
    """Gradient of the MSE loss with respect to the weights.

    Computes ``(-2 / n) * X^T (y_true - y_pred)`` with ``n = len(y_true)``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Inputs used to produce ``y_pred``.
    y_true, y_pred : array-like
        Targets and predictions. If they hold the same number of elements but
        differ in dimensionality (e.g. ``(n,)`` vs ``(n, 1)``), ``y_true`` is
        reshaped to ``y_pred``'s shape so the residual is element-wise instead
        of an accidental ``(n, n)`` broadcast.

    Returns
    -------
    np.ndarray
        Gradient with ``n_features`` rows; trailing shape follows ``y_pred``.
    """
    X = np.asarray(X)
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    n_samples = len(y_true)
    if y_true.ndim != y_pred.ndim and y_true.size == y_pred.size:
        y_true = y_true.reshape(y_pred.shape)
    residual = y_true - y_pred
    return (-2 / n_samples) * np.dot(X.T, residual)


def compute_grad_bias(y_true, y_pred):
    """Gradient of the MSE loss with respect to the bias.

    Computes ``(-2 / n) * sum(y_true - y_pred)`` with ``n = len(y_true)``.

    Parameters
    ----------
    y_true, y_pred : array-like
        Targets and predictions. If they hold the same number of elements but
        differ in dimensionality (e.g. ``(n,)`` vs ``(n, 1)``), both are
        flattened so the residual is element-wise instead of an accidental
        ``(n, n)`` broadcast that would inflate the sum.

    Returns
    -------
    float
        Scalar bias gradient.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    n_samples = len(y_true)
    if y_true.ndim != y_pred.ndim and y_true.size == y_pred.size:
        y_true, y_pred = y_true.ravel(), y_pred.ravel()
    return (-2 / n_samples) * np.sum(y_true - y_pred)

## 7. Initialize & Train

In [10]:
n_features = X_train_scaled.shape[1]

# Seeded generator so that Restart & Run All reproduces the same initial
# weights and therefore the same loss curve.
rng = np.random.default_rng(42)

# Small random weights, zero bias
weights = rng.standard_normal((n_features, 1)) * 0.02
bias    = 0.0

learning_rate = 0.01
num_epochs    = 1000
loss_history  = []

# Full-batch gradient descent on the MSE loss
for epoch in range(num_epochs):
    # Loss and gradients are evaluated with the parameters from before this epoch's update
    y_pred        = forward(X_train_scaled, weights, bias)
    current_loss  = mean_squared_error(y_train, y_pred)
    grad_weights  = compute_grad_weights(X_train_scaled, y_train, y_pred)
    grad_bias     = compute_grad_bias(y_train, y_pred)

    # Step against the gradient
    weights -= learning_rate * grad_weights
    bias    -= learning_rate * grad_bias
    loss_history.append(current_loss)

    # Log every 100 epochs, plus the last epoch so the final loss is visible
    if epoch % 100 == 0 or epoch == num_epochs - 1:
        print(f'Epoch {epoch:>4}  |  Loss: {current_loss:.4f}')

Epoch    0  |  Loss: 141.9052
Epoch  100  |  Loss: 9.4552
Epoch  200  |  Loss: 7.1732
Epoch  300  |  Loss: 7.1253
Epoch  400  |  Loss: 7.1177
Epoch  500  |  Loss: 7.1116
Epoch  600  |  Loss: 7.1061
Epoch  700  |  Loss: 7.1011
Epoch  800  |  Loss: 7.0964
Epoch  900  |  Loss: 7.0921


## 8. Evaluate on Test Set

In [11]:
# Predict on the held-out samples with the trained weights and bias
y_test_pred = forward(X_test_scaled, weights, bias)

# Squared-error and absolute-error summaries of the test predictions
test_mse = mean_squared_error(y_test, y_test_pred)
test_mae = np.abs(y_test - y_test_pred).mean()

print(f'Test MSE: {test_mse:.4f}')
print(f'Test MAE: {test_mae:.4f}')

Test MSE: 6.5570
Test MAE: 1.8789


In [12]:
# Actual vs. predicted age next to the (unscaled) test features;
# Error is predicted minus actual, computed from the rounded prediction.
results_df = pd.DataFrame(X_test, columns=feature_cols).assign(
    Actual_Age=y_test.flatten(),
    Predicted_Age=y_test_pred.flatten().round(2),
    Error=lambda frame: (frame['Predicted_Age'] - frame['Actual_Age']).round(2),
)
results_df.head(10)

Unnamed: 0,Length,Diameter,Whole_weight,Actual_Age,Predicted_Age,Error
0,0.275,0.195,0.0875,5.5,7.7,2.2
1,0.585,0.46,0.7635,10.5,12.22,1.72
2,0.595,0.455,0.886,11.5,12.26,0.76
3,0.4,0.295,0.252,7.5,9.37,1.87
4,0.67,0.54,1.482,11.5,13.89,2.39
5,0.6,0.475,1.13,10.5,12.67,2.17
6,0.595,0.465,1.107,13.5,12.52,-0.98
7,0.595,0.475,1.0305,11.5,12.6,1.1
8,0.45,0.335,0.3505,8.5,10.06,1.56
9,0.62,0.51,1.2815,11.5,13.26,1.76
