In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [16]:
# Load dataset
dataset = fetch_california_housing()
print(dataset)

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]]), 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]), 'frame': None, 'target_names': ['MedHouseVal'], 'feature_names': ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude'], 'DESCR': '.. _california_housing_dataset:\n\nCalifornia Housing dataset\n-

In [17]:
X = dataset.data
y = dataset.target
print(X,y)

[[   8.3252       41.            6.98412698 ...    2.55555556
    37.88       -122.23      ]
 [   8.3014       21.            6.23813708 ...    2.10984183
    37.86       -122.22      ]
 [   7.2574       52.            8.28813559 ...    2.80225989
    37.85       -122.24      ]
 ...
 [   1.7          17.            5.20554273 ...    2.3256351
    39.43       -121.22      ]
 [   1.8672       18.            5.32951289 ...    2.12320917
    39.43       -121.32      ]
 [   2.3886       16.            5.25471698 ...    2.61698113
    39.37       -121.24      ]] [4.526 3.585 3.521 ... 0.923 0.847 0.894]


In [18]:
# Train-Test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [20]:
# Adding a bias (column of ones) to features
X_train = np.c_[np.ones(X_train.shape[0]), X_train]
X_test = np.c_[np.ones(X_test.shape[0]), X_test]

In [21]:
# Initialising weights
n_features = X_train.shape[1]
weights = np.zeros(n_features)    # w0, w1, w2...

Helper Functions

In [22]:
# Prediction
def predict(X, weights):
    return np.dot(X, weights)

In [23]:
# Mean Squared Error
def mse(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

In [24]:
# Gradient Descent step
def gradient_descent_step(X, y_true, weights, learning_rate):
  n = len(y)
  y_pred = predict(X, weights)
  gradient = -(2/n) * np.dot(X.T, (y_true - y_pred))  #derivative of MSE
  weights = weights - learning_rate * gradient
  return weights

In [25]:
# Train the model
learning_rate = 0.01
epochs = 1000  # number of iterations

for i in range(epochs):
  weights = gradient_descent_step(X_train, y_train, weights, learning_rate)

  if i % 100 == 0:
    y_pred = predict(X_train, weights)
    loss = mse(y_train, y_pred)
    print(f"Epoch {i}, Training MSE: {loss:.4f}")


Epoch 0, Training MSE: 5.4706
Epoch 100, Training MSE: 0.8204
Epoch 200, Training MSE: 0.6116
Epoch 300, Training MSE: 0.5848
Epoch 400, Training MSE: 0.5697
Epoch 500, Training MSE: 0.5583
Epoch 600, Training MSE: 0.5495
Epoch 700, Training MSE: 0.5426
Epoch 800, Training MSE: 0.5373
Epoch 900, Training MSE: 0.5331


In [26]:
# Evaluate on Test Data
y_test_pred = predict(X_test, weights)
test_loss = mse(y_test, y_test_pred)
print(f"\nFinal Test MSE: {test_loss:.4f}")


Final Test MSE: 0.5564
