In [1]:
from sklearn.datasets import fetch_california_housing

import pandas as pd

california = fetch_california_housing(as_frame=True)

df = california.frame
print(df.head)

<bound method NDFrame.head of        MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0      8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1      8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2      7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3      5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4      3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   
...       ...       ...       ...        ...         ...       ...       ...   
20635  1.5603      25.0  5.045455   1.133333       845.0  2.560606     39.48   
20636  2.5568      18.0  6.114035   1.315789       356.0  3.122807     39.49   
20637  1.7000      17.0  5.205543   1.120092      1007.0  2.325635     39.43   
20638  1.8672      18.0  5.329513   1.171920       741.0  2.123209     39.43   
20639  2.3886      16.0  5.254717   1.162264      1387.0  2.616981     39.37   

       Lo

In [6]:
from sklearn.model_selection import train_test_split

X = df.drop("MedHouseVal", axis= 1).values
y = df["MedHouseVal"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, shuffle=True)
print(X_train.shape)


(16512, 8)


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class CaliforniaHousingDataset(Dataset):
    def __init__(self, X, y):
        super().__init__()
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]
    
train_dataset = CaliforniaHousingDataset(X_train, y_train)
test_dataset = CaliforniaHousingDataset(X_test, y_test)

In [24]:
import torch.nn as nn
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32)

class RegressionModel(nn.Module):
    def __init__(self, in_features):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(in_features, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128,1)
        )

    def forward(self, X):
        return self.model(X)

In [27]:

in_features = len(train_dataset.X[0])
model = RegressionModel(in_features)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.to(device)
loss_fn = nn.MSELoss()
optim = torch.optim.Adam(model.parameters(), lr=0.001)


model.train()
def train(dataloader, model, loss_fn, optim):
    expected_loss = 0
    num_batches = len(dataloader)
    for (X,y) in (dataloader):
        X, y = X.to(device), y.to(device)

        optim.zero_grad()

        y_hat = model(X)
        loss = loss_fn(y_hat, y)

        loss.backward()
        optim.step()
    
        expected_loss += loss.item()        

    return expected_loss / num_batches

for epoch in range(50):
    avg_loss = train(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optim=optim)
    print(f"Epoch {epoch}: {avg_loss}")
        

Epoch 0: 30.269360798158388
Epoch 1: 1.8248338556104853
Epoch 2: 3.8187041990747748
Epoch 3: 3.913353984323583
Epoch 4: 6.313650853874147
Epoch 5: 1.9551215682380882
Epoch 6: 4.669329085437826
Epoch 7: 9.45811692677265
Epoch 8: 1.4239951932961628
Epoch 9: 1.475822536982307
Epoch 10: 1.4392679448044576
Epoch 11: 2.1586488626262015
Epoch 12: 1.441072925694229
Epoch 13: 1.5848905117821324
Epoch 14: 1.63350149763878
Epoch 15: 1.8285119366045146
Epoch 16: 1.5332544641208279
Epoch 17: 1.5047625075708064
Epoch 18: 1.7112119576727698
Epoch 19: 1.3761542140282401
Epoch 20: 1.4144404233657113
Epoch 21: 1.3806727147379587
Epoch 22: 1.3835528023367705
Epoch 23: 1.3637934654026993
Epoch 24: 1.3478941475005113
Epoch 25: 1.3732606661181117
Epoch 26: 1.3507160679545513
Epoch 27: 1.3600786840037782
Epoch 28: 1.3491557801878729
Epoch 29: 1.3567999839320664
Epoch 30: 1.342363494542218
Epoch 31: 1.3395521832528965
Epoch 32: 1.3424408196132311
Epoch 33: 1.3501202994307806
Epoch 34: 1.352609814599503
Epoch 

In [28]:
model.eval()

total_mae = 0
total_loss = 0
total_samples = 0
for X, y in test_dataloader:
    with torch.no_grad():
        X, y = X.to(device), y.to(device)
        y_hat = model(X)
        loss = loss_fn(y_hat.squeeze(-1), y)

        total_loss += loss.item() * X.size(0)
        total_samples += X.size(0)

        mae = torch.abs(y_hat.squeeze()-y).sum()
        total_mae += mae.item()

avg_mae = total_mae / total_samples
avg_loss = total_loss / total_samples
print(f"Test MAE: {avg_mae:.4f} {avg_loss:.4f}")

print(torch.min(y), torch.max(y), torch.mean(y))

    





Test MAE: 0.9053 1.3160
tensor(0.4460) tensor(5.0000) tensor(2.0148)
