# In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

# Function to split the data
def train_test_val_split(X, y, train_size, test_size, val_size):
    """Split X and y into contiguous train, test and validation partitions.

    Rows are taken in their existing order (nothing is shuffled): the
    leading ``train_size`` fraction forms the training set, the following
    ``test_size`` fraction the test set, and the validation set comes next.
    Shuffle the inputs beforehand if their stored order is meaningful.

    Args:
        X: Sliceable feature container exposing ``shape[0]`` as its row
            count (for example a NumPy array).
        y: Targets, sliced with the same row ranges as ``X``.
        train_size: Fraction of rows for the training set.
        test_size: Fraction of rows for the test set.
        val_size: Fraction of rows for the validation set.

    Returns:
        Tuple ``(X_train, X_test, X_val, y_train, y_test, y_val)``.

    Raises:
        ValueError: If any fraction is negative or the fractions sum to
            more than 1.
    """
    fractions = (train_size, test_size, val_size)
    if any(fraction < 0 for fraction in fractions):
        raise ValueError("train_size, test_size and val_size must be non-negative")

    total = sum(fractions)
    tolerance = 1e-9  # absorbs float error in sums such as 0.7 + 0.15 + 0.15
    if total > 1.0 + tolerance:
        raise ValueError("train_size + test_size + val_size must not exceed 1")

    n_rows = X.shape[0]
    train_end = int(n_rows * train_size)
    test_end = train_end + int(n_rows * test_size)

    if abs(total - 1.0) <= tolerance:
        # The fractions cover the whole dataset: hand every remaining row to
        # validation so rows lost to int() truncation are not dropped.
        val_end = n_rows
    else:
        # Honour val_size instead of silently taking all remaining rows.
        val_end = test_end + int(n_rows * val_size)

    X_train, y_train = X[:train_end], y[:train_end]
    X_test, y_test = X[train_end:test_end], y[train_end:test_end]
    X_val, y_val = X[test_end:val_end], y[test_end:val_end]

    return X_train, X_test, X_val, y_train, y_test, y_val


# Load the California housing data and carve it into 70/15/15 partitions.
# NOTE(review): the split helper slices rows in their stored order without
# shuffling; confirm that is intended for this dataset.
data = fetch_california_housing()
X = data.data
y = data.target

(
    X_train, X_test, X_val,
    y_train, y_test, y_val,
) = train_test_val_split(X, y, train_size=0.7, test_size=0.15, val_size=0.15)

# Standardize features; the scaler is fitted on the training rows only and
# then reused for the held-out partitions.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test, X_val = (scaler.transform(part) for part in (X_test, X_val))

# Feature matrices become float32 tensors.
X_train_tensors, X_test_tensors, X_val_tensors = (
    torch.tensor(part, dtype=torch.float32)
    for part in (X_train, X_test, X_val)
)

# Targets become float32 column vectors of shape (n, 1).
y_train_tensors, y_test_tensors, y_val_tensors = (
    torch.tensor(part, dtype=torch.float32).view(-1, 1)
    for part in (y_train, y_test, y_val)
)

# Custom dataset class
class HousingDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    Args:
        features: Sized, indexable collection of samples.
        targets: Sized, indexable collection of targets, one per sample.

    Raises:
        ValueError: If ``features`` and ``targets`` differ in length.
    """

    def __init__(self, features, targets):
        # Fail fast: a mismatch would otherwise show up as an IndexError
        # mid-epoch (targets too short) or as silently unused targets.
        if len(features) != len(targets):
            raise ValueError(
                f"features and targets must have the same length, "
                f"got {len(features)} and {len(targets)}"
            )
        self.features = features
        self.targets = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        return self.features[idx], self.targets[idx]

# Wrap each (features, targets) tensor pair in a HousingDataset.
train_tensors, test_tensors, val_tensors = (
    HousingDataset(features, targets)
    for features, targets in (
        (X_train_tensors, y_train_tensors),
        (X_test_tensors, y_test_tensors),
        (X_val_tensors, y_val_tensors),
    )
)

# Batches of 64; only the training loader reshuffles every epoch, the
# evaluation loaders keep a fixed order.
train_loader = DataLoader(dataset=train_tensors, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_tensors, batch_size=64, shuffle=False)
val_loader = DataLoader(dataset=val_tensors, batch_size=64, shuffle=False)

# Define the model
class HousingModel(nn.Module):
    """Fully connected regressor: input_dim -> 128 -> 64 -> 1.

    A ReLU follows each of the two hidden linear layers; the final layer
    emits a single unactivated value per sample.
    """

    def __init__(self, input_dim):
        """Build the layer stack for inputs with ``input_dim`` features."""
        super().__init__()
        layers = [
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the network and return its output."""
        prediction = self.network(x)
        return prediction

# Size the network to the feature count, then set up the mean-squared-error
# criterion and an AdamW optimizer over all model parameters.
input_dim = X_train.shape[1]
model = HousingModel(input_dim)
loss_func = nn.MSELoss()
optimizer = optim.AdamW(params=model.parameters(), lr=1e-3)

# Training loop with validation
epochs = 50

for epoch in range(epochs):
    # Training phase: one optimizer step per batch.
    model.train()
    train_loss = 0.0
    train_seen = 0

    for feature, target in train_loader:
        optimizer.zero_grad()
        output = model(feature)
        loss = loss_func(output, target)
        loss.backward()
        optimizer.step()

        # The criterion yields a per-batch mean (assuming its default "mean"
        # reduction), so weight it by the batch's sample count. Averaging
        # the batch means directly would over-weight a short final batch.
        batch_count = feature.size(0)
        train_loss += loss.item() * batch_count
        train_seen += batch_count

    train_loss /= train_seen

    # Validation phase: no gradient tracking, no parameter updates.
    model.eval()
    val_loss = 0.0
    val_seen = 0
    with torch.no_grad():
        for feature, target in val_loader:
            output = model(feature)
            loss = loss_func(output, target)

            batch_count = feature.size(0)
            val_loss += loss.item() * batch_count
            val_seen += batch_count

    val_loss /= val_seen
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")


Epoch 1/50, Training Loss: 1.0372, Validation Loss: 0.3944
Epoch 2/50, Training Loss: 0.3957, Validation Loss: 0.3364
Epoch 3/50, Training Loss: 0.3582, Validation Loss: 0.3501
Epoch 4/50, Training Loss: 0.3392, Validation Loss: 0.4085
Epoch 5/50, Training Loss: 0.3275, Validation Loss: 0.4569
Epoch 6/50, Training Loss: 0.3322, Validation Loss: 0.3671
Epoch 7/50, Training Loss: 0.2991, Validation Loss: 0.3550
Epoch 8/50, Training Loss: 0.2890, Validation Loss: 0.3747
Epoch 9/50, Training Loss: 0.2865, Validation Loss: 0.5410
Epoch 10/50, Training Loss: 0.2834, Validation Loss: 0.4047
Epoch 11/50, Training Loss: 0.2850, Validation Loss: 0.5024
Epoch 12/50, Training Loss: 0.2797, Validation Loss: 0.3429
Epoch 13/50, Training Loss: 0.2647, Validation Loss: 0.3369
Epoch 14/50, Training Loss: 0.2628, Validation Loss: 0.3651
Epoch 15/50, Training Loss: 0.2616, Validation Loss: 0.3560
Epoch 16/50, Training Loss: 0.2561, Validation Loss: 0.3325
Epoch 17/50, Training Loss: 0.2553, Validation Lo