In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Run on the GPU when one is available, otherwise fall back to the CPU so the
# notebook still executes on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fixed seed so both splits are identical after every Restart & Run All.
SPLIT_SEED = 42


def to_float_tensor(array):
    """Copy an array-like into a float32 tensor on the selected device."""
    return torch.as_tensor(array, dtype=torch.float32, device=device)


housing = fetch_california_housing()

# First split off the test set, then carve a validation set out of the rest.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=SPLIT_SEED
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=SPLIT_SEED
)

scaler = StandardScaler()
# Learn the scaling parameters (mean and std) on the training data only ...
X_train = to_float_tensor(scaler.fit_transform(X_train))
# ... and reuse them for the validation and test sets.
X_valid = to_float_tensor(scaler.transform(X_valid))
X_test = to_float_tensor(scaler.transform(X_test))

# Targets get a trailing dimension of size 1 so they line up with the
# (n_samples, 1) outputs produced by the model's final linear layers.
y_train = to_float_tensor(y_train).unsqueeze(1)
y_valid = to_float_tensor(y_valid).unsqueeze(1)
y_test = to_float_tensor(y_test).unsqueeze(1)

class WideAndDeepModel(nn.Module):
    """Wide & Deep regressor with a main output and an auxiliary output.

    The deep input passes through two ReLU hidden layers. The main output is
    computed from the wide input concatenated with the second hidden layer's
    activations; the auxiliary output is computed from the second hidden
    layer alone.

    Args:
        units: Width of each hidden layer.
        wide_features: Number of features in the wide input (``input_A``).
        deep_features: Number of features in the deep input (``input_B``).
    """

    def __init__(self, units=30, wide_features=5, deep_features=6):
        super().__init__()
        self.hidden1 = nn.Linear(deep_features, units)
        self.hidden2 = nn.Linear(units, units)
        # The main head consumes cat(input_A, hidden2), so its input width
        # must follow both the wide feature count and `units`; a fixed 35
        # only matches the defaults (5 + 30).
        self.main_output = nn.Linear(wide_features + units, 1)
        self.aux_output = nn.Linear(units, 1)

    def forward(self, inputs):
        """Compute both outputs for an ``(input_A, input_B)`` pair.

        Args:
            inputs: Two-element sequence ``(input_A, input_B)`` holding the
                wide and deep feature tensors, with features on the last
                dimension.

        Returns:
            Tuple ``(main_output, aux_output)``, each with a last dimension
            of size 1.
        """
        input_A, input_B = inputs
        hidden1 = F.relu(self.hidden1(input_B))
        hidden2 = F.relu(self.hidden2(hidden1))
        # Wide path skips the hidden layers and joins the deep path here.
        concat = torch.cat((input_A, hidden2), dim=-1)
        main_output = self.main_output(concat)
        aux_output = self.aux_output(hidden2)
        return main_output, aux_output
# Seed PyTorch so the weight initialisation is repeatable across runs.
INIT_SEED = 42
torch.manual_seed(INIT_SEED)

# Put the model on whichever device already holds the training data instead
# of assuming CUDA is present.
model = WideAndDeepModel().to(X_train.device)
criterion = nn.MSELoss()
# Small step size: with one full-batch update per epoch the loss decreases
# only slowly.
optimizer = optim.SGD(model.parameters(), lr=1e-4)

# Wide input: first five columns. Deep input: columns 2 onward. The two
# slices overlap on columns 2-4.
X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]
X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]
X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]

# Relative weight of the main and auxiliary outputs in the total loss.
MAIN_LOSS_WEIGHT = 0.9
AUX_LOSS_WEIGHT = 0.1


def weighted_loss(outputs, targets):
    """Return the weighted sum of the MSE on the main and auxiliary outputs.

    Args:
        outputs: ``(main_output, aux_output)`` pair as returned by the model.
        targets: Target tensor compared against both outputs.
    """
    main_output, aux_output = outputs
    return (MAIN_LOSS_WEIGHT * criterion(main_output, targets)
            + AUX_LOSS_WEIGHT * criterion(aux_output, targets))


def evaluate(inputs, targets):
    """Return the weighted loss as a float, in eval mode with gradients off."""
    model.eval()
    with torch.no_grad():
        return weighted_loss(model(inputs), targets).item()


NUM_EPOCHS = 20
for epoch in range(NUM_EPOCHS):
    # One full-batch gradient step per epoch.
    model.train()
    optimizer.zero_grad()
    train_loss = weighted_loss(model((X_train_A, X_train_B)), y_train)
    train_loss.backward()
    optimizer.step()

    valid_loss = evaluate((X_valid_A, X_valid_B), y_valid)
    print(f"Epoch {epoch + 1}, train_loss: {train_loss.item()} valid_loss: {valid_loss}")

# Final evaluation on the held-out test set. The outputs and loss are kept in
# module-level names so they remain available after this cell has run.
model.eval()
with torch.no_grad():
    main_output, aux_output = model((X_test_A, X_test_B))
    loss = weighted_loss((main_output, aux_output), y_test)
    total_loss = loss.item()
    print(f"test_loss: {total_loss}")


Epoch 1, train_loss: 5.918081283569336 valid_loss: 5.738646507263184
Epoch 2, train_loss: 5.914943218231201 valid_loss: 5.735588073730469
Epoch 3, train_loss: 5.911807060241699 valid_loss: 5.732532978057861
Epoch 4, train_loss: 5.908674240112305 valid_loss: 5.729480743408203
Epoch 5, train_loss: 5.905543804168701 valid_loss: 5.7264299392700195
Epoch 6, train_loss: 5.902416229248047 valid_loss: 5.723382472991943
Epoch 7, train_loss: 5.8992919921875 valid_loss: 5.720338821411133
Epoch 8, train_loss: 5.896169662475586 valid_loss: 5.717297554016113
Epoch 9, train_loss: 5.893051624298096 valid_loss: 5.714259147644043
Epoch 10, train_loss: 5.889935493469238 valid_loss: 5.711223125457764
Epoch 11, train_loss: 5.88682222366333 valid_loss: 5.708190441131592
Epoch 12, train_loss: 5.883710861206055 valid_loss: 5.705160140991211
Epoch 13, train_loss: 5.880603790283203 valid_loss: 5.702131748199463
Epoch 14, train_loss: 5.877500534057617 valid_loss: 5.699107646942139
Epoch 15, train_loss: 5.8743977