In [2]:
# prompt: build a similar code but use pytorch

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the California Housing dataset
boston = fetch_california_housing()

# Preprocess the data
scaler = StandardScaler()
X = scaler.fit_transform(boston.data)
y = boston.target.reshape(-1, 1)

# Convert to PyTorch tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the neural network architecture
class Net(nn.Module):
    def __init__(self, layers, activation):
        super(Net, self).__init__()
        self.layers = nn.ModuleList()
        for i in range(len(layers) - 1):
            self.layers.append(nn.Linear(layers[i], layers[i+1]))
            if i < len(layers) - 2:
              if activation == 'relu':
                self.layers.append(nn.ReLU())
              elif activation == 'tanh':
                self.layers.append(nn.Tanh())
              elif activation == 'sigmoid':
                self.layers.append(nn.Sigmoid())

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

# Define a list of architectures to compare
architectures = [
    [X.shape[1], 64, 1],
    [X.shape[1], 128, 1],
    [X.shape[1], 64, 64, 1],
    [X.shape[1], 128, 64, 1],
]

# Define a list of activation functions to compare
activation_functions = [
    'relu',
    'tanh',
    'sigmoid',
]

# Define a list of learning rates to compare
learning_rates = [
    0.001,
    0.01,
    0.1,
]

# Train and evaluate each model
for architecture in architectures:
    for activation_function in activation_functions:
        for learning_rate in learning_rates:
            # Define and train the model
            model = Net(architecture, activation_function)
            criterion = nn.MSELoss()
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)

            # Training loop
            epochs = 20
            for epoch in range(epochs):
                optimizer.zero_grad()
                outputs = model(X_train)
                loss = criterion(outputs, y_train)
                loss.backward()
                optimizer.step()

            # Evaluate the model
            with torch.no_grad():
              outputs_test = model(X_test)
              val_loss = criterion(outputs_test, y_test)
              print(f'Architecture: {architecture[1:]}, Activation Function: {activation_function}, Learning Rate: {learning_rate}')
              print(f'Validation Loss: {val_loss:.4f}\n')


Architecture: [64, 1], Activation Function: relu, Learning Rate: 0.001
Validation Loss: 3.9237

Architecture: [64, 1], Activation Function: relu, Learning Rate: 0.01
Validation Loss: 1.0346

Architecture: [64, 1], Activation Function: relu, Learning Rate: 0.1
Validation Loss: 0.4602

Architecture: [64, 1], Activation Function: tanh, Learning Rate: 0.001
Validation Loss: 3.5216

Architecture: [64, 1], Activation Function: tanh, Learning Rate: 0.01
Validation Loss: 0.8417

Architecture: [64, 1], Activation Function: tanh, Learning Rate: 0.1
Validation Loss: 0.5510

Architecture: [64, 1], Activation Function: sigmoid, Learning Rate: 0.001
Validation Loss: 3.5505

Architecture: [64, 1], Activation Function: sigmoid, Learning Rate: 0.01
Validation Loss: 0.8226

Architecture: [64, 1], Activation Function: sigmoid, Learning Rate: 0.1
Validation Loss: 0.4826

Architecture: [128, 1], Activation Function: relu, Learning Rate: 0.001
Validation Loss: 3.7046

Architecture: [128, 1], Activation Func