Idea: https://www.kaggle.com/code/stefancomanita/regression-with-neural-networks-using-pytorc

# California Housing

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Check GPU support

In [2]:
# Choose the compute device once: the GPU when PyTorch can see CUDA, otherwise the CPU.
use_cuda = torch.cuda.is_available()
print(
    "CUDA (GPU support) is available in PyTorch!"
    if use_cuda
    else "CUDA (GPU support) is not available. Using CPU."
)
device = torch.device("cuda" if use_cuda else "cpu")

CUDA (GPU support) is available in PyTorch!


### Load dataset

In [3]:
from sklearn.datasets import fetch_california_housing

# Fetch the California housing data; as_frame=True yields pandas objects
california_housing = fetch_california_housing(as_frame=True)

# Features and regression target, unpacked in one step
X, y = california_housing.data, california_housing.target

# Preview the first rows of the features, then of the target
for preview in (X.head(), y.head()):
    print(preview)

   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  
0    -122.23  
1    -122.22  
2    -122.24  
3    -122.25  
4    -122.25  
0    4.526
1    3.585
2    3.521
3    3.413
4    3.422
Name: MedHouseVal, dtype: float64


### Check correlation matrix

In [4]:
import pandas as pd

# Assemble features and target into a single frame so they can be correlated together
california_housing_df = pd.DataFrame(
    california_housing.data, columns=california_housing.feature_names
).assign(MedHouseVal=california_housing.target)

# Pairwise correlations between every column
corr_matrix = california_housing_df.corr()

# Correlation of each column with the target, strongest positive first
corr_matrix.loc[:, 'MedHouseVal'].sort_values(ascending=False)

MedHouseVal    1.000000
MedInc         0.688075
AveRooms       0.151948
HouseAge       0.105623
AveOccup      -0.023737
Population    -0.024650
Longitude     -0.045967
AveBedrms     -0.046701
Latitude      -0.144160
Name: MedHouseVal, dtype: float64

### Drop negatively correlated columns (currently disabled)

In [5]:
# Disabled experiment: every line below is commented out, so X keeps all of
# its feature columns when this cell runs.
# NOTE(review): as written, `X.dropna()` does not assign its result, so it
# would leave X unchanged even if re-enabled; `columns=` already implies
# axis=1, so the extra `axis=1` is redundant.
#X = X.drop(columns=['AveOccup', 'Population', 'Longitude', 'AveBedrms', 'Latitude'], axis=1)
#X.dropna()
#X.shape

### Creating training & testing sets

In [6]:
# Split BEFORE scaling so the held-out rows never contribute to the
# standardisation statistics. Fitting the scaler on all rows leaks test-set
# information into training and makes the reported test error optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X, y.values, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same transform to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep `X` bound to a scaled array (later cells read X.shape[1]), but
# standardised with the train-only statistics learned above.
X = scaler.transform(X)

# Convert to PyTorch tensors
X_train, X_test, y_train, y_test = map(
    torch.tensor, (X_train, X_test, y_train, y_test)
)

# Create Tensor datasets; cast to float32 to match the dtype of nn.Linear parameters
train_ds = TensorDataset(X_train.float(), y_train.float())
test_ds = TensorDataset(X_test.float(), y_test.float())

# Data loaders: shuffle only the training data; evaluation order is irrelevant
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=32)

### Define model

In [None]:
# Disabled alternative architecture (input -> 128 -> 64 -> 1).
# The whole definition is wrapped in a string literal, so running this cell
# defines neither `MLP` nor `model`.
'''
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(X.shape[1], 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = MLP().to(device)
'''

In [7]:
# Define the MLP model
class MLP(nn.Module):
    """Fully connected regression network: in_features -> 24 -> 12 -> 6 -> 1.

    ReLU follows each of the first three linear layers; the final layer is
    left linear so the network can emit an unbounded real value.

    Args:
        in_features: Width of the input vectors. When omitted (None), it falls
            back to ``X.shape[1]`` read from the notebook's global feature
            matrix, which keeps the original ``MLP()`` call working.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: infer the width from the global X
            in_features = X.shape[1]
        self.fc1 = nn.Linear(in_features, 24)
        self.fc2 = nn.Linear(24, 12)
        self.fc3 = nn.Linear(12, 6)
        self.fc4 = nn.Linear(6, 1)
        self.relu = nn.ReLU()

    # nn.Module invokes forward() when the model is called; subclasses must define it
    def forward(self, x):
        """Map inputs with trailing dim ``in_features`` to outputs with trailing dim 1."""
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        return x

# Seed PyTorch's global RNG before building the model so the random weight
# initialisation is repeatable across Restart & Run All (42 matches the
# random_state used for the train/test split).
torch.manual_seed(42)
model = MLP().to(device)

### Define loss function and optimizer

In [8]:
# Adam updates every model parameter with a learning rate of 1e-3;
# the training objective is the mean squared error.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

### Training & Evaluation

In [9]:
# Training loop: one optimisation pass over train_loader per epoch, followed by
# an evaluation pass over test_loader.
num_epochs = 400
for epoch in range(num_epochs):
    model.train()
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        # squeeze(-1) removes only the trailing size-1 output dim: (batch, 1) -> (batch,).
        # A bare squeeze() would also collapse a batch of one to a 0-d tensor,
        # which no longer matches the (1,) target shape.
        outputs = model(inputs).squeeze(-1)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

    # Evaluation with mean squared error.
    # Accumulate the summed squared error and divide by the sample count, so a
    # smaller final batch is not over-weighted (a mean of per-batch means would be).
    model.eval()
    squared_error_sum = 0.0
    n_samples = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs).squeeze(-1)
            squared_error_sum += nn.functional.mse_loss(
                outputs, targets, reduction="sum"
            ).item()
            n_samples += targets.numel()

    # An empty test_loader yields NaN instead of raising ZeroDivisionError
    mse = squared_error_sum / n_samples if n_samples else float("nan")
    print(f'Epoch {epoch+1}/{num_epochs}, MSE: {mse:.4f}')

print("Training complete")

Epoch 1/400, MSE: 0.5402
Epoch 2/400, MSE: 0.4182
Epoch 3/400, MSE: 0.3954
Epoch 4/400, MSE: 0.3803
Epoch 5/400, MSE: 0.3642
Epoch 6/400, MSE: 0.3557
Epoch 7/400, MSE: 0.3488
Epoch 8/400, MSE: 0.3387
Epoch 9/400, MSE: 0.3381
Epoch 10/400, MSE: 0.3307
Epoch 11/400, MSE: 0.3282
Epoch 12/400, MSE: 0.3350
Epoch 13/400, MSE: 0.3372
Epoch 14/400, MSE: 0.3281
Epoch 15/400, MSE: 0.3172
Epoch 16/400, MSE: 0.3196
Epoch 17/400, MSE: 0.3157
Epoch 18/400, MSE: 0.3099
Epoch 19/400, MSE: 0.3163
Epoch 20/400, MSE: 0.3174
Epoch 21/400, MSE: 0.3098
Epoch 22/400, MSE: 0.3119
Epoch 23/400, MSE: 0.3151
Epoch 24/400, MSE: 0.3186
Epoch 25/400, MSE: 0.3070
Epoch 26/400, MSE: 0.3110
Epoch 27/400, MSE: 0.3154
Epoch 28/400, MSE: 0.3018
Epoch 29/400, MSE: 0.3059
Epoch 30/400, MSE: 0.3031
Epoch 31/400, MSE: 0.3068
Epoch 32/400, MSE: 0.3049
Epoch 33/400, MSE: 0.3048
Epoch 34/400, MSE: 0.3087
Epoch 35/400, MSE: 0.3001
Epoch 36/400, MSE: 0.3027
Epoch 37/400, MSE: 0.3010
Epoch 38/400, MSE: 0.2976
Epoch 39/400, MSE: 0.