In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Read the California housing CSV into a DataFrame and preview the first rows.
# NOTE(review): the rendered preview lists an 'Unnamed: 0' column. If that is a
# saved row index inside the CSV (rather than a display artifact), it will be
# fed to the model as a feature downstream -- confirm, and pass index_col=0 if so.
df = pd.read_csv(filepath_or_buffer='kaggle/california_housing_prices.csv')
df.head(5)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [3]:
# Preprocess the dataset
# 1. Drop every row that has a missing value.
# 2. One-hot encode the categorical columns (drop_first=True removes one level
#    per category to avoid a redundant column).
#    dtype=float keeps the whole frame numeric: recent pandas versions emit bool
#    dummies by default, which would turn the mixed frame into an object-dtype
#    array on conversion and rely on implicit coercion inside the scaler.
# NOTE: this rebinds `df`; re-run the loading cell to get the raw frame back.
df = pd.get_dummies(df.dropna(), drop_first=True, dtype=float)

# Separate features (every column except the target) from the target
X = df.drop(columns='median_house_value').to_numpy()
y = df['median_house_value'].to_numpy()

# Split the dataset into training (80%) and test (20%) sets; fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
assert len(X_train) + len(X_test) == len(X), "train/test split lost or duplicated rows"

# Feature scaling: fit the scaler on the training split only, then apply the
# same transform to the test split so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
class RegressionNN(nn.Module):
    """Fully connected regression network with two ReLU hidden layers.

    Architecture: Linear(input_size, hidden1_size) -> ReLU
                  -> Linear(hidden1_size, hidden2_size) -> ReLU
                  -> Linear(hidden2_size, output_size)

    Args:
        input_size: Number of input features per sample.
        hidden1_size: Width of the first hidden layer. Defaults to 128.
        hidden2_size: Width of the second hidden layer. Defaults to 64.
        output_size: Number of values predicted per sample. Defaults to 1.
    """

    def __init__(self, input_size, hidden1_size=128, hidden2_size=64, output_size=1):
        super().__init__()
        # Attribute names (fc1, relu, fc2, fc3) are kept stable so the
        # state_dict keys stay the same for any previously saved weights.
        self.fc1 = nn.Linear(input_size, hidden1_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden1_size, hidden2_size)
        self.fc3 = nn.Linear(hidden2_size, output_size)

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: Float tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``output_size``. No activation is applied to the
            output layer, so predictions are unbounded.
        """
        # first hidden layer + ReLU
        x = self.relu(self.fc1(x))
        # second hidden layer + ReLU
        x = self.relu(self.fc2(x))
        # linear output layer
        return self.fc3(x)

In [5]:
# Seed torch so weight initialization (and therefore the logged losses) is
# repeatable on Restart & Run All. 42 matches the random_state of the split.
torch.manual_seed(42)

# Convert arrays to float32 PyTorch tensors. Targets are reshaped to column
# vectors so they line up with the model's single-output-per-row predictions
# when passed to MSELoss.
X_train_tensor = torch.as_tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.as_tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.as_tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.as_tensor(y_test, dtype=torch.float32).view(-1, 1)

# Initialize the model, loss function, and optimizer
model = RegressionNN(X_train.shape[1])  # input_size is the number of features
criterion = nn.MSELoss()  # Mean Squared Error
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adam is a variant of gradient descent, lr is the learning rate

# Training loop: every iteration feeds the entire training set through the
# model, so each "epoch" is exactly one full-batch gradient step.
epochs = 1_000
model.train()  # Set the model to training mode (once; nothing in the loop changes it)
for epoch in range(epochs):
    optimizer.zero_grad()  # Reset gradients accumulated by the previous step
    outputs = model(X_train_tensor)  # Forward pass
    loss = criterion(outputs, y_train_tensor)  # Compute loss
    loss.backward()  # Backward pass
    optimizer.step()  # Update weights

    # Log every 10th epoch, plus the last one so the final training loss is visible
    if epoch % 10 == 0 or epoch == epochs - 1:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')


Epoch 1/1000, Loss: 55938727936.0
Epoch 11/1000, Loss: 55927013376.0
Epoch 21/1000, Loss: 55869984768.0
Epoch 31/1000, Loss: 55700086784.0
Epoch 41/1000, Loss: 55307309056.0
Epoch 51/1000, Loss: 54540169216.0
Epoch 61/1000, Loss: 53219205120.0
Epoch 71/1000, Loss: 51162669056.0
Epoch 81/1000, Loss: 48223932416.0
Epoch 91/1000, Loss: 44338892800.0
Epoch 101/1000, Loss: 39574777856.0
Epoch 111/1000, Loss: 34170204160.0
Epoch 121/1000, Loss: 28527054848.0
Epoch 131/1000, Loss: 23093557248.0
Epoch 141/1000, Loss: 18263402496.0
Epoch 151/1000, Loss: 14409444352.0
Epoch 161/1000, Loss: 11783067648.0
Epoch 171/1000, Loss: 10299244544.0
Epoch 181/1000, Loss: 9545753600.0
Epoch 191/1000, Loss: 9090859008.0
Epoch 201/1000, Loss: 8722791424.0
Epoch 211/1000, Loss: 8399208960.0
Epoch 221/1000, Loss: 8118205952.0
Epoch 231/1000, Loss: 7872098304.0
Epoch 241/1000, Loss: 7651426816.0
Epoch 251/1000, Loss: 7449597952.0
Epoch 261/1000, Loss: 7262687744.0
Epoch 271/1000, Loss: 7088117248.0
Epoch 281/100

In [6]:
# Score the trained network on the held-out test split.
model.train(False)  # evaluation mode (what model.eval() does)

# Gradients are not needed for scoring, so skip tracking them.
with torch.no_grad():
    predictions = model(X_test_tensor)           # forward pass on unseen rows
    mse = criterion(predictions, y_test_tensor)  # same loss function as in training

print(f'Test MSE: {mse.item()}')

Test MSE: 4520810496.0
