# Implementing a first neural network using PyTorch 

In this exercise, we will learn how to implement a neural network using PyTorch. 

## Single Layer Neural Network 

Let's implmement a single layer neural network, namely 

$$
\begin{align}
x &= [x_1, \ldots, x_m] \\
g(x) &= \sum_{j=1}^n w_j x_{ij} + b
\end{align}
$$

### Preprocessing

We will use the California housing dataset [link](https://www.kaggle.com/datasets/camnugent/california-housing-prices).


In [8]:
from sklearn.datasets import fetch_california_housing
import seaborn as sns
import numpy as np

housing = fetch_california_housing()

X, y = housing.data, housing.target

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

Split the data into training and test set. Use 80% of the data for training and 20% for testing.


In [None]:
# TODO: split the data into training and testing sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

Standardize the features using its mean and standard deviation.


In [None]:
# TODO: Standardize the data
from sklearn.preprocessing import StandardScaler

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Convert to the data to PyTorch Tensors


In [None]:
# TODO: Convert to PyTorch tensors
import torch

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [9]:
# TODO: Prepare the dataset for batching
from torch.utils.data import DataLoader, TensorDataset

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

## Model & Training 


Define the model:


In [19]:
# TODO: Define the model


class SingleLayerNN(torch.nn.Module):
    def __init__(self, input_size):
        super(SingleLayerNN, self).__init__()
        self.linear = torch.nn.Linear(input_size, 1)

    def forward(self, x):
        return self.linear(x)


# Instantiate the model
model = SingleLayerNN(X_train.shape[1])

Define the loss:


In [20]:
# TODO: Define the loss function and optimizer

criterion = torch.nn.MSELoss()

Define the optimizer


In [21]:
# TODO: Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [22]:
# Training loop with DataLoader
n_epochs = 50
model.train()
for epoch in range(n_epochs):
    epoch_loss = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss /= len(train_loader)
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Average Loss: {epoch_loss}")

Epoch 10, Average Loss: 0.5291050224918727
Epoch 20, Average Loss: 0.519401736210945
Epoch 30, Average Loss: 0.5190887899650621
Epoch 40, Average Loss: 0.5194684881572575
Epoch 50, Average Loss: 0.518878079748662
Epoch 60, Average Loss: 0.5188587546752866
Epoch 70, Average Loss: 0.5191625304115836
Epoch 80, Average Loss: 0.5189308607878611
Epoch 90, Average Loss: 0.5190540632709514
Epoch 100, Average Loss: 0.5192787988876649


Evaluate the model


In [23]:
# Evaluate the model
model.eval()
total_loss = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        predictions = model(inputs)
        test_loss = criterion(predictions, labels)
        total_loss += test_loss.item()
average_loss = total_loss / len(test_loader)
print(f"Test Loss: {average_loss}")

Test Loss: 0.5562466165816137


# Deeper neural networks

Define a deeper model w/ non-linear activation function.


In [32]:
# TODO: Define a deeper model w/ non-linear activation function


class MultiLayerNN(torch.nn.Module):
    def __init__(self, input_size, hidden_size=32):
        super(MultiLayerNN, self).__init__()
        self.linear1 = torch.nn.Linear(input_size, hidden_size)
        self.linear2 = torch.nn.Linear(hidden_size, hidden_size)
        self.linear3 = torch.nn.Linear(hidden_size, 1)

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.linear3(x)
        return x


# Instantiate the model
model = MultiLayerNN(X_train.shape[1])

Train & Evaluate the model


In [33]:
# TODO: Run the training loop and evaluate the model

criterion = torch.nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

n_epochs = 50
model.train()
for epoch in range(n_epochs):
    epoch_loss = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss /= len(train_loader)
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Average Loss: {epoch_loss}")

# Evaluate the model
model.eval()
total_loss = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        predictions = model(inputs)
        test_loss = criterion(predictions, labels)
        total_loss += test_loss.item()
average_loss = total_loss / len(test_loader)
print(f"Test Loss: {average_loss}")

Epoch 10, Average Loss: 0.5316799880518007
Epoch 20, Average Loss: 0.5361191750641248
Epoch 30, Average Loss: 0.5245967780550321
Epoch 40, Average Loss: 0.5532454193620256
Epoch 50, Average Loss: 0.5237977422427299
Test Loss: 0.5509383784708126


### Did the performance improved? If not, try the followings 

1. Sandwitching non-linear activation with linear layers such as torch.nn.LeakyReLu. And rerun the training.

2. Insert a drop out to prevent overfitting.
