# Implementing a first neural network using PyTorch 

In this exercise, we will learn how to implement a neural network using PyTorch. 

## Single Layer Neural Network 

Let's implmement a single layer neural network, namely 

$$
\begin{align}
x &= [x_1, \ldots, x_m] \\
g(x) &= \sum_{j=1}^n w_j x_{ij} + b
\end{align}
$$

### Preprocessing

We will use the King County housing dataset [link](https://www.kaggle.com/datasets/harlfoxem/housesalesprediction).


In [1]:
# from sklearn.datasets import fetch_california_housing
import seaborn as sns
import numpy as np
import pandas as pd

data_table = pd.read_csv("../../data/kc_house_data.csv")
data_table["sale_yr"] = pd.to_numeric(data_table.date.str.slice(0, 4))
data_table["sale_month"] = pd.to_numeric(data_table.date.str.slice(4, 6))
data_table["sale_day"] = pd.to_numeric(data_table.date.str.slice(6, 8))
data_table = pd.DataFrame(
    data_table,
    columns=[
        "sale_yr",
        "sale_month",
        "sale_day",
        "view",
        "waterfront",
        "lat",
        "long",
        "bedrooms",
        "bathrooms",
        "sqft_living",
        "sqft_lot",
        "floors",
        "condition",
        "grade",
        "sqft_above",
        "sqft_basement",
        "yr_built",
        "yr_renovated",
        "zipcode",
        "sqft_living15",
        "sqft_lot15",
        "price",
    ],
)
data_table

Unnamed: 0,sale_yr,sale_month,sale_day,view,waterfront,lat,long,bedrooms,bathrooms,sqft_living,...,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,sqft_living15,sqft_lot15,price
0,2014,10,13,0,0,47.5112,-122.257,3,1.00,1180,...,3,7,1180,0,1955,0,98178,1340,5650,221900.0
1,2014,12,9,0,0,47.7210,-122.319,3,2.25,2570,...,3,7,2170,400,1951,1991,98125,1690,7639,538000.0
2,2015,2,25,0,0,47.7379,-122.233,2,1.00,770,...,3,6,770,0,1933,0,98028,2720,8062,180000.0
3,2014,12,9,0,0,47.5208,-122.393,4,3.00,1960,...,5,7,1050,910,1965,0,98136,1360,5000,604000.0
4,2015,2,18,0,0,47.6168,-122.045,3,2.00,1680,...,3,8,1680,0,1987,0,98074,1800,7503,510000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21608,2014,5,21,0,0,47.6993,-122.346,3,2.50,1530,...,3,8,1530,0,2009,0,98103,1530,1509,360000.0
21609,2015,2,23,0,0,47.5107,-122.362,4,2.50,2310,...,3,8,2310,0,2014,0,98146,1830,7200,400000.0
21610,2014,6,23,0,0,47.5944,-122.299,2,0.75,1020,...,3,7,1020,0,2009,0,98144,1020,2007,402101.0
21611,2015,1,16,0,0,47.5345,-122.069,3,2.50,1600,...,3,8,1600,0,2004,0,98027,1410,1287,400000.0


In [2]:
y = data_table["price"].values / 1000
X = data_table.drop("price", axis=1).values
# housing = fetch_california_housing()
# X, y = housing.data, housing.target

Split the data into training and test set. Use 80% of the data for training and 20% for testing.


In [3]:
# TODO: split the data into training and testing sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

Standardize the features using its mean and standard deviation.


In [4]:
# TODO: Standardize the data
from sklearn.preprocessing import StandardScaler

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Convert to the data to PyTorch Tensors


In [5]:
# TODO: Convert to PyTorch tensors
import torch

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [6]:
# TODO: Prepare the dataset for batching
from torch.utils.data import DataLoader, TensorDataset

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

## Model & Training 


Define the model:


In [7]:
# TODO: Define the model


class SingleLayerNN(torch.nn.Module):
    def __init__(self, input_size):
        super(SingleLayerNN, self).__init__()
        self.linear = torch.nn.Linear(input_size, 1)

    def forward(self, x):
        return self.linear(x)


# Instantiate the model
model = SingleLayerNN(X_train.shape[1])

Define the loss:


In [8]:
# TODO: Define the loss function and optimizer

criterion = torch.nn.MSELoss()

Define the optimizer


In [9]:
# TODO: Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [10]:
# Training loop with DataLoader
n_epochs = 100
model.train()
for epoch in range(n_epochs):
    epoch_loss = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss /= len(train_loader)
    if (epoch + 1) % 5 == 0:
        print(f"Epoch {epoch+1}, Average Loss: {epoch_loss}")

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 5, Average Loss: 431348.085101476
Epoch 10, Average Loss: 430715.12984317343
Epoch 15, Average Loss: 428908.4181849631
Epoch 20, Average Loss: 427102.56636300735
Epoch 25, Average Loss: 426367.06025138375
Epoch 30, Average Loss: 425470.2105627306
Epoch 35, Average Loss: 423271.5787592251
Epoch 40, Average Loss: 421271.07645295205
Epoch 45, Average Loss: 420328.06036669743
Epoch 50, Average Loss: 418667.73166512913
Epoch 55, Average Loss: 417383.1949377306
Epoch 60, Average Loss: 415629.65457795205
Epoch 65, Average Loss: 414352.7849400369
Epoch 70, Average Loss: 412573.8754035978
Epoch 75, Average Loss: 411592.56757380074
Epoch 80, Average Loss: 410019.2931273063
Epoch 85, Average Loss: 408560.7741005535
Epoch 90, Average Loss: 407463.5655558118
Epoch 95, Average Loss: 406001.59968865314
Epoch 100, Average Loss: 404784.6054543358


Evaluate the model


In [11]:
# Evaluate the model
model.eval()
total_loss = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        predictions = model(inputs)
        test_loss = criterion(predictions, labels)
        total_loss += test_loss.item()
average_loss = total_loss / len(test_loader)
print(f"Test Loss: {average_loss}")

Test Loss: 370709.18290441175


  return F.mse_loss(input, target, reduction=self.reduction)


# Deeper neural networks

Define a deeper model w/ non-linear activation function.


In [12]:
# TODO: Define a deeper model w/ non-linear activation function and add dropout to prevent overfitting


class MultiLayerNN(torch.nn.Module):
    def __init__(self, input_size, hidden_size=64, dropout_rate=0.5):
        super(MultiLayerNN, self).__init__()
        self.linear1 = torch.nn.Linear(input_size, hidden_size)
        self.linear2 = torch.nn.Linear(hidden_size, hidden_size)
        self.linear3 = torch.nn.Linear(hidden_size, 1)
        self.relu = torch.nn.LeakyReLU()

    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        x = self.relu(x)
        x = self.linear3(x)
        return x


# Instantiate the model
model = MultiLayerNN(X_train.shape[1])
X_train.shape, y_train.shape

((17290, 21), (17290,))

Train & Evaluate the model


In [13]:
# TODO: Run the training loop and evaluate the model

criterion = torch.nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

n_epochs = 100
model.train()
for epoch in range(n_epochs):
    epoch_loss = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss /= len(train_loader)
    if (epoch + 1) % 5 == 0:
        print(f"Epoch {epoch+1}, Average Loss: {epoch_loss}")

# Evaluate the model
model.eval()
total_loss = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        predictions = model(inputs)
        test_loss = criterion(predictions, labels)
        total_loss += test_loss.item()
average_loss = total_loss / len(test_loader)
print(f"Test Loss: {average_loss}")

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 5, Average Loss: 141209.373255881
Epoch 10, Average Loss: 139742.59127075647
Epoch 15, Average Loss: 139012.73214079798
Epoch 20, Average Loss: 138837.38726648985
Epoch 25, Average Loss: 138670.0625144142
Epoch 30, Average Loss: 138623.6017642989
Epoch 35, Average Loss: 138595.52937615314
Epoch 40, Average Loss: 138593.56381169282
Epoch 45, Average Loss: 138523.37257841328
Epoch 50, Average Loss: 139482.01759974632
Epoch 55, Average Loss: 138987.8526435655
Epoch 60, Average Loss: 138489.50442516143
Epoch 65, Average Loss: 138467.61645237546
Epoch 70, Average Loss: 138468.69299901984
Epoch 75, Average Loss: 138424.08750864852
Epoch 80, Average Loss: 138461.15707881688
Epoch 85, Average Loss: 138855.76011877306
Epoch 90, Average Loss: 138645.0666368773
Epoch 95, Average Loss: 139324.06627652215
Epoch 100, Average Loss: 138448.26095479704
Test Loss: 118547.0838694853


  return F.mse_loss(input, target, reduction=self.reduction)


# Drop out


In [14]:
# TODO: Define a deeper model w/ non-linear activation function


class MultiLayerNN(torch.nn.Module):
    def __init__(self, input_size, hidden_size=32):
        super(MultiLayerNN, self).__init__()
        self.linear1 = torch.nn.Linear(input_size, hidden_size)
        self.linear2 = torch.nn.Linear(hidden_size, hidden_size)
        self.linear3 = torch.nn.Linear(hidden_size, 1)
        self.dropout = torch.nn.Dropout(p=0.1)
        self.relu = torch.nn.LeakyReLU()

    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.linear2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.linear3(x)
        return x


# Instantiate the model
model = MultiLayerNN(X_train.shape[1])

In [15]:
# TODO: Run the training loop and evaluate the model

criterion = torch.nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

n_epochs = 50
model.train()
for epoch in range(n_epochs):
    epoch_loss = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss /= len(train_loader)
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Average Loss: {epoch_loss}")

# Evaluate the model
model.eval()
total_loss = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        predictions = model(inputs)
        test_loss = criterion(predictions, labels)
        total_loss += test_loss.item()
average_loss = total_loss / len(test_loader)
print(f"Test Loss: {average_loss}")

Epoch 10, Average Loss: 143405.00063422509
Epoch 20, Average Loss: 142172.27642124076
Epoch 30, Average Loss: 142194.90584640222
Epoch 40, Average Loss: 141833.34426891143
Epoch 50, Average Loss: 141228.6308521679
Test Loss: 118687.990234375
