# PyTorch Network Training

In [1]:
import numpy as np
import torch

from torch import nn
from torch.optim import SGD
from torch.utils.data import DataLoader

from torchvision import datasets
from torchvision.transforms import ToTensor

if torch.cuda.is_available(): device = 'cuda'
else: device = 'cpu'

print('Using', device, 'computing')

Using cpu computing


Let's construct the net to classify MNIST (our beloved benchmarking dataset) on a vanilla neural network (SGD, sigmoid, SSE loss) with two hidden layers

In [8]:
model = nn.Sequential()

model.append(nn.Flatten())
model.append(nn.Linear(28*28, 256))
model.append(nn.Sigmoid())
model.append(nn.Linear(256, 128))
model.append(nn.Sigmoid())
model.append(nn.Linear(128, 10))
model.append(nn.Sigmoid())

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): Sigmoid()
  (3): Linear(in_features=256, out_features=128, bias=True)
  (4): Sigmoid()
  (5): Linear(in_features=128, out_features=10, bias=True)
  (6): Sigmoid()
)

Let's create a model from the class, load it onto the computing device, and print the structure of the network.

In [9]:
model.to(device)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): Sigmoid()
  (3): Linear(in_features=256, out_features=128, bias=True)
  (4): Sigmoid()
  (5): Linear(in_features=128, out_features=10, bias=True)
  (6): Sigmoid()
)

Now, let's read in MNIST

In [10]:
# read in MNIST training data
training_data = datasets.MNIST(
    root = 'data',
    train = True,
    download = True,
    transform = ToTensor()
)

# read in MNIST test data
test_data = datasets.MNIST(
    root = 'data',
    train = False,
    download = True,
    transform = ToTensor()
)

# create a data loader
train_dataloader = DataLoader(training_data, batch_size = 64, shuffle = True)
test_dataloader = DataLoader(test_data, batch_size = 64, shuffle = True)

Let's create training and testing loops.

In [None]:
def train_loop(dataloader, model, loss_function, optimizer):
    
    size = len(dataloader.dataset)
    
    for batch, (X, y) in enumerate(dataloader):
        # compute prediction and loss
        pred = model(X)
        loss = loss_function(pred, y)

        # backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss = loss.item()
            current = batch * len(X)
            print('loss:', np.round(loss, 4), '\t[', current, '/', size, ']')

def test_loop(dataloader, model, loss_fn):
    
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    
    test_loss = 0
    correct = 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_function(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    
    print('Test error: \n Accuracy:', np.round(100*correct, 2), '%, Avg loss:', test_loss, '\n')

Next, let's classify MNIST with our vanilla net.

In [26]:
learning_rate = 0.1
batch_size = 64
epochs = 10

# initialize the loss function
loss_function = nn.CrossEntropyLoss()

# initialize the optimizer
optimizer = SGD(model.parameters(), lr = learning_rate)

size = len(train_dataloader.dataset)

for epoch in range(epochs):
    print('Epoch', epoch + 1, '\n------------------------------------------------------')
    
    # train loop
    train_loop(train_dataloader, model, loss_function, optimizer)
    
    # test loop
    test_loop(test_dataloader, model, loss_function)

Epoch 1 
------------------------------------------------------
loss: 1.714845895767212 	[ 0 / 60000 ]
loss: 1.6598721742630005 	[ 6400 / 60000 ]
loss: 1.624276041984558 	[ 12800 / 60000 ]
loss: 1.6571447849273682 	[ 19200 / 60000 ]
loss: 1.6610627174377441 	[ 25600 / 60000 ]
loss: 1.6523206233978271 	[ 32000 / 60000 ]
loss: 1.6325024366378784 	[ 38400 / 60000 ]
loss: 1.59742271900177 	[ 44800 / 60000 ]
loss: 1.6581920385360718 	[ 51200 / 60000 ]
loss: 1.6713268756866455 	[ 57600 / 60000 ]
Test error: 
 Accuracy: 70.86 %, Avg loss: 1.6330968133962838 

Epoch 2 
------------------------------------------------------
loss: 1.632216215133667 	[ 0 / 60000 ]
loss: 1.6141785383224487 	[ 6400 / 60000 ]
loss: 1.5921128988265991 	[ 12800 / 60000 ]
loss: 1.6429325342178345 	[ 19200 / 60000 ]
loss: 1.6612650156021118 	[ 25600 / 60000 ]
loss: 1.6100870370864868 	[ 32000 / 60000 ]
loss: 1.6392097473144531 	[ 38400 / 60000 ]
loss: 1.665850281715393 	[ 44800 / 60000 ]
loss: 1.6741056442260742 	[ 5120