# Feedforward Neural Network

## Load Dependent Packages

In [1]:
import torch
import torchvision

## Device Configuration: CPU or GPU

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## Select Hyperparameters

In [3]:
# Model dimensions.
input_size = 784  # 28 * 28 values per flattened input image
hidden_size = 500
num_classes = 10

# Optimization schedule.
num_epochs, batch_size = 5, 100
learning_rate = 1e-2

## Define Dataset Class

In [4]:
import numpy as np
import gzip
import os
class MNISTDataset(torch.utils.data.Dataset):
    """MNIST images and labels read from local gzip-compressed IDX files.

    The files ``<prefix>-labels-idx1-ubyte.gz`` and
    ``<prefix>-images-idx3-ubyte.gz`` are read from ``root``, where
    ``<prefix>`` is ``train`` for the training split and ``t10k`` otherwise.

    Args:
        root: Directory that contains the four MNIST ``.gz`` files.
        train: Truthy to load the training split, falsy for the test split.
        transform: Optional callable applied to each image (a 28x28 uint8
            NumPy array) when an item is fetched.

    Raises:
        ValueError: If the image file does not hold exactly one 28x28 image
            per label.
    """

    def __init__(self, root, train=True, transform=None):
        # File name prefix of the requested split.
        self.file_pre = 'train' if train else 't10k'
        self.transform = transform

        # Paths of the label and image files for that split.
        self.label_path = os.path.join(root, '%s-labels-idx1-ubyte.gz' % self.file_pre)
        self.image_path = os.path.join(root, '%s-images-idx3-ubyte.gz' % self.file_pre)

        # Load the whole split into memory once.
        self.images, self.labels = self.__read_data__(self.image_path, self.label_path)

    def __read_data__(self, image_path, label_path):
        """Read both files and return ``(images, labels)`` as uint8 arrays.

        ``labels`` has one entry per sample and ``images`` has shape
        ``(len(labels), 28, 28)``.
        """
        with gzip.open(label_path, 'rb') as lbpath:
            # The first 8 bytes are the file header, not label data.
            labels = np.frombuffer(lbpath.read(), np.uint8, offset=8)
        with gzip.open(image_path, 'rb') as imgpath:
            # The first 16 bytes are the file header, not pixel data.
            pixels = np.frombuffer(imgpath.read(), np.uint8, offset=16)

        # Fail with an explicit message instead of a bare reshape error when
        # the two files do not describe the same number of samples.
        expected_size = len(labels) * 28 * 28
        if pixels.size != expected_size:
            raise ValueError(
                'image file %r holds %d pixel bytes but %d labels require %d'
                % (image_path, pixels.size, len(labels), expected_size)
            )
        images = pixels.reshape(len(labels), 28, 28)
        return images, labels

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; ``label`` is a Python int."""
        image, label = self.images[index], int(self.labels[index])

        # Apply the transform (e.g. conversion to a tensor) when one is set.
        if self.transform is not None:
            # np.frombuffer yields a read-only view; hand the transform a
            # writable copy instead.
            image = self.transform(np.array(image))
        return image, label

    def __len__(self):
        """Return the number of samples in the split."""
        return len(self.labels)

## Load through Local Dataset

In [5]:
# MNIST dataset (images and labels)
# Both splits live in the same local directory and use the same
# array-to-tensor conversion.
mnist_root = '../data/MNIST/'
to_tensor = torchvision.transforms.ToTensor()
train_dataset = MNISTDataset(mnist_root, train=True, transform=to_tensor)
test_dataset = MNISTDataset(mnist_root, train=False, transform=to_tensor)

## Set Data loader (Input Pipeline)

In [6]:
# Training batches are reshuffled every epoch; test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

## Fully Connected Neural Network with One Hidden Layer

In [7]:
class NeuralNet(torch.nn.Module):
    """Fully connected network with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output scores produced per input.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = torch.nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

## Build the Model from the Hyperparameters

In [8]:
# Build the network from the chosen sizes, then move its parameters to the
# configured device.
model = NeuralNet(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
)
model = model.to(device)

## Loss and optimizer

In [9]:
# Cross-entropy loss on the raw class scores, optimised with Adam.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
)

## Train the Model on the Configured Device (GPU if Available)

In [10]:
# One optimizer update per mini-batch; the loss is reported every 100 steps.
total_step = len(train_loader)
log_template = 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
for epoch in range(num_epochs):
    for step, (batch_images, batch_labels) in enumerate(train_loader, start=1):
        # Flatten every image into a vector of length input_size and move
        # the batch to the configured device.
        inputs = batch_images.reshape(-1, input_size).to(device)
        targets = batch_labels.to(device)

        # Forward pass.
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue
        print(log_template.format(epoch + 1, num_epochs, step, total_step, loss.item()))

Epoch [1/5], Step [100/600], Loss: 0.3548
Epoch [1/5], Step [200/600], Loss: 0.3716
Epoch [1/5], Step [300/600], Loss: 0.1688
Epoch [1/5], Step [400/600], Loss: 0.3031
Epoch [1/5], Step [500/600], Loss: 0.1814
Epoch [1/5], Step [600/600], Loss: 0.0972
Epoch [2/5], Step [100/600], Loss: 0.2293
Epoch [2/5], Step [200/600], Loss: 0.0615
Epoch [2/5], Step [300/600], Loss: 0.0470
Epoch [2/5], Step [400/600], Loss: 0.1991
Epoch [2/5], Step [500/600], Loss: 0.1298
Epoch [2/5], Step [600/600], Loss: 0.1901
Epoch [3/5], Step [100/600], Loss: 0.1107
Epoch [3/5], Step [200/600], Loss: 0.0555
Epoch [3/5], Step [300/600], Loss: 0.0414
Epoch [3/5], Step [400/600], Loss: 0.0392
Epoch [3/5], Step [500/600], Loss: 0.0987
Epoch [3/5], Step [600/600], Loss: 0.0704
Epoch [4/5], Step [100/600], Loss: 0.0808
Epoch [4/5], Step [200/600], Loss: 0.0894
Epoch [4/5], Step [300/600], Loss: 0.2186
Epoch [4/5], Step [400/600], Loss: 0.0385
Epoch [4/5], Step [500/600], Loss: 0.1194
Epoch [4/5], Step [600/600], Loss:

## Test Model with no_grad

In [11]:
# Measure classification accuracy on the test set.
was_training = model.training
model.eval()  # use inference behaviour for any mode-dependent layers
try:
    with torch.no_grad():  # gradients are not needed for evaluation
        correct = 0
        total = 0
        for images, labels in test_loader:
            # Flatten with the same input_size the training loop uses.
            images = images.reshape(-1, input_size).to(device)
            labels = labels.to(device)
            outputs = model(images)
            # The predicted class is the index of the largest score per row.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
finally:
    # Leave the model in whatever mode it was in before evaluation.
    model.train(was_training)

# Report the number of images actually evaluated rather than a fixed count.
print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

Accuracy of the network on the 10000 test images: 96.89 %


## Save the Model Checkpoint

In [12]:
# Persist only the learned parameters (the state dict), not the whole module.
checkpoint_path = 'feedforward_neural_network.ckpt'
torch.save(model.state_dict(), checkpoint_path)