#### Steps

1. Load Dataset
2. Make Dataset Iterable
3. Create Model Class
4. Instantiate Model Class
5. Instantiate Loss Class
6. Instantiate Optimizer Class
7. Train Model

### Step 1: Loading MNIST Train and Test Datasets

In [23]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

In [24]:
# Arguments shared by both MNIST splits: files live under ./data and every
# image is converted to a tensor by ToTensor().
mnist_kwargs = dict(root='./data', transform=transforms.ToTensor())

# Training split; fetch the files first if they are not already in ./data.
train_dataset = dsets.MNIST(train=True, download=True, **mnist_kwargs)

# Test split; no download is requested here.
test_dataset = dsets.MNIST(train=False, **mnist_kwargs)

### Step 2: Make Dataset Iterable

In [25]:
batch_size = 100
n_iters = 3000

# Translate the iteration budget into a whole number of passes over the
# training set: one epoch is len(train_dataset) / batch_size iterations.
iters_per_epoch = len(train_dataset) / batch_size
num_epochs = int(n_iters / iters_per_epoch)

# Training batches are reshuffled on every epoch.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation batches keep the dataset order.
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

### Step 3: Create Model Class

In [26]:
class FeedforwardNN(nn.Module):
    """Feedforward network with one hidden layer: Linear -> Sigmoid -> Linear.

    ``forward`` returns the output of the final linear layer unchanged; no
    softmax is applied inside the model.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Build the layers.

        Args:
            input_size: number of features in each input row.
            hidden_size: number of units in the hidden layer.
            num_classes: number of output scores per input row.
        """
        super(FeedforwardNN, self).__init__()

        # Size the layers from the constructor arguments rather than from
        # module-level globals, so the class works regardless of which
        # notebook cells have already been run.
        # Linear function
        self.fc1 = nn.Linear(input_size, hidden_size)
        # Non-linearity
        self.sigmoid = nn.Sigmoid()
        # Linear function (readout)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a batch ``x`` with ``input_size`` features per row to
        ``num_classes`` scores per row."""
        # Linear function
        out = self.fc1(x)
        # Non-linearity
        out = self.sigmoid(out)
        # Linear function (readout)
        out = self.fc2(out)

        return out

### Step 4: Instantiate Model Class

In [27]:
# Layer sizes for the classifier.
input_dim = 28 * 28   # length of the flattened input fed to the first layer
hidden_dim = 100      # can be any number. Similar term: number of neurons
output_dim = 10       # number of output scores

model = FeedforwardNN(
    input_size=input_dim,
    hidden_size=hidden_dim,
    num_classes=output_dim,
)

### Step 5: Instantiate Loss Class

In [28]:
# https://pytorch.org/docs/stable/nn.html#torch.nn.CrossEntropyLoss
# In PyTorch, nn.CrossEntropyLoss() applies the (log-)softmax itself before
# computing the loss, so the model does not need a Softmax layer of its own.
criterion = nn.CrossEntropyLoss()

### Step 6: Instantiate Optimizer Class

In [29]:
# Learning rate passed to the optimizer.
lr = 0.1
# Stochastic gradient descent over every parameter of the model.
optim = torch.optim.SGD(model.parameters(), lr=lr)

### Step 7: Train Model

Process:
1. Convert inputs/labels to variables
2. Clear gradient buffers
3. Get output given inputs
4. Get loss
5. Get gradients w.r.t. parameters
6. Update parameters using gradients
    * parameters = parameters - learning_rate * parameters_gradients
7. REPEAT

In [30]:
# Count of parameter updates performed so far. NOTE: the name shadows the
# builtin `iter`; it is kept so that any other cell reading it still works.
iter = 0
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Flatten every image in the batch into a row of 28*28 values.
        # Tensors are passed to the model directly; wrapping them in
        # `Variable` is no longer needed.
        images = images.view(-1, 28*28)

        optim.zero_grad()                  # clear gradients from the previous step
        outputs = model(images)            # forward pass
        loss = criterion(outputs, labels)  # loss for this batch
        loss.backward()                    # gradients w.r.t. parameters
        optim.step()                       # update parameters
        iter += 1

        if iter % 500 == 0:
            # Every 500 updates, measure accuracy over the whole test loader.
            correct = 0
            total = 0
            # No gradients are needed for evaluation, so skip graph building.
            with torch.no_grad():
                # Separate names keep the training batch variables intact.
                for test_images, test_labels in test_loader:
                    test_outputs = model(test_images.view(-1, 28*28))
                    # Index of the largest score in each row is the prediction.
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() turns the 0-dim count into a Python int so that
                    # `correct` stays a plain number rather than a tensor.
                    correct += (predicted == test_labels).sum().item()

            # Whole-number percentage, matching the recorded output below.
            accuracy = 100 * correct // total

            # `loss` is the loss of the most recent training batch;
            # loss.item() replaces loss.data[0], which raises on 0-dim tensors.
            print("Iteration: {}. Loss: {}. Accuracy: {}".format(iter, loss.item(), accuracy))



Iteration: 500. Loss: 0.6656153798103333. Accuracy: 86
Iteration: 1000. Loss: 0.36944520473480225. Accuracy: 89
Iteration: 1500. Loss: 0.41461634635925293. Accuracy: 90
Iteration: 2000. Loss: 0.2815324068069458. Accuracy: 91
Iteration: 2500. Loss: 0.24458059668540955. Accuracy: 91
Iteration: 3000. Loss: 0.30974116921424866. Accuracy: 92


In [31]:
# Print the model's built-in text representation (its registered submodules).
print(model)

FeedforwardNN(
  (fc1): Linear(in_features=784, out_features=100, bias=True)
  (sigmoid): Sigmoid()
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)


#### Print model summary in PyTorch

https://stackoverflow.com/questions/42480111/model-summary-in-pytorch

In [33]:
def torch_summarize(model, show_weights=True, show_parameters=True):
    """Summarizes torch model by showing trainable parameters and weights.

    Args:
        model: an ``nn.Module``; one line is produced per direct submodule.
        show_weights: if true, append the shapes of each submodule's
            parameter tensors as ``weights=(...)``.
        show_parameters: if true, append each submodule's total element
            count as ``parameters=N``.

    Returns:
        A multi-line string: the model's class name, one indented line per
        direct submodule, and a closing parenthesis.
    """
    # Container classes whose children are summarized recursively.
    # `Container` is deprecated, so it is looked up defensively instead of
    # being referenced as a hard attribute.
    container_module = torch.nn.modules.container
    container_types = tuple(
        cls
        for cls in (getattr(container_module, 'Container', None),
                    container_module.Sequential)
        if cls is not None
    )

    lines = [model.__class__.__name__ + ' (']
    for key, module in model._modules.items():
        if module is None:
            # Slots registered as None hold no module to describe.
            continue

        # if it contains layers let call it recursively to get params and weights
        if isinstance(module, container_types):
            # Pass the display flags down so nested containers honour them.
            modstr = torch_summarize(module, show_weights, show_parameters)
        else:
            modstr = repr(module)
        # Indent continuation lines by two spaces (first line is unchanged),
        # without relying on torch's private `_addindent` helper.
        modstr = modstr.replace('\n', '\n' + ' ' * 2)

        params = list(module.parameters())

        entry = '  (' + key + '): ' + modstr
        if show_weights:
            weights = tuple(tuple(p.size()) for p in params)
            entry += ', weights={}'.format(weights)
        if show_parameters:
            # numel() gives the element count directly; no numpy required.
            entry += ', parameters={}'.format(sum(p.numel() for p in params))
        lines.append(entry)

    lines.append(')')
    return '\n'.join(lines)

# Test: print the per-layer summary of the model using the default options
print(torch_summarize(model))

FeedforwardNN (
  (fc1): Linear(in_features=784, out_features=100, bias=True), weights=((100, 784), (100,)), parameters=78500
  (sigmoid): Sigmoid(), weights=(), parameters=0
  (fc2): Linear(in_features=100, out_features=10, bias=True), weights=((10, 100), (10,)), parameters=1010
)
