In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

from utils import mnist

In [4]:
# Presumably the local directory holding the MNIST files.
# NOTE(review): `path` is not referenced by any other visible cell (mnist() is
# called without arguments) — confirm whether it should be passed to the loader
# or removed.
path = './data/MNIST_data'

In [5]:
# Build the train/test loaders with the project helper `utils.mnist`.
# NOTE(review): batch size, shuffling and transforms are presumably decided
# inside `mnist()`, which is not visible here — check utils.py.
train_loader, test_loader = mnist()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [6]:
class Net(nn.Module):
    """Three-layer fully connected classifier that outputs log-probabilities.

    The network flattens its input to 784 features, applies two sigmoid
    hidden layers of width 128 and a final linear layer with 10 outputs.
    Each instance owns its own Adam optimizer (``self.optim``, lr=0.01).

    Args:
        log_softmax: selects how log-probabilities are produced.
            ``True``  -> ``F.log_softmax(x, dim=1)`` (fused).
            ``False`` -> ``torch.log(F.softmax(x, dim=1))`` (two separate
            ops). The two forms are mathematically equal; the unfused form is
            kept deliberately so both variants can be trained side by side.
    """

    def __init__(self, log_softmax=False):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 10)
        self.log_softmax = log_softmax
        # Created after the layers so that self.parameters() already
        # contains every weight the optimizer has to update.
        self.optim = optim.Adam(self.parameters(), lr=0.01)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(N, 10)``.

        ``x`` is reshaped with ``view(-1, 28*28)``, so any input whose
        element count is a multiple of 784 is accepted.
        """
        x = x.view(-1, 28*28)
        # torch.sigmoid replaces the deprecated F.sigmoid alias.
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = self.fc3(x)
        if self.log_softmax:
            x = F.log_softmax(x, dim=1)
        else:
            # Intentionally unfused variant (see class docstring).
            x = torch.log(F.softmax(x, dim=1))
        return x

    def loss(self, output, target, **kwargs):
        """Compute the negative log-likelihood loss and remember it.

        Args:
            output: log-probabilities as returned by ``forward``.
            target: class indices.
            **kwargs: forwarded unchanged to ``F.nll_loss``.

        Returns:
            The loss tensor. It is also stored on ``self._loss`` so that
            callers can read the most recent value later.
        """
        self._loss = F.nll_loss(output, target, **kwargs)
        return self._loss

In [7]:
def train(epoch, models, loader=None):
    """Run one training epoch over every model and print progress.

    Every batch is fed to each model in turn; each model is updated with its
    own optimizer (``model.optim``) using ``model.loss``. A progress line is
    printed every 200 batches and once more after the last batch.

    Args:
        epoch: epoch number, used only in the printed progress line.
        models: iterable of models exposing ``optim``, ``loss(output, target)``
            and a ``_loss`` attribute set by ``loss``.
        loader: batch iterable with a ``dataset`` attribute and ``len()``.
            Defaults to the notebook-level ``train_loader``.

    Nothing is printed after the loop when the loader yields no batches.
    """
    if loader is None:
        loader = train_loader

    def _report(batches_done, samples_done):
        # Single place that formats a progress line, shared by the periodic
        # and the end-of-epoch report.
        header = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLosses '.format(
            epoch, samples_done, len(loader.dataset),
            100. * batches_done / len(loader))
        losses = ' '.join(['{}: {:.6f}'.format(i, m._loss.item()) for i, m in enumerate(models)])
        print(header + losses)

    samples_seen = 0   # samples consumed by fully processed batches
    batches_seen = 0
    for batch_idx, (data, target) in enumerate(loader):
        for model in models:
            model.optim.zero_grad()
            output = model(data)
            loss = model.loss(output, target)
            loss.backward()
            model.optim.step()

        if batch_idx % 200 == 0:
            # Reports the count *before* this batch, as the original
            # `batch_idx * len(data)` did for full-sized batches.
            _report(batch_idx, samples_seen)

        # Count real batch sizes so a smaller final batch is not over- or
        # under-counted in the end-of-epoch line.
        samples_seen += len(data)
        batches_seen = batch_idx + 1

    # An empty loader leaves no loss to report (and no `_loss` on the models).
    if batches_seen:
        _report(batches_seen, samples_seen)

In [None]:
# Two networks with the same architecture that differ only in how the
# log-probabilities are computed:
#   models[0] -> torch.log(F.softmax(...)),  models[1] -> F.log_softmax(...).
# No seed is set, so the two instances start from different random weights.
models = [Net(), Net(True)]

In [None]:
avg_lambda = lambda l: 'Loss: {:.4f}'.format(l)
acc_lambda = lambda c, p: 'Accuracy: {}/{} ({:.0f}%)'.format(c, len(test_loader.dataset), p)
line = lambda i, l, c, p: '{}: '.format(i) + avg_lambda(l) + '\t' + acc_lambda(c, p)

def test(models):
    test_loss = [0]*len(models)
    correct = [0]*len(models)
    with torch.no_grad():
        for data, target in test_loader:
            output = [m(data) for m in models]
            for i, m in enumerate(models):
                test_loss[i] += m.loss(output[i], target, size_average=False).item() # sum up batch loss
                pred = output[i].data.max(1, keepdim=True)[1] # get the index of the max log-probability
                correct[i] += pred.eq(target.data.view_as(pred)).cpu().sum()
    
    for i in range(len(models)):
        test_loss[i] /= len(test_loader.dataset)
    correct_pct = [100. * c / len(test_loader.dataset) for c in correct]
    lines = '\n'.join([line(i, test_loss[i], correct[i], correct_pct[i]) for i in range(len(models))]) + '\n'
    report = 'Test set:\n' + lines
    
    print(report)

In [None]:
# Train both models for 20 epochs (numbered 1..20) and evaluate them on the
# test set after every epoch.
for epoch in range(1, 21):
    train(epoch, models)
    test(models)

In [9]:
# Pull a single batch from the training loader; `x` and `y` are reused by the
# loss-comparison cells below.
x, y = next(iter(train_loader))
x.shape, y.shape  # last expression of the cell: displays both shapes

(torch.Size([50, 1, 28, 28]), torch.Size([50]))

In [29]:
# Throwaway, untrained model used only to produce raw class scores for the
# loss comparison in the following cells. There are no activations between the
# Linear layers, so this stack is a purely linear map.
net = nn.Sequential(nn.Linear(28*28, 128), nn.Linear(128, 128), nn.Linear(128, 10))
# The optimizer is created and its gradients cleared, but no step is taken in
# the visible cells.
net_opt = optim.Adam(net.parameters(), lr=0.01)
net_opt.zero_grad()
y_pred = net(x.view(-1, 28*28))  # flatten each image to 784 features
y_pred[:2]  # display the scores of the first two samples

tensor([[-0.1068,  0.1199, -0.0403,  0.1336, -0.0533, -0.2989,  0.0741, -0.0117,
          0.0094, -0.2413],
        [ 0.0095,  0.1962, -0.0593,  0.0255, -0.1660, -0.0856,  0.1280, -0.1278,
         -0.0295, -0.0410]], grad_fn=<SliceBackward>)

In [38]:
# Variant 1: fused log-softmax followed by NLL. `dim=1` (the class axis of the
# 2-D `y_pred`) is passed explicitly; relying on the implicit dimension is
# deprecated and emits a warning.
F.nll_loss(F.log_softmax(y_pred, dim=1), y)

  """Entry point for launching an IPython kernel.


tensor(2.3394, grad_fn=<NllLossBackward>)

In [39]:
# Variant 2: softmax and log as two separate ops, then NLL. `dim=1` (the class
# axis of the 2-D `y_pred`) is passed explicitly; relying on the implicit
# dimension is deprecated and emits a warning.
F.nll_loss(torch.log(F.softmax(y_pred, dim=1)), y)

  """Entry point for launching an IPython kernel.


tensor(2.3394, grad_fn=<NllLossBackward>)

In [40]:
# Variant 3: cross_entropy takes the raw scores directly and combines
# log-softmax and NLL in one call; the recorded output matches the two values
# computed above (2.3394).
F.cross_entropy(y_pred, y)

tensor(2.3394, grad_fn=<NllLossBackward>)