In [43]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms

In [44]:
# Preprocessing applied to every image: convert it to a tensor, then
# normalize with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

In [45]:
# MNIST training split, downloaded on demand, with `transform` applied to
# each sample.
trainset = datasets.MNIST(
    '~/.pytorch/MNIST_data/',
    train=True,
    download=True,
    transform=transform,
)

# Serve the training set in shuffled mini-batches of 64 samples.
trainloader = torch.utils.data.DataLoader(
    trainset,
    shuffle=True,
    batch_size=64,
)

In [46]:
# Fully connected classifier: 784 inputs -> 128 -> 64 -> 10 raw scores.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
)

# The network ends in a plain linear layer, so its raw scores are fed
# straight into the cross-entropy criterion.
criterion = nn.CrossEntropyLoss()

# Pull one batch from the loader.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Flatten each image in the batch into a single row.
images = images.view(images.size(0), -1)

# Forward pass, then score the predictions against the labels.
logits = model(images)
loss = criterion(logits, labels)

print(loss)

tensor(2.3084, grad_fn=<NllLossBackward>)


In [47]:
# Feed-forward network: same layer sizes as before (784 -> 128 -> 64 -> 10),
# but with a log-softmax applied across the 10 outputs of each sample.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.LogSoftmax(dim=1),
)

# Loss: negative log-likelihood, applied to the log-softmax output above.
criterion = nn.NLLLoss()

# Pull one batch from the loader.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Flatten each image in the batch into a single row.
images = images.view(images.size(0), -1)

# NOTE: despite the name, `logits` here holds the log-softmax output of the
# model, not raw scores.
logits = model(images)
loss = criterion(logits, labels)

print(loss)

tensor(2.2975, grad_fn=<NllLossBackward>)


### Autograd

In [24]:
# 2x2 tensor of random normal values; requires_grad=True asks autograd to
# record the operations performed on it.
x = torch.randn((2, 2), requires_grad=True)
print(x)

tensor([[ 0.0036, -0.5645],
        [ 0.5003, -0.8814]], requires_grad=True)


In [25]:
# Square x element-wise; because x tracks gradients, the result remembers the
# operation that produced it.
y = x**2
# grad_fn is the autograd node recorded for the operation that created y.
print(y.grad_fn)

<PowBackward0 object at 0x000001DA95610460>


In [26]:
# Collapse y to a single scalar value.
z = y.mean()

# Show x.grad before backpropagating from z.
print(x.grad)

# Backpropagate from z back to x, which fills in x.grad.
z.backward()

# Show the gradient now stored on x, followed by x itself for comparison.
print(x.grad)
print(x)

None
tensor([[ 0.0018, -0.2822],
        [ 0.2501, -0.4407]])
tensor([[ 0.0036, -0.5645],
        [ 0.5003, -0.8814]], requires_grad=True)


In [31]:
# Inspect the weight gradient of the model's first layer on either side of a
# backward pass through the current `loss`.
first_layer = model[0]

print('Before backward pass: \n', first_layer.weight.grad)

# Backpropagate the loss through the network.
loss.backward()

print('After backward pass: \n', first_layer.weight.grad)

Before backward pass: 
 None
After backward pass: 
 tensor([[ 0.0045,  0.0045,  0.0045,  ...,  0.0045,  0.0045,  0.0045],
        [ 0.0011,  0.0011,  0.0011,  ...,  0.0011,  0.0011,  0.0011],
        [-0.0014, -0.0014, -0.0014,  ..., -0.0014, -0.0014, -0.0014],
        ...,
        [-0.0003, -0.0003, -0.0003,  ..., -0.0003, -0.0003, -0.0003],
        [ 0.0068,  0.0068,  0.0068,  ...,  0.0068,  0.0068,  0.0068],
        [ 0.0019,  0.0019,  0.0019,  ...,  0.0019,  0.0019,  0.0019]])


In [35]:
from torch import optim

# An optimizer is built from the parameters it should update plus a learning
# rate; here: plain SGD over every parameter of `model` with lr = 0.01.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

In [39]:
# One optimisation step on a single batch, printing the first layer's weights
# before and after so the effect of the update can be inspected.
print("Intial weights -", model[0].weight)

# Pull one batch from the loader.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Flatten using the batch's actual size instead of an in-place
# resize_(64, 784): a hard-coded 64 breaks on any batch with a different
# number of images, and view() matches how the other cells flatten.
images = images.view(images.shape[0], -1)

# Clear gradients left over from earlier backward passes; PyTorch accumulates
# gradients across backward() calls unless they are reset.
optimizer.zero_grad()

# Forward pass, loss, and backward pass.
output = model(images)
loss = criterion(output, labels)
loss.backward()
print("gradient-", model[0].weight.grad)

# Update the parameters from the gradients just computed.
optimizer.step()
print("optimised weights", model[0].weight)

Intial weights - Parameter containing:
tensor([[-0.0148,  0.0179, -0.0317,  ..., -0.0160,  0.0206,  0.0102],
        [ 0.0124, -0.0350, -0.0317,  ..., -0.0116, -0.0186, -0.0029],
        [ 0.0162, -0.0270,  0.0120,  ..., -0.0052, -0.0203, -0.0061],
        ...,
        [-0.0083,  0.0187,  0.0257,  ...,  0.0018,  0.0022, -0.0254],
        [-0.0111,  0.0099, -0.0187,  ...,  0.0108,  0.0114, -0.0154],
        [ 0.0258,  0.0233, -0.0156,  ...,  0.0123, -0.0321,  0.0229]],
       requires_grad=True)
gradient- tensor([[-0.0005, -0.0005, -0.0005,  ..., -0.0005, -0.0005, -0.0005],
        [-0.0004, -0.0004, -0.0004,  ..., -0.0004, -0.0004, -0.0004],
        [-0.0009, -0.0009, -0.0009,  ..., -0.0009, -0.0009, -0.0009],
        ...,
        [-0.0008, -0.0008, -0.0008,  ..., -0.0008, -0.0008, -0.0008],
        [ 0.0005,  0.0005,  0.0005,  ...,  0.0005,  0.0005,  0.0005],
        [ 0.0001,  0.0001,  0.0001,  ...,  0.0001,  0.0001,  0.0001]])
optimised weights Parameter containing:
tensor([[-0.0148

In [38]:
# Take an optimizer step using whatever gradients are currently stored on the
# parameters; this cell runs no forward or backward pass of its own, so its
# effect depends on the cells executed before it.
optimizer.step()
# Show the first layer's weights after the step.
print("optimised weights", model[0].weight)

optimised weights Parameter containing:
tensor([[-0.0148,  0.0179, -0.0317,  ..., -0.0160,  0.0206,  0.0102],
        [ 0.0124, -0.0350, -0.0317,  ..., -0.0116, -0.0186, -0.0029],
        [ 0.0162, -0.0270,  0.0120,  ..., -0.0052, -0.0203, -0.0061],
        ...,
        [-0.0083,  0.0187,  0.0257,  ...,  0.0018,  0.0022, -0.0254],
        [-0.0111,  0.0099, -0.0187,  ...,  0.0108,  0.0114, -0.0154],
        [ 0.0258,  0.0233, -0.0156,  ...,  0.0123, -0.0321,  0.0229]],
       requires_grad=True)


In [49]:
# Fresh classifier (784 -> 128 -> 64 -> 10) whose final layer applies a
# log-softmax across the 10 outputs of each sample.
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))

# Negative log-likelihood loss on the log-softmax output, optimised with SGD.
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.003)

epochs = 5
for e in range(epochs):
    # Sum of per-batch losses for this epoch.
    running_loss = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1)

        # Training pass: reset accumulated gradients, run the forward pass,
        # backpropagate the loss, then update the parameters.
        optimizer.zero_grad()

        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean loss per batch for the epoch. This is a plain statement
    # after the loop rather than a for/else clause: the loop has no `break`,
    # so an `else` would always run and only obscure the control flow.
    print(f"Training loss: {running_loss/len(trainloader)}")

Training loss: 1.9305082241863585
Training loss: 0.8868054437484822
Training loss: 0.532507248715297
Training loss: 0.43182582889538584
Training loss: 0.38647350848420087
