In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms

# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                ])
# Download and load the training data
trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

In [2]:
# Build a feed-forward network
model = nn.Sequential(nn.Linear(784, 128),
                        nn.ReLU(),
                        nn.Linear(128, 64),
                        nn.ReLU(),
                        nn.Linear(64, 10))

# Define the loss
criterion = nn.CrossEntropyLoss()

# Get our data
images, labels = next(iter(trainloader))

# Flatten images
images = images.view(images.shape[0], -1)

# Forward pass, get our logits
logits = model(images)
# Calculate the loss the logits and the labels
loss = criterion(logits, labels)

print(loss)

tensor(2.3240, grad_fn=<NllLossBackward>)


In [3]:
# Build a feed-forward network
model = nn.Sequential(nn.Linear(784, 128),
                        nn.ReLU(),
                        nn.Linear(128, 64),
                        nn.ReLU(),
                        nn.Linear(64, 10),
                        nn.LogSoftmax(dim=1))

# Define the loss
criterion = nn.NLLLoss()

# Get our data
images, labels = next(iter(trainloader))

# Flatten images
images = images.view(images.shape[0], -1)

# Forward pass, get our logits
logits = model(images)
# Calculate the loss the logits and the labels
loss = criterion(logits, labels)

print(loss)

tensor(2.3425, grad_fn=<NllLossBackward>)


In [4]:
x = torch.randn(2, 2, requires_grad=True)
print(x)

tensor([[-0.8142,  0.4013],
        [ 1.2131,  0.4736]], requires_grad=True)


In [5]:
y = x**2
print(y)

tensor([[0.6629, 0.1610],
        [1.4715, 0.2243]], grad_fn=<PowBackward0>)


In [6]:
print(y.grad_fn)

<PowBackward0 object at 0x000001F37522F438>


In [7]:
z = y.mean()
print(z)

tensor(0.6299, grad_fn=<MeanBackward1>)


In [8]:
print(x.grad)

None


In [9]:
z.backward()
print(x.grad)
print(x/2)

tensor([[-0.4071,  0.2006],
        [ 0.6065,  0.2368]])
tensor([[-0.4071,  0.2006],
        [ 0.6065,  0.2368]], grad_fn=<DivBackward0>)


In [10]:
# Build a feed-forward network
model = nn.Sequential(nn.Linear(784, 128),
                        nn.ReLU(),
                        nn.Linear(128, 64),
                        nn.ReLU(),
                        nn.Linear(64, 10),
                        nn.LogSoftmax(dim=1))

# Define the loss
criterion = nn.NLLLoss()
# Get our data
images, labels = next(iter(trainloader))
# Flatten images
images = images.view(images.shape[0], -1)
# Forward pass, get our logits
logps = model(images)
# Calculate the loss the logits and the labels
loss = criterion(logps, labels)

print(loss)

tensor(2.2915, grad_fn=<NllLossBackward>)


In [11]:
print('Before backward pass: \n', model[0].weight.grad)
loss.backward()
print('After backward pass: \n', model[0].weight.grad)

Before backward pass: 
 None
After backward pass: 
 tensor([[-0.0015, -0.0015, -0.0015,  ..., -0.0015, -0.0015, -0.0015],
        [-0.0000, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.0000],
        [-0.0008, -0.0008, -0.0008,  ..., -0.0008, -0.0008, -0.0008],
        ...,
        [ 0.0022,  0.0022,  0.0022,  ...,  0.0022,  0.0022,  0.0022],
        [-0.0007, -0.0007, -0.0007,  ..., -0.0007, -0.0007, -0.0007],
        [ 0.0003,  0.0003,  0.0003,  ...,  0.0003,  0.0003,  0.0003]])


In [12]:
from torch import optim
optimizer = optim.SGD(model.parameters(), lr = 0.01)

In [13]:
print('Initial weights - ', model[0].weight)
images, labels = next(iter(trainloader))
images.resize_(64, 784)

#Clear the gradients, do this because gradients are accumulated
optimizer.zero_grad()

#Forward pass, then backward pass, then update weights
output = model.forward(images)
loss = criterion(output, labels)
loss.backward()
print('Gradient -', model[0].weight.grad)

Initial weights -  Parameter containing:
tensor([[-0.0233, -0.0130,  0.0236,  ..., -0.0347, -0.0210, -0.0334],
        [-0.0247, -0.0226,  0.0114,  ..., -0.0096,  0.0272,  0.0067],
        [-0.0122,  0.0064,  0.0317,  ...,  0.0349,  0.0203,  0.0020],
        ...,
        [-0.0143,  0.0216, -0.0322,  ..., -0.0078, -0.0043, -0.0028],
        [-0.0151,  0.0250, -0.0248,  ...,  0.0130, -0.0236,  0.0247],
        [ 0.0102,  0.0086,  0.0191,  ..., -0.0248,  0.0142, -0.0224]],
       requires_grad=True)
Gradient - tensor([[ 0.0037,  0.0037,  0.0037,  ...,  0.0037,  0.0037,  0.0037],
        [-0.0005, -0.0005, -0.0005,  ..., -0.0005, -0.0005, -0.0005],
        [ 0.0007,  0.0007,  0.0007,  ...,  0.0007,  0.0007,  0.0007],
        ...,
        [ 0.0033,  0.0033,  0.0033,  ...,  0.0033,  0.0033,  0.0033],
        [-0.0007, -0.0007, -0.0007,  ..., -0.0007, -0.0007, -0.0007],
        [ 0.0012,  0.0012,  0.0012,  ...,  0.0012,  0.0012,  0.0012]])


In [14]:
optimizer.step()
print('Update weights -', model[0].weight)

Update weights - Parameter containing:
tensor([[-0.0233, -0.0130,  0.0236,  ..., -0.0348, -0.0211, -0.0334],
        [-0.0247, -0.0226,  0.0114,  ..., -0.0096,  0.0272,  0.0067],
        [-0.0123,  0.0064,  0.0317,  ...,  0.0349,  0.0203,  0.0020],
        ...,
        [-0.0143,  0.0215, -0.0322,  ..., -0.0078, -0.0043, -0.0028],
        [-0.0151,  0.0250, -0.0248,  ...,  0.0130, -0.0236,  0.0247],
        [ 0.0102,  0.0086,  0.0191,  ..., -0.0248,  0.0142, -0.0224]],
       requires_grad=True)


In [17]:
model = nn.Sequential(nn.Linear(784, 128),
                     nn.ReLU(),
                     nn.Linear(128, 64),
                     nn.ReLU(),
                     nn.Linear(64, 10),
                     nn.LogSoftmax(dim = 1))

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.003)
epochs = 5
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        images = images.view(images.shape[0], -1)
        
        optimizer.zero_grad()
        
        output = model.forward(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
    else:
        print(f'Training losss: {running_loss/len(trainloader)}')

Training losss: 1.9368858578871053
Training losss: 0.8695029386960621
Training losss: 0.5213007675622826
Training losss: 0.42499583505237026
Training losss: 0.3808873127232482


In [19]:
%matplotlib inline
import helper

In [22]:
images, labels = next(iter(trainloader))

img = images[0].view(1, 784)

with torch.no_grad():
    logits = model.forward(img)
    
ps = F.softmax(logits, dim=1)
helper.view_classify(img.view(1, 28, 28), ps)

AttributeError: module 'helper' has no attribute 'view_classify'