In [14]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets,transforms

In [15]:
transform = transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize((0.5,),(0.5,)),])
trainset = datasets.MNIST('~/.pytorch/MNIST_data/',download=True,train=True,transform=transform)
trainloader = torch.utils.data.DataLoader(trainset,batch_size=64,shuffle=True)

In [16]:
model = nn.Sequential(nn.Linear(784,128),
                     nn.ReLU(),
                     nn.Linear(128,64),
                     nn.ReLU(),
                     nn.Linear(64,10))
criterion = nn.CrossEntropyLoss()

images, labels = next(iter(trainloader))
images = images.view(images.shape[0],-1)


In [17]:
logits = model(images)
loss = criterion(logits, labels)
print(loss)

tensor(2.2810, grad_fn=<NllLossBackward>)


In [18]:
model = nn.Sequential(nn.Linear(784,128),
                     nn.ReLU(),
                     nn.Linear(128,64),
                     nn.ReLU(),
                     nn.Linear(64,10),
                     nn.LogSoftmax(dim=1))
criterion = nn.NLLLoss()

images, labels = next(iter(trainloader))
images = images.view(images.shape[0],-1)
logits = model(images)
loss = criterion(logits, labels)
print(loss)

tensor(2.3113, grad_fn=<NllLossBackward>)


# autograd

In [19]:
x = torch.randn(2,2,requires_grad=True)
print(x)

tensor([[2.2153, 0.2268],
        [0.5060, 1.3834]], requires_grad=True)


In [20]:
y = x**2
print(y)

tensor([[4.9077, 0.0514],
        [0.2561, 1.9138]], grad_fn=<PowBackward0>)


In [21]:
y.grad_fn

<PowBackward0 at 0x7f70595f3ba8>

In [22]:
z = y.mean()
z

tensor(1.7823, grad_fn=<MeanBackward0>)

In [23]:
print(x.grad)

None


In [24]:
z.backward()
print(x.grad)
print(x/2)

tensor([[1.1077, 0.1134],
        [0.2530, 0.6917]])
tensor([[1.1077, 0.1134],
        [0.2530, 0.6917]], grad_fn=<DivBackward0>)


In [25]:
#loss and autograd together

In [27]:
model = nn.Sequential(nn.Linear(784,128),
                     nn.ReLU(),
                     nn.Linear(128,64),
                      nn.ReLU(),
                      nn.Linear(64,10),
                     nn.LogSoftmax(dim=1))
criterion = nn.NLLLoss()
images, labels = next(iter(trainloader))
images = images.view(images.shape[0],-1)

logps = model(images)
loss = criterion(logps,labels)

In [28]:
print(model[0].weight.grad)
loss.backward()
print(model[0].weight.grad)

None
tensor([[-0.0002, -0.0002, -0.0002,  ..., -0.0002, -0.0002, -0.0002],
        [ 0.0016,  0.0016,  0.0016,  ...,  0.0016,  0.0016,  0.0016],
        [ 0.0014,  0.0014,  0.0014,  ...,  0.0014,  0.0014,  0.0014],
        ...,
        [-0.0011, -0.0011, -0.0011,  ..., -0.0011, -0.0011, -0.0011],
        [ 0.0018,  0.0018,  0.0018,  ...,  0.0018,  0.0018,  0.0018],
        [-0.0026, -0.0026, -0.0026,  ..., -0.0026, -0.0026, -0.0026]])


In [29]:
#training the network 

In [30]:
from torch import optim 
optimizer  = optim.SGD(model.parameters(),lr=0.01)

In [31]:
print(model[0].weight)
images, labels = next(iter(trainloader))
images.resize_(64,784)
optimizer.zero_grad()
output = model.forward(images)
loss = criterion(output, labels)
loss.backward()
print(model[0].weight.grad)

Parameter containing:
tensor([[-0.0205,  0.0074, -0.0194,  ...,  0.0139,  0.0271, -0.0056],
        [-0.0022,  0.0266, -0.0130,  ..., -0.0262, -0.0037, -0.0110],
        [-0.0312,  0.0203, -0.0195,  ..., -0.0034,  0.0249,  0.0287],
        ...,
        [-0.0150, -0.0043, -0.0352,  ...,  0.0187,  0.0121,  0.0347],
        [ 0.0062,  0.0291,  0.0128,  ...,  0.0179,  0.0021,  0.0103],
        [-0.0275, -0.0324, -0.0305,  ...,  0.0122, -0.0015, -0.0034]],
       requires_grad=True)
tensor([[ 4.7561e-04,  4.7561e-04,  4.7561e-04,  ...,  4.7561e-04,
          4.7561e-04,  4.7561e-04],
        [-1.7382e-03, -1.7382e-03, -1.7382e-03,  ..., -1.7382e-03,
         -1.7382e-03, -1.7382e-03],
        [ 1.5861e-03,  1.5861e-03,  1.5861e-03,  ...,  1.5861e-03,
          1.5861e-03,  1.5861e-03],
        ...,
        [ 9.9102e-06,  9.9102e-06,  9.9102e-06,  ...,  9.9102e-06,
          9.9102e-06,  9.9102e-06],
        [ 1.5347e-03,  1.5347e-03,  1.5347e-03,  ...,  1.5347e-03,
          1.5347e-03,  1.

In [32]:
optimizer.step()
model[0].weight

Parameter containing:
tensor([[-0.0205,  0.0074, -0.0194,  ...,  0.0138,  0.0271, -0.0056],
        [-0.0022,  0.0266, -0.0130,  ..., -0.0262, -0.0036, -0.0110],
        [-0.0312,  0.0203, -0.0195,  ..., -0.0035,  0.0249,  0.0287],
        ...,
        [-0.0150, -0.0043, -0.0352,  ...,  0.0187,  0.0121,  0.0347],
        [ 0.0062,  0.0291,  0.0128,  ...,  0.0179,  0.0021,  0.0103],
        [-0.0275, -0.0324, -0.0305,  ...,  0.0122, -0.0015, -0.0034]],
       requires_grad=True)

In [33]:
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.003)

epochs = 5

In [39]:
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        images = images.view(images.shape[0],-1)
        
        optimizer.zero_grad()
        loss= model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(trainloader)}")

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [36]:
%matplotlib inline
# import helper

images, labels = next(iter(trainloader))

img = images[0].view(1, 784)

In [37]:
with torch.no_grad():
    logps = model(img)

In [40]:
ps = torch.exp(logps)
helper.view_classify(img.view(1, 28, 28), ps)


NameError: name 'helper' is not defined