In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets,transforms

In [2]:
transform = transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize((0.5,),(0.5,)),])
trainset = datasets.MNIST('~/.pytorch/MNIST_data/',download=True,train=True,transform=transform)
trainloader = torch.utils.data.DataLoader(trainset,batch_size=64,shuffle=True)

In [3]:
model = nn.Sequential(nn.Linear(784,128),
                     nn.ReLU(),
                     nn.Linear(128,64),
                     nn.ReLU(),
                     nn.Linear(64,10))
criterion = nn.CrossEntropyLoss()

images, labels = next(iter(trainloader))
images = images.view(images.shape[0],-1)


In [4]:
logits = model(images)
loss = criterion(logits, labels)
print(loss)

tensor(2.2945, grad_fn=<NllLossBackward>)


In [5]:
model = nn.Sequential(nn.Linear(784,128),
                     nn.ReLU(),
                     nn.Linear(128,64),
                     nn.ReLU(),
                     nn.Linear(64,10),
                     nn.LogSoftmax(dim=1))
criterion = nn.NLLLoss()

images, labels = next(iter(trainloader))
images = images.view(images.shape[0],-1)
logits = model(images)
loss = criterion(logits, labels)
print(loss)

tensor(2.2844, grad_fn=<NllLossBackward>)


# autograd

In [6]:
x = torch.randn(2,2,requires_grad=True)
print(x)

tensor([[-0.8873, -1.2122],
        [-0.6242, -0.1857]], requires_grad=True)


In [7]:
y = x**2
print(y)

tensor([[0.7872, 1.4693],
        [0.3896, 0.0345]], grad_fn=<PowBackward0>)


In [8]:
y.grad_fn

<PowBackward0 at 0x7fa61650ec18>

In [9]:
z = y.mean()
z

tensor(0.6702, grad_fn=<MeanBackward0>)

In [10]:
print(x.grad)

None


In [11]:
z.backward()
print(x.grad)
print(x/2)

tensor([[-0.4436, -0.6061],
        [-0.3121, -0.0929]])
tensor([[-0.4436, -0.6061],
        [-0.3121, -0.0929]], grad_fn=<DivBackward0>)


In [12]:
#loss and autograd together

In [13]:
model = nn.Sequential(nn.Linear(784,128),
                     nn.ReLU(),
                     nn.Linear(128,64),
                      nn.ReLU(),
                      nn.Linear(64,10),
                     nn.LogSoftmax(dim=1))
criterion = nn.NLLLoss()
images, labels = next(iter(trainloader))
images = images.view(images.shape[0],-1)

logps = model(images)
loss = criterion(logps,labels)

In [14]:
print(model[0].weight.grad)
loss.backward()
print(model[0].weight.grad)

None
tensor([[ 2.2116e-03,  2.2116e-03,  2.2116e-03,  ...,  2.2116e-03,
          2.2116e-03,  2.2116e-03],
        [ 2.7368e-03,  2.7368e-03,  2.7368e-03,  ...,  2.7368e-03,
          2.7368e-03,  2.7368e-03],
        [ 1.5768e-03,  1.5768e-03,  1.5768e-03,  ...,  1.5768e-03,
          1.5768e-03,  1.5768e-03],
        ...,
        [-2.1358e-06, -2.1358e-06, -2.1358e-06,  ..., -2.1358e-06,
         -2.1358e-06, -2.1358e-06],
        [-3.8900e-03, -3.8900e-03, -3.8900e-03,  ..., -3.8900e-03,
         -3.8900e-03, -3.8900e-03],
        [-9.7898e-04, -9.7898e-04, -9.7898e-04,  ..., -9.7898e-04,
         -9.7898e-04, -9.7898e-04]])


In [15]:
#training the network 

In [16]:
from torch import optim 
optimizer  = optim.SGD(model.parameters(),lr=0.01)

In [17]:
print(model[0].weight)
images, labels = next(iter(trainloader))
images.resize_(64,784)
optimizer.zero_grad()
output = model.forward(images)
loss = criterion(output, labels)
loss.backward()
print(model[0].weight.grad)

Parameter containing:
tensor([[-0.0150,  0.0087, -0.0152,  ...,  0.0264,  0.0058,  0.0273],
        [-0.0024, -0.0034, -0.0105,  ..., -0.0350, -0.0141, -0.0277],
        [ 0.0054, -0.0096, -0.0270,  ...,  0.0140, -0.0110,  0.0033],
        ...,
        [-0.0049, -0.0096, -0.0086,  ...,  0.0239,  0.0313,  0.0108],
        [ 0.0283,  0.0084,  0.0124,  ..., -0.0262, -0.0271,  0.0070],
        [-0.0215, -0.0142,  0.0227,  ..., -0.0150,  0.0008,  0.0329]],
       requires_grad=True)
tensor([[-8.1747e-04, -8.1747e-04, -8.1747e-04,  ..., -8.1747e-04,
         -8.1747e-04, -8.1747e-04],
        [ 8.4158e-04,  8.4158e-04,  8.4158e-04,  ...,  8.4158e-04,
          8.4158e-04,  8.4158e-04],
        [-7.6560e-05, -7.6560e-05, -7.6560e-05,  ..., -7.6560e-05,
         -7.6560e-05, -7.6560e-05],
        ...,
        [-8.1962e-06, -8.1962e-06, -8.1962e-06,  ..., -8.1962e-06,
         -8.1962e-06, -8.1962e-06],
        [-1.6292e-03, -1.6292e-03, -1.6292e-03,  ..., -1.6292e-03,
         -1.6292e-03, -1.

In [18]:
optimizer.step()
model[0].weight

Parameter containing:
tensor([[-0.0150,  0.0087, -0.0152,  ...,  0.0264,  0.0058,  0.0273],
        [-0.0024, -0.0034, -0.0105,  ..., -0.0350, -0.0141, -0.0277],
        [ 0.0054, -0.0096, -0.0270,  ...,  0.0140, -0.0110,  0.0033],
        ...,
        [-0.0049, -0.0096, -0.0086,  ...,  0.0239,  0.0313,  0.0108],
        [ 0.0283,  0.0084,  0.0124,  ..., -0.0262, -0.0271,  0.0070],
        [-0.0215, -0.0142,  0.0227,  ..., -0.0150,  0.0008,  0.0329]],
       requires_grad=True)

In [19]:
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.003)

epochs = 5

In [20]:
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        images = images.view(images.shape[0],-1)
        
        optimizer.zero_grad()
        loss= model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(trainloader)}")

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [None]:
%matplotlib inline
import helper

images, labels = next(iter(trainloader))

img = images[0].view(1, 784)

In [None]:
with torch.no_grad():
    logps = model(img)

In [None]:
ps = torch.exp(logps)
helper.view_classify(img.view(1, 28, 28), ps)
