### Imports

In [38]:
import torch
from torch import autograd

### Tensors and Variables
Declare a torch tensor of the size mentioned as argument. This is auto-initialized with zeros.

In [13]:
x = torch.Tensor(2)
y = torch.Tensor(4, 2)
print(x)
print(y)

tensor([0.0000, 0.0000])
tensor([[0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000]])


In [16]:
print(len(y))

4


#### Declaring a tensor with random values

In [21]:
randx = torch.rand(2, 3)
randx

tensor([[0.4711, 0.5733, 0.3298],
        [0.8608, 0.7944, 0.7181]])

#### Declaring a tensor with ones

In [32]:
onex = torch.ones(2,3)
onex

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [24]:
onex*2

tensor([[2., 2., 2.],
        [2., 2., 2.]])

In [33]:
onex[:, 1] = onex[:, 1]*2
onex

tensor([[1., 2., 1.],
        [1., 2., 1.]])

### Autograd

Creating a variable from a tensor

In [51]:
x = autograd.Variable(torch.ones(2, 2) * 2, requires_grad=True)
x

tensor([[2., 2.],
        [2., 2.]], requires_grad=True)

In [52]:
z = 2 * (x * x) + 5 * x
z

tensor([[18., 18.],
        [18., 18.]], grad_fn=<ThAddBackward>)

To compute the gradient, we need to compute it against a vector x to get a d/dx

In [53]:
z.backward(torch.ones(2, 2))
print(x.grad)

tensor([[13., 13.],
        [13., 13.]])


This is the result we get after taking the derivative of 2x^2 + 5x i.e. 4x + 5

### Neural Network Class

In [54]:
import torch.nn as nn
import torch.nn.functional as F

In [94]:
# Inheriting from the nn.Module class
class Net(nn.Module):
    def __init__(self):
        # "super" creates an instance of the base nn.Module class
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 200)
        self.fc2 = nn.Linear(200, 200)
        self.fc3 = nn.Linear(200, 10)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x)

A fully connected neural network layer is represented by the nn.Linear object, with the first argument in the definition being the number of nodes in layer l and the next argument being the number of nodes in layer l+1. As you can observe, the first layer takes the 28 x 28 input pixels and connects to the first 200 node hidden layer. Then we have another 200 to 200 hidden layer, and finally a connection between the last hidden layer and the output layer.

### Visualizing the network

In [96]:
net = Net()
print(net)

Net(
  (fc1): Linear(in_features=784, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=200, bias=True)
  (fc3): Linear(in_features=200, out_features=10, bias=True)
)


### Optimizer and Loss

In [97]:
from torch import optim

learning_rate = 0.01
momentum = 0.9

# create a stochastic gradient descent optimizer
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)

# create a Negative Log Likelihood function
criterion = nn.NLLLoss()

### Loading the data - MNIST Dataset <br /> 
we'll also need DataLoaders for the dataset. This is where TorchVision comes into play. It let's use load the MNIST dataset in a handy way. We'll use a batch_size of 64 for training and size 1000 for testing on this dataset. The values 0.1307 and 0.3081 used for the Normalize() transformation below are the global mean and standard deviation of the MNIST dataset, we'll take them as a given here.

In [98]:
import torchvision

n_epochs = 3
batch_size_train = 64
batch_size_test = 1000
log_interval = 10
random_seed = 1
torch.backends.cudnn.enabled = True
torch.manual_seed(random_seed)

# Training data set
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('./mnist_train/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_train, shuffle=True)

# Testing data set
test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('./mnist_train/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_test, shuffle=True)

### Running the Training Loop

In [99]:
log_interval=10
epochs=10
log_interval=10

# run the main training loop
for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = autograd.Variable(data), autograd.Variable(target)
        # resize data from (batch_size, 1, 28, 28) to (batch_size, 28*28)
        data = data.view(-1, 28*28)
        optimizer.zero_grad()
        net_out = net(data)
        loss = criterion(net_out, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.data[0]))

# run a test loop
test_loss = 0
correct = 0
for data, target in test_loader:
    data, target = autograd.Variable(data, volatile=True), autograd.Variable(target)
    data = data.view(-1, 28 * 28)
    net_out = net(data)
    # sum up batch loss
    test_loss += criterion(net_out, target).data[0]
    pred = net_out.data.max(1)[1]  # get the index of the max log-probability
    correct += pred.eq(target.data).sum()

test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

  

















Test set: Average loss: 0.0001, Accuracy: 9817/10000 (98%)

