<a href="https://colab.research.google.com/github/sdsingh/learnstreet/blob/master/Pytorch_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Installing Pyorch

!pip install torch
!pip install torchvision



In [2]:
#@title Import Dependencies

import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable

In [3]:
#@title Define Hyperparameters

input_size = 784 # img_size = (28,28) ---> 28*28=784 in total
hidden_size = 500 # number of nodes at hidden layer
num_classes = 10 # number of output classes discrete range [0,9]
num_epochs = 20 # number of times which the entire dataset is passed throughout the model
batch_size = 100 # the size of input data took for one iteration
lr = 1e-3 # size of step

In [4]:
#@title Downloading MNIST data

train_data = dsets.MNIST(root = './data', train = True,
                        transform = transforms.ToTensor(), download = True)

test_data = dsets.MNIST(root = './data', train = False,
                       transform = transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:10<00:00, 941kB/s] 


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 135kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:01<00:00, 1.11MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 4.33MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [11]:
# data, targets. need to flatten train data.
print(train_data.data.size())
print(train_data.targets.size())

torch.Size([60000, 28, 28])
torch.Size([60000])


In [16]:
#@title Loading the data

train_gen = torch.utils.data.DataLoader(dataset = train_data,
                                             batch_size = batch_size,
                                             shuffle = True)

test_gen = torch.utils.data.DataLoader(dataset = test_data,
                                      batch_size = batch_size,
                                      shuffle = False)

In [17]:
#@title Define model class

class Net(nn.Module):
  def __init__(self, input_size, hidden_size, num_classes):
    super(Net,self).__init__()
    self.fc1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, num_classes)

  def forward(self,x):
    out = self.fc1(x)
    out = self.relu(out)
    out = self.fc2(out)
    return out

In [18]:
#@title Build the model

net = Net(input_size, hidden_size, num_classes)
if torch.cuda.is_available():
  net.cuda()

In [161]:
#@title Define loss-function & optimizer

loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

In [162]:
# zips over X, Y. batch dim is unsqueezed.
train_data[0][0].shape, train_data[0][1]

(torch.Size([1, 28, 28]), 5)

In [163]:
# Batch size 100.
# Why is the first dim still unsqueezed, wtf?
it = iter(train_gen)
batch = next(it)
print(batch[0].shape, batch[1].shape)

torch.Size([100, 1, 28, 28]) torch.Size([100])


In [164]:
# batch[0].view(-1,28*28).shape
batch[0].squeeze().view(-1, 28*28)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [None]:
#@title Training the model

for epoch in range(num_epochs):
  for i ,(images,labels) in enumerate(train_gen):
    # images = images.view(-1,28*28).cuda()
    # This is what I would've done, I don't quite get what view is doing here.
    images = images.squeeze().view(-1,input_size).cuda()
    labels = labels.cuda()

    optimizer.zero_grad()
    outputs = net(images)
    loss = loss_function(outputs, labels)
    loss.backward()
    optimizer.step()

    if (i+1) % 100 == 0:
      print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                 %(epoch+1, num_epochs, i+1, len(train_data)//batch_size, loss.item()))

In [50]:
#@title Evaluating the accuracy of the model

correct = 0
total = 0
for images,labels in test_gen:
  images = Variable(images.view(-1,28*28)).cuda()
  labels = labels.cuda()

  output = net(images)
  _, predicted = torch.max(output,1)
  correct += (predicted == labels).sum()
  total += labels.size(0)

print('Accuracy of the model: %.3f %%' %((100*correct)/(total+1)))

Accuracy of the model: 98.320 %


In [57]:
for k in net.modules():
  # print(k, v)
  print(k)

Net(
  (fc1): Linear(in_features=784, out_features=500, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)
Linear(in_features=784, out_features=500, bias=True)
ReLU()
Linear(in_features=500, out_features=10, bias=True)


In [93]:
m = list(net.modules())[1:]
m[0].weight.shape, m[0].bias.shape

(torch.Size([500, 784]), torch.Size([500]))

In [103]:
images.shape

torch.Size([100, 784])

In [166]:
optimizer.zero_grad()
outputs = net(images)
loss = loss_function(outputs, labels)
# outputs = net(images[:10, :])
# loss = loss_function(outputs, labels[:10])
loss.backward()
# optimizer.step()

In [167]:
m[0].weight.grad.nonzero().shape

torch.Size([175679, 2])

In [168]:
m[0].bias.grad.nonzero().shape

torch.Size([455, 1])

In [170]:
m[2].weight.grad.nonzero().shape

torch.Size([4541, 2])

In [118]:
optimizer.zero_grad()

In [121]:
with torch.no_grad():
  o = net(images[0, :].unsqueeze(0))

In [122]:
o

tensor([[-17.4796, -11.2568,  -7.1091,  -4.2298, -20.6758,  -8.2732, -16.1996,
         -35.7887,  15.9743, -24.6586]], device='cuda:0')

### Quick Manual Repro of Forward

In [154]:
# W * x + b
# W = (ninput x nhidden1)
# x = (ninput)
# b = nhidden1
h1 = torch.matmul(m[0].weight, images[0, :]) + m[0].bias
h1[h1 < 0] = 0.0
manual_o = torch.matmul(m[2].weight, h1) + m[2].bias

In [153]:
# doesn't work after relu
# torch.allclose(h1, m[0].forward(images[0, :]))

False

In [155]:
torch.allclose(o, manual_o)

True

In [159]:
torch.softmax(o, 1)

tensor([[2.9590e-15, 1.4917e-12, 9.4407e-11, 1.6806e-09, 1.2108e-16, 2.9473e-11,
         1.0643e-14, 3.3082e-23, 1.0000e+00, 2.2560e-18]], device='cuda:0')

In [160]:
o

tensor([[-17.4796, -11.2568,  -7.1091,  -4.2298, -20.6758,  -8.2732, -16.1996,
         -35.7887,  15.9743, -24.6586]], device='cuda:0')