In [None]:
# License: BSD
# Author: Sasank Chilamkurthy

from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

# Enable matplotlib's interactive mode for the plotting cells in this notebook.
plt.ion()   # interactive mode

In [None]:
# Number of images per training mini-batch.
batch_sz = 8

# MNIST images have a single channel, so Normalize must receive exactly one
# mean and one std value (a 3-tuple fails to broadcast against a 1-channel
# tensor). (x - 0.5) / 0.5 is the inverse of the ``img / 2 + 0.5`` step that
# ``imshow`` applies before plotting.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split: downloaded into ./data on first use, reshuffled every epoch.
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                      download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_sz,
                                          shuffle=True, num_workers=2)

# Test split: served one image at a time (batch_size=1).
testset = torchvision.datasets.MNIST(root='./data', train=False,
                                     download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=1,
                                         shuffle=True, num_workers=2)

# Label index -> printable class name; for digits the two coincide.
classes = list(range(10))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img):
    """Draw an image tensor on the current matplotlib axes.

    ``img`` is a tensor whose first axis is moved to the last position
    before plotting (channels-first -> channels-last). Pixel values are first
    mapped back with ``img / 2 + 0.5``, the inverse of a Normalize step that
    uses mean 0.5 and std 0.5.
    """
    restored = img / 2 + 0.5     # unnormalize
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channels_last)


# Fetch one shuffled mini-batch from the training loader.
# The builtin next() is used because DataLoader iterators in current PyTorch
# releases do not expose a ``.next()`` method.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Show every image of the batch side by side in a single grid.
imshow(torchvision.utils.make_grid(images))
# Print one label per displayed image (the whole batch, not just the first 4).
print(' '.join('%5s' % classes[int(label)] for label in labels))

In [None]:
#https://github.com/pytorch/tutorials/blob/master/beginner_source/former_torchies/nn_tutorial.py

# -*- coding: utf-8 -*-
"""
nn package
==========
We’ve redesigned the nn package, so that it’s fully integrated with
autograd. Let's review the changes.
**Replace containers with autograd:**
    You no longer have to use Containers like ``ConcatTable``, or modules like
    ``CAddTable``, or use and debug with nngraph. We will seamlessly use
    autograd to define our neural networks. For example,
    * ``output = nn.CAddTable():forward({input1, input2})`` simply becomes
      ``output = input1 + input2``
    * ``output = nn.MulConstant(0.5):forward(input)`` simply becomes
      ``output = input * 0.5``
**State is no longer held in the module, but in the network graph:**
    Using recurrent networks should be simpler because of this reason. If
    you want to create a recurrent network, simply use the same Linear layer
    multiple times, without having to think about sharing weights.
    .. figure:: /_static/img/torch-nn-vs-pytorch-nn.png
       :alt: torch-nn-vs-pytorch-nn
       torch-nn-vs-pytorch-nn
**Simplified debugging:**
    Debugging is intuitive using Python’s pdb debugger, and **the debugger
    and stack traces stop at exactly where an error occurred.** What you see
    is what you get.
Example 1: ConvNet
------------------
Let’s see how to create a small ConvNet.
All of your networks are derived from the base class ``nn.Module``:
-  In the constructor, you declare all the layers you want to use.
-  In the forward function, you define how your model is going to be
   run, from input to output
"""

import torch
import torch.nn as nn
import torch.nn.functional as F


class MNISTConvNet(nn.Module):
    """Small convolutional classifier that maps 1-channel images to 10 scores.

    Pipeline, as wired in ``forward``::

        conv1 (1 -> 10, 5x5) -> ReLU -> 2x2 max-pool
        conv2 (10 -> 20, 5x5) -> ReLU -> 2x2 max-pool
        flatten -> fc1 (320 -> 50) -> ReLU -> fc2 (50 -> 10)

    ``fc1`` expects 320 features per sample, i.e. 20 channels of 4x4 maps,
    which is what the two conv/pool stages produce for a 28x28 input.
    """

    def __init__(self):
        # All sub-modules are instantiated here; they are later reachable
        # under the same attribute names (e.g. ``net.conv1.weight``).
        super(MNISTConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(10, 20, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(320, 50)  # 320 = 20 channels * 4 * 4
        self.fc2 = nn.Linear(50, 10)

    def forward(self, input):
        """Compute class scores for a mini-batch.

        Args:
            input: 4D tensor ``(nSamples, 1, Height, Width)``; Height and
                Width must leave 320 features after the two conv/pool stages.

        Returns:
            Tensor of shape ``(nSamples, 10)`` holding raw, unbounded class
            scores (logits).
        """
        x = self.pool1(F.relu(self.conv1(input)))
        x = self.pool2(F.relu(self.conv2(x)))

        # forward() is plain Python: data-dependent branches and loops that
        # reuse the same module several times are legal here and are tracked
        # by autograd, because modules hold no per-call state.

        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        # No activation on the last layer: nn.CrossEntropyLoss applies
        # log-softmax itself, and a ReLU here would clamp negative logits to
        # zero and cut off their gradient.
        return self.fc2(x)

###############################################################
# Let's use the defined ConvNet now.
# You create an instance of the class first.


# Build one instance of the network and print its string representation,
# which lists the sub-modules declared in __init__.
net = MNISTConvNet()
print(net)

########################################################################
# .. note::
#
#     ``torch.nn`` only supports mini-batches. The entire ``torch.nn``
#     package only supports inputs that are a mini-batch of samples, and not
#     a single sample.
#
#     For example, ``nn.Conv2d`` will take in a 4D Tensor of
#     ``nSamples x nChannels x Height x Width``.
#
#     If you have a single sample, just use ``input.unsqueeze(0)`` to add
#     a fake batch dimension.


In [None]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

NUM_EPOCHS = 10
LOG_EVERY = 2000               # mini-batches per running-loss report
MAX_BATCHES_PER_EPOCH = None   # set to a small int for a quick debugging run

# Mean loss of each LOG_EVERY-batch window, in training order.
losses = []

# Snapshot of the most recent conv1 weight gradient; holds a placeholder
# string until the first backward pass. conv2grad is never updated below.
conv1grad = "nothingYet"
conv2grad = "nothingHereEither"

for epoch in range(NUM_EPOCHS):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Optional cap for smoke tests. With the default (None) the whole
        # epoch is consumed, so the periodic logging below is reachable.
        if MAX_BATCHES_PER_EPOCH is not None and i >= MAX_BATCHES_PER_EPOCH:
            break

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        # forward + backward
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()

        # Keep an independent copy (not a reference) of the conv1 gradient
        # for later inspection. To experiment with altered gradients, assign
        # to net.conv1.weight.grad here, before optimizer.step().
        conv1grad = net.conv1.weight.grad.detach().clone()

        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % LOG_EVERY == LOG_EVERY - 1:
            mean_loss = running_loss / LOG_EVERY
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, mean_loss))
            losses.append(mean_loss)
            running_loss = 0.0

print('Finished Training')



In [None]:
#PRINTS FOR DAYS


########################################################################
# The output of the ConvNet ``out`` is a ``Tensor``. We compute the loss
# using that, and that results in ``err`` which is also a ``Tensor``.
# Calling ``.backward`` on ``err`` hence will propagate gradients all the
# way through the ConvNet to its weights
#
# Let's access individual layer weights and gradients:

# print("THIS IS CONV1 WEIGHT GRAD SIZE")
# print(net.conv1.weight.grad.size())

# print("THIS IS CONV1 WEIGHT GRAD")
# print(net.conv1.weight.grad)

#'MaxPool2d' object has no attribute 'weight'
# print("THIS IS POOL1 WEIGHT GRAD SIZE")
# print(net.pool1.weight.grad.size())

# print("THIS IS POOL1 WEIGHT GRAD")
# print(net.pool1.weight.grad)


# print("THIS IS CONV2 WEIGHT GRAD SIZE")
# print(net.conv2.weight.grad.size())

# print("THIS IS CONV2 WEIGHT GRAD")
# print(net.conv2.weight.grad)

# print("THIS IS FC1 WEIGHT GRAD SIZE")
# print(net.fc1.weight.grad.size())

# print("THIS IS FC1 WEIGHT GRAD")
# print(net.fc1.weight.grad)

# print("THIS IS FC2 WEIGHT GRAD SIZE")
# print(net.fc2.weight.grad.size())

# print("THIS IS FC2 WEIGHT GRAD")
# print(net.fc2.weight.grad)

In [None]:
# Each entry of ``losses`` is a running-loss average over 2000 mini-batches,
# so the x axis counts logging steps, not epochs.
plt.plot(losses)
plt.title("Training Loss (mean per 2000 mini-batches)")
plt.xlabel('Logging step')
plt.ylabel('Loss')
plt.show()

In [None]:
# Pull one batch from the test loader.
# The builtin next() is used because DataLoader iterators in current PyTorch
# releases do not expose a ``.next()`` method.
dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[int(label)] for label in labels))

Process Process-103:
Process Process-104:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/silver/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 52, in _worker_loop
    r = index_queue.get()
  File "/Users/silver/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 52, in _worker_loop
    r = index_queue.get()
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res

In [None]:
# Inspect the gradient currently stored on the first conv layer's weights.
first_layer_weight = net.conv1.weight
print(first_layer_weight.grad)

# #outputs = net(images)#forward pass

# print(outputs)
# loss = criterion(outputs, labels)
# print(loss.item())
# print(net.conv1.weight.data.grad)
# print(net.conv1.weight.data)


In [None]:
# Run the forward pass on the test images fetched above. Without this,
# ``outputs`` would still hold the scores of the last *training* batch and
# the prediction would not correspond to the displayed image.
with torch.no_grad():   # inference only, no graph needed
    outputs = net(images)

# Index of the highest score along the class dimension = predicted class.
_, predicted = torch.max(outputs, 1)

print('Predicted: ', ' '.join('%5s' % classes[int(p)] for p in predicted))