In [22]:
# -*- coding: utf-8 -*-
"""
nn package
==========
We’ve redesigned the nn package, so that it’s fully integrated with
autograd. Let's review the changes.
**Replace containers with autograd:**
    You no longer have to use Containers like ``ConcatTable``, or modules like
    ``CAddTable``, or use and debug with nngraph. We will seamlessly use
    autograd to define our neural networks. For example,
    * ``output = nn.CAddTable():forward({input1, input2})`` simply becomes
      ``output = input1 + input2``
    * ``output = nn.MulConstant(0.5):forward(input)`` simply becomes
      ``output = input * 0.5``
**State is no longer held in the module, but in the network graph:**
    Using recurrent networks should be simpler because of this reason. If
    you want to create a recurrent network, simply use the same Linear layer
    multiple times, without having to think about sharing weights.
    .. figure:: /_static/img/torch-nn-vs-pytorch-nn.png
       :alt: torch-nn-vs-pytorch-nn
       torch-nn-vs-pytorch-nn
**Simplified debugging:**
    Debugging is intuitive using Python’s pdb debugger, and **the debugger
    and stack traces stop at exactly where an error occurred.** What you see
    is what you get.
Example 1: ConvNet
------------------
Let’s see how to create a small ConvNet.
All of your networks are derived from the base class ``nn.Module``:
-  In the constructor, you declare all the layers you want to use.
-  In the forward function, you define how your model is going to be
   run, from input to output
"""

import torch
import torch.nn as nn
import torch.nn.functional as F


class MNISTConvNet(nn.Module):
    """Small ConvNet: two conv/ReLU/max-pool stages followed by two linear layers.

    The first linear layer expects 320 flattened features, i.e. the
    20 x 4 x 4 feature map that a 1 x 28 x 28 input yields after both
    convolution/pooling stages. The network produces 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        # Every layer the model uses is created here. Assigning it to an
        # attribute registers it with the module, and the attribute name is
        # how it is reached later (e.g. ``net.conv1``).
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(10, 20, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, input):
        """Run one mini-batch through the network and return the result.

        The network structure is defined by this method. It takes a single
        input here, but a forward function may accept more.
        """
        stage1 = self.pool1(F.relu(self.conv1(input)))
        stage2 = self.pool2(F.relu(self.conv2(stage1)))

        # Arbitrary Python is allowed in a model definition and is handled
        # correctly by autograd, for example data-dependent branches:
        #     if x.gt(0) > x.numel() / 2:
        #         ...
        # Modules hold no ephemeral state, so the same module may also be
        # applied several times within one forward pass, e.g. in a loop:
        #     while x.norm(2) < 10:
        #         x = self.conv1(x)

        # Collapse every non-batch dimension into one feature vector.
        batch_size = stage2.size(0)
        flat = stage2.view(batch_size, -1)

        hidden = F.relu(self.fc1(flat))
        return F.relu(self.fc2(hidden))

###############################################################
# Let's use the defined ConvNet now.
# You create an instance of the class first.


# Build the network; printing a module shows its registered layers.
net = MNISTConvNet()
print(net)

########################################################################
# .. note::
#
#     ``torch.nn`` only supports mini-batches. The entire ``torch.nn``
#     package only supports inputs that are a mini-batch of samples, and not
#     a single sample.
#
#     For example, ``nn.Conv2d`` will take in a 4D Tensor of
#     ``nSamples x nChannels x Height x Width``.
#
#     If you have a single sample, just use ``input.unsqueeze(0)`` to add
#     a fake batch dimension.
#
# Create a mini-batch containing a single sample of random data and send the
# sample through the ConvNet.

# One random sample shaped nSamples x nChannels x Height x Width = 1x1x28x28.
# NOTE: the name ``input`` shadows the Python builtin; it is kept because
# later statements in this file pass the same variable to ``net`` again.
input = torch.randn(1, 1, 28, 28)
out = net(input)
print(out.size())  # size of the network output for this mini-batch

########################################################################
# Define a dummy target label and compute error using a loss function.

target = torch.tensor([3], dtype=torch.long)  # dummy class index for the single sample
loss_fn = nn.CrossEntropyLoss()  # LogSoftmax + ClassNLL Loss
err = loss_fn(out, target)
err.backward()  # fills the ``.grad`` fields that are inspected further below

print(err)

########################################################################
# The output of the ConvNet ``out`` is a ``Tensor``. We compute the loss
# using that, and that results in ``err`` which is also a ``Tensor``.
# Calling ``.backward`` on ``err`` hence will propagate gradients all the
# way through the ConvNet to its weights.
#
# Let's access individual layer weights and gradients:

# Size of the gradient accumulated for conv1's weight by ``err.backward()``.
print(net.conv1.weight.grad.size())

########################################################################
# ``.detach()`` is used instead of ``.data``: both give a tensor that is not
# tracked by autograd (so the norm prints as a plain value), but ``.detach()``
# is the supported way to obtain one.
print(net.conv1.weight.detach().norm())  # norm of the weight
print(net.conv1.weight.grad.detach().norm())  # norm of the gradients

########################################################################
# Forward and Backward Function Hooks
# -----------------------------------
#
# We’ve inspected the weights and the gradients. But how about inspecting
# / modifying the output and grad\_output of a layer?
#
# We introduce **hooks** for this purpose.
#
# You can register a function on a ``Module`` or a ``Tensor``.
# The hook can be a forward hook or a backward hook.
# The forward hook will be executed when a forward call is executed.
# The backward hook will be executed in the backward phase.
# Let’s look at an example.
#
# We register a forward hook on conv2 and print some information


def printnorm(self, input, output):
    """Forward hook: print the types and sizes of a layer's input and output.

    Args:
        self: the module the hook is attached to; only its class name is used.
        input: tuple of packed inputs; only ``input[0]`` is inspected.
        output: the layer output; its ``.data`` tensor is measured.

    Returns:
        None. Everything is written to stdout.
    """
    layer_name = self.__class__.__name__
    print('Inside ' + layer_name + ' forward')
    print('')

    # Type summary of everything the hook received.
    print('input: ', type(input))
    first_input = input[0]
    print('input[0]: ', type(first_input))
    print('output: ', type(output))
    print('')

    # Sizes, plus the norm of the output.
    print('input size:', first_input.size())
    raw_output = output.data
    print('output size:', raw_output.size())
    print('output norm:', raw_output.norm())


# Attach ``printnorm`` to conv2. The hook is not removed in this section, so
# it also runs on every later forward pass through ``net``.
net.conv2.register_forward_hook(printnorm)

# Run a forward pass so that the hook fires.
out = net(input)

########################################################################
#
# We register a backward hook on conv2 and print some information


def printgradnorm(self, grad_input, grad_output):
    """Backward hook: print the types and sizes of a layer's gradients.

    A gradient entry may be ``None`` (for example when the hooked layer's
    input does not require a gradient). In that case ``None`` is printed for
    its size and norm instead of raising ``AttributeError``.

    Args:
        self: the module the hook is attached to; only its class name is used.
        grad_input: tuple of gradients w.r.t. the layer inputs; only the
            first entry is inspected.
        grad_output: tuple of gradients w.r.t. the layer outputs; only the
            first entry is inspected.

    Returns:
        None. Everything is written to stdout.
    """
    def _describe(grad, method):
        # Call ``grad.<method>()`` unless the gradient is missing.
        return None if grad is None else getattr(grad, method)()

    print('Inside ' + self.__class__.__name__ + ' backward')
    print('Inside class:' + self.__class__.__name__)
    print('')
    print('grad_input: ', type(grad_input))
    print('grad_input[0]: ', type(grad_input[0]))
    print('grad_output: ', type(grad_output))
    print('grad_output[0]: ', type(grad_output[0]))
    print('')
    print('grad_input size:', _describe(grad_input[0], 'size'))
    print('grad_output size:', _describe(grad_output[0], 'size'))
    print('grad_input norm:', _describe(grad_input[0], 'norm'))


# Attach ``printgradnorm`` to conv2. The hook is not removed in this section,
# so it also runs on every later backward pass through ``net``.
# NOTE(review): newer PyTorch releases reportedly deprecate
# ``register_backward_hook`` in favour of ``register_full_backward_hook`` --
# confirm against the installed version before switching.
net.conv2.register_backward_hook(printgradnorm)

# A fresh forward pass and loss are needed before backward can run again.
out = net(input)
err = loss_fn(out, target)
err.backward()


MNISTConvNet(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)
torch.Size([1, 10])
tensor(2.2835)
torch.Size([10, 1, 5, 5])
tensor(1.8193)
tensor(0.5207)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([1, 10, 12, 12])
output size: torch.Size([1, 20, 8, 8])
output norm: tensor(13.4471)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([1, 10, 12, 12])
output size: torch.Size([1, 20, 8, 8])
output norm: tensor(13.4471)
Inside Conv2d backward
Inside class

In [23]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Settings for the loop below.
num_epochs = 1       # number of full passes over ``trainloader``
log_interval = 2000  # report the mean loss every this many mini-batches

# One entry per reporting window: the mean loss over ``log_interval`` batches.
losses = []

# NOTE: ``trainloader`` is not defined in this section; it is assumed to be an
# iterable of ``(inputs, labels)`` mini-batches created elsewhere.
# Any hooks still registered on ``net`` fire on every iteration of this loop.
for epoch in range(num_epochs):
    # ``torch.enable_grad()`` switches gradient tracking back on in case an
    # enclosing scope disabled it; when tracking is already on it changes
    # nothing.
    with torch.enable_grad():
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data

            # gradients accumulate across backward calls, so reset them first
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # accumulate the loss and report once per full window
            running_loss += loss.item()
            if i % log_interval == log_interval - 1:
                avg_loss = running_loss / log_interval
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, avg_loss))

                losses.append(avg_loss)
                running_loss = 0.0

print('Finished Training')


Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(19.8085)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       2.8887)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(20.2112)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
gra

Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(22.3037)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.7419)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(21.5417)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
gra

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.8286)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(21.7138)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-03 *
       9.6416)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(21.2216)
Inside Conv2d backward
Inside class:Conv2d

grad

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.9779)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(21.8384)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       1.0365)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: to

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       2.3030)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(21.5620)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       2.1913)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: to

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.7536)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(20.4523)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       2.6028)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(20.7658)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class '

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.5288)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(21.3224)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.4805)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(21.2877)
Inside Conv2d backward
Inside class:Conv2d

grad

grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       2.9535)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(21.7206)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       2.9261)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output n

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.8482)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(23.0387)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.9342)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: to

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.2129)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(23.0569)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.1719)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(23.1913)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class '

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.1479)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(24.1123)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.9866)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(24.1923)
Inside Conv2d backward
Inside class:Conv2d

grad

grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       6.2477)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(24.4114)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.5605)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output n

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       6.3678)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(25.3637)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.6610)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: to

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.5965)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(27.2304)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       5.4134)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(27.6045)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class '

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.0194)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(28.8521)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.0152)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(29.9167)
Inside Conv2d backward
Inside class:Conv2d

grad

grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       7.0303)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(30.8831)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       5.3651)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output n

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       5.1873)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(33.0326)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       5.9968)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: to

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.2804)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(35.1942)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       2.5036)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(34.1864)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class '

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       7.5626)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(37.4202)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       6.8925)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(36.6063)
Inside Conv2d backward
Inside class:Conv2d

grad

Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(38.3944)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       6.6912)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(37.1060)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
gra

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       7.5269)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(45.7111)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       9.9845)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: to

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.4949)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(51.6638)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       6.9023)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(55.6014)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class '

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       5.7843)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(55.5437)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       6.8563)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(52.0283)
Inside Conv2d backward
Inside class:Conv2d

grad

grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       8.2574)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(55.3004)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       7.5515)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output n

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1089)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(59.3510)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       6.1142)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       8.4876)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(57.5858)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       6.4244)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(59.9640)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class '

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       8.9463)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(62.0230)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       8.9339)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(62.0815)
Inside Conv2d backward
Inside class:Conv2d

grad

grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       5.2352)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(73.4423)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       7.3487)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output n

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       5.1977)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(76.4436)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.8716)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: to

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       7.5837)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(73.6253)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1036)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(76.5344)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1180)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(81.0836)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       9.1570)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(83.5339)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tupl

grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       7.7590)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(85.0231)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1274)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(82.6046)


Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1012)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(88.1104)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       9.8372)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       5.9079)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(94.5344)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       7.0542)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(90.9526)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class '

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.0505)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(88.2902)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1577)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(84.4142)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tupl

grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1132)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(89.7010)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.0248)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(97.9610)


Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.5040)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(88.8754)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.4176)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: to

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       9.9300)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(106.4271)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1301)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(92.7749)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.2296)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(105.2027)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1241)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(96.7125)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  

grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1239)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(110.8491)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       1.7667)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(97.0058)

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       7.1052)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(108.3002)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1092)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12,

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       9.5761)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(99.9798)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       6.0286)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(104.4640)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       1.5230)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(104.5024)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1322)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(106.8628)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tu

grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       1.9536)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(109.3929)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       1.8511)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output 

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.6432)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(109.5273)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.9397)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: t

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1164)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(113.2351)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.2482)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(109.6717)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.T

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.1075)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(111.9348)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       4.6792)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(105.2218)
Inside Conv2d backward
Inside class:Conv2d

gr

grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1177)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(116.3996)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1248)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(110.8803)
Inside Conv2d backw

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1918)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(112.3345)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.2531)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: to

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       9.7883)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(110.5780)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       1.8369)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(120.0853)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class

grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.2310)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(115.8515)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1141)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(126.9285)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]: 

grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       8.2453)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(112.6094)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1056)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(120.8377

Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(122.6698)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       2.8850)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(120.9665)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
g

grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-03 *
       4.0650)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(130.4940)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(1.00000e-02 *
       3.4029)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(125.6591)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class

Process Process-17:
Process Process-18:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


 <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1837)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(109.0241)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.Tensor'>

grad_input size: torch.Size([4, 10, 12, 12])
grad_output size: torch.Size([4, 20, 8, 8])
grad_input norm: tensor(0.1123)
Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size: torch.Size([4, 10, 12, 12])
output size: torch.Size([4, 20, 8, 8])
output norm: tensor(127.4377)
Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.T

  File "/Users/silver/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 52, in _worker_loop
    r = index_queue.get()
Exception ignored in: <bound method _DataLoaderIter.__del__ of <torch.utils.data.dataloader._DataLoaderIter object at 0x109e2ca20>>
Traceback (most recent call last):
  File "/Users/silver/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 347, in __del__
    def __del__(self):
  File "/Users/silver/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 178, in handler
    _error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 63287) exited unexpectedly with exit code 1.


KeyboardInterrupt: 

  File "/Users/silver/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 52, in _worker_loop
    r = index_queue.get()
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/Users/silver/anaconda3/lib/python3.6/multiprocessing/connec