# Question:

Why can `ConvTranspose2d` be seen as the gradient of `Conv2d` with respect to its input?

# Answer:

See the blog article: [Blog Link](https://simonjisu.github.io/datascience/2019/10/27/convtranspose2d.html)

## Reference:
- https://arxiv.org/abs/1603.07285
- https://en.wikipedia.org/wiki/Toeplitz_matrix

In [1]:
import sympy
from sympy import Symbol, MatrixSymbol, Matrix
import numpy as np

def convolution(x, w, K, N):
    """Slide the kernel ``w`` over ``x`` with stride 1 and no padding.

    Each output entry is the sum of the element-wise product between ``w``
    and one ``K`` x ``K`` window of ``x``. The kernel is applied as-is (it is
    not flipped).

    Args:
        x: Input accepted by ``sympy.Matrix`` (e.g. a ``MatrixSymbol``).
            Assumed to be ``N`` x ``N``; this is not checked here.
        w: Kernel accepted by ``sympy.Matrix``. Assumed to be ``K`` x ``K``;
            this is not checked here.
        K: Side length of the sliding window.
        N: Side length of the input.

    Returns:
        A ``sympy.Matrix`` of shape ``(N-K+1, N-K+1)``, filled row by row.
    """
    # Build the explicit matrices once: they do not change between windows,
    # so there is no reason to rebuild them on every loop iteration.
    x_mat = Matrix(x)
    w_mat = Matrix(w)
    out_size = N - K + 1

    res = []
    for i in range(out_size):
        for j in range(out_size):
            window = x_mat[i:i + K, j:j + K]
            # sum() over a Matrix adds up all of its entries.
            res.append(sum(window.multiply_elementwise(w_mat)))
    # Entries were collected in row-major order, so a plain reshape restores
    # the 2-D layout.
    return Matrix(res).reshape(out_size, out_size)

In [2]:
# Symbolic 4x4 matrix standing for the layer input x^{(l)}.
x_input = MatrixSymbol("x^{(l)}", 4, 4)
# Symbolic 2x2 matrix x^{(l+1)}; it has the same shape as the convolution
# output and is the quantity multiplied by C^T in the "backward" cell.
x_output = MatrixSymbol("x^{(l+1)}", 2, 2)
# Symbolic 3x3 convolution kernel.
w = MatrixSymbol("w", 3, 3)

In [3]:
# Convolve the 4x4 input with the 3x3 kernel; with N=4 and K=3 the result
# is a (N-K+1) x (N-K+1) = 2x2 matrix of symbolic expressions.
o = convolution(x_input, w, K=3, N=4)
# Bare expression so the notebook displays the result.
o

Matrix([
[w[0, 0]*x^{(l)}[0, 0] + w[0, 1]*x^{(l)}[0, 1] + w[0, 2]*x^{(l)}[0, 2] + w[1, 0]*x^{(l)}[1, 0] + w[1, 1]*x^{(l)}[1, 1] + w[1, 2]*x^{(l)}[1, 2] + w[2, 0]*x^{(l)}[2, 0] + w[2, 1]*x^{(l)}[2, 1] + w[2, 2]*x^{(l)}[2, 2], w[0, 0]*x^{(l)}[0, 1] + w[0, 1]*x^{(l)}[0, 2] + w[0, 2]*x^{(l)}[0, 3] + w[1, 0]*x^{(l)}[1, 1] + w[1, 1]*x^{(l)}[1, 2] + w[1, 2]*x^{(l)}[1, 3] + w[2, 0]*x^{(l)}[2, 1] + w[2, 1]*x^{(l)}[2, 2] + w[2, 2]*x^{(l)}[2, 3]],
[w[0, 0]*x^{(l)}[1, 0] + w[0, 1]*x^{(l)}[1, 1] + w[0, 2]*x^{(l)}[1, 2] + w[1, 0]*x^{(l)}[2, 0] + w[1, 1]*x^{(l)}[2, 1] + w[1, 2]*x^{(l)}[2, 2] + w[2, 0]*x^{(l)}[3, 0] + w[2, 1]*x^{(l)}[3, 1] + w[2, 2]*x^{(l)}[3, 2], w[0, 0]*x^{(l)}[1, 1] + w[0, 1]*x^{(l)}[1, 2] + w[0, 2]*x^{(l)}[1, 3] + w[1, 0]*x^{(l)}[2, 1] + w[1, 1]*x^{(l)}[2, 2] + w[1, 2]*x^{(l)}[2, 3] + w[2, 0]*x^{(l)}[3, 1] + w[2, 1]*x^{(l)}[3, 2] + w[2, 2]*x^{(l)}[3, 3]]])

In [4]:
# Differentiate the flattened output (1x4) with respect to the flattened
# input (16x1) and arrange the result as 16x4: row r corresponds to input
# element r (row-major), column c to output element c, so
# C[r, c] = d o_flat[c] / d x_flat[r]. Every entry is a kernel weight or 0.
C = o.reshape(1, 4).diff(Matrix(x_input).reshape(16, 1)).reshape(16, 4)
# Bare expression so the notebook displays the result.
C

[[w[0, 0], 0, 0, 0], [w[0, 1], w[0, 0], 0, 0], [w[0, 2], w[0, 1], 0, 0], [0, w[0, 2], 0, 0], [w[1, 0], 0, w[0, 0], 0], [w[1, 1], w[1, 0], w[0, 1], w[0, 0]], [w[1, 2], w[1, 1], w[0, 2], w[0, 1]], [0, w[1, 2], 0, w[0, 2]], [w[2, 0], 0, w[1, 0], 0], [w[2, 1], w[2, 0], w[1, 1], w[1, 0]], [w[2, 2], w[2, 1], w[1, 2], w[1, 1]], [0, w[2, 2], 0, w[1, 2]], [0, 0, w[2, 0], 0], [0, 0, w[2, 1], w[2, 0]], [0, 0, w[2, 2], w[2, 1]], [0, 0, 0, w[2, 2]]]

In [5]:
# Forward pass: the flattened input (1x16) times C (16x4) gives the
# flattened convolution output; reshaped to 2x2 it matches `o` from the
# direct sliding-window computation.
(Matrix(x_input).reshape(1, 16) @ C).reshape(2, 2)

Matrix([
[w[0, 0]*x^{(l)}[0, 0] + w[0, 1]*x^{(l)}[0, 1] + w[0, 2]*x^{(l)}[0, 2] + w[1, 0]*x^{(l)}[1, 0] + w[1, 1]*x^{(l)}[1, 1] + w[1, 2]*x^{(l)}[1, 2] + w[2, 0]*x^{(l)}[2, 0] + w[2, 1]*x^{(l)}[2, 1] + w[2, 2]*x^{(l)}[2, 2], w[0, 0]*x^{(l)}[0, 1] + w[0, 1]*x^{(l)}[0, 2] + w[0, 2]*x^{(l)}[0, 3] + w[1, 0]*x^{(l)}[1, 1] + w[1, 1]*x^{(l)}[1, 2] + w[1, 2]*x^{(l)}[1, 3] + w[2, 0]*x^{(l)}[2, 1] + w[2, 1]*x^{(l)}[2, 2] + w[2, 2]*x^{(l)}[2, 3]],
[w[0, 0]*x^{(l)}[1, 0] + w[0, 1]*x^{(l)}[1, 1] + w[0, 2]*x^{(l)}[1, 2] + w[1, 0]*x^{(l)}[2, 0] + w[1, 1]*x^{(l)}[2, 1] + w[1, 2]*x^{(l)}[2, 2] + w[2, 0]*x^{(l)}[3, 0] + w[2, 1]*x^{(l)}[3, 1] + w[2, 2]*x^{(l)}[3, 2], w[0, 0]*x^{(l)}[1, 1] + w[0, 1]*x^{(l)}[1, 2] + w[0, 2]*x^{(l)}[1, 3] + w[1, 0]*x^{(l)}[2, 1] + w[1, 1]*x^{(l)}[2, 2] + w[1, 2]*x^{(l)}[2, 3] + w[2, 0]*x^{(l)}[3, 1] + w[2, 1]*x^{(l)}[3, 2] + w[2, 2]*x^{(l)}[3, 3]]])

In [6]:
# Backward pass: the flattened 2x2 output-shaped matrix (1x4) times C^T
# (4x16) gives a 16-element row, reshaped to the 4x4 input shape. Each
# entry combines kernel weights with the output-side values, i.e. the same
# matrix C used transposed maps output shape back to input shape.
(Matrix(x_output).reshape(1, 4) @ C.transpose()).reshape(4, 4)

Matrix([
[                          w[0, 0]*x^{(l+1)}[0, 0],                                                     w[0, 0]*x^{(l+1)}[0, 1] + w[0, 1]*x^{(l+1)}[0, 0],                                                     w[0, 1]*x^{(l+1)}[0, 1] + w[0, 2]*x^{(l+1)}[0, 0],                           w[0, 2]*x^{(l+1)}[0, 1]],
[w[0, 0]*x^{(l+1)}[1, 0] + w[1, 0]*x^{(l+1)}[0, 0], w[0, 0]*x^{(l+1)}[1, 1] + w[0, 1]*x^{(l+1)}[1, 0] + w[1, 0]*x^{(l+1)}[0, 1] + w[1, 1]*x^{(l+1)}[0, 0], w[0, 1]*x^{(l+1)}[1, 1] + w[0, 2]*x^{(l+1)}[1, 0] + w[1, 1]*x^{(l+1)}[0, 1] + w[1, 2]*x^{(l+1)}[0, 0], w[0, 2]*x^{(l+1)}[1, 1] + w[1, 2]*x^{(l+1)}[0, 1]],
[w[1, 0]*x^{(l+1)}[1, 0] + w[2, 0]*x^{(l+1)}[0, 0], w[1, 0]*x^{(l+1)}[1, 1] + w[1, 1]*x^{(l+1)}[1, 0] + w[2, 0]*x^{(l+1)}[0, 1] + w[2, 1]*x^{(l+1)}[0, 0], w[1, 1]*x^{(l+1)}[1, 1] + w[1, 2]*x^{(l+1)}[1, 0] + w[2, 1]*x^{(l+1)}[0, 1] + w[2, 2]*x^{(l+1)}[0, 0], w[1, 2]*x^{(l+1)}[1, 1] + w[2, 2]*x^{(l+1)}[0, 1]],
[                          w[2, 0]*x^{(l+1)}[1, 0],          