# Backpropagation in numpy vs TensorFlow vs PyTorch

## 1) numpy

In [1]:
import numpy as np

In [3]:
# Matrix shape (rows, columns) used when building the input arrays below.
rowdim = 4
coldim = 3

#### create data

In [5]:
# Build the three (rowdim, coldim) inputs of the graph  node3 = sum(X * Y + Z).

# X: standard-normal samples. No seed is set, so the values differ on every run.
Xdata = np.random.randn(rowdim, coldim)
print(Xdata)

# Y: all ones.
Ydata = np.ones((rowdim, coldim))
print(Ydata)

# Z: the integers 1..rowdim*coldim laid out row by row. The bound and the shape
# are derived from rowdim/coldim (instead of the literals 13 and (4, 3)) so this
# array stays consistent with Xdata/Ydata if the shape is ever changed.
Zdata = np.arange(1, rowdim * coldim + 1).reshape(rowdim, coldim)
print(Zdata)

[[ 1.52886114 -0.22661619  0.68308932]
 [-1.73819668 -0.21263948  1.5615604 ]
 [-0.68697506  1.52023603  1.18332271]
 [-0.20727797 -0.39272456  1.73638653]]
[[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]]
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


#### forward computations

In [6]:
# Multiplication node: node1 = X * Y.
# The product is taken entry by entry; it is not a matrix product.
node1 = np.multiply(Xdata, Ydata)
node1

array([[ 1.52886114, -0.22661619,  0.68308932],
       [-1.73819668, -0.21263948,  1.5615604 ],
       [-0.68697506,  1.52023603,  1.18332271],
       [-0.20727797, -0.39272456,  1.73638653]])

In [7]:
# Addition node: node2 = node1 + Z, again entry by entry.
node2 = np.add(node1, Zdata)
node2

array([[  2.52886114,   1.77338381,   3.68308932],
       [  2.26180332,   4.78736052,   7.5615604 ],
       [  6.31302494,   9.52023603,  10.18332271],
       [  9.79272203,  10.60727544,  13.73638653]])

In [8]:
# Reduction node: add up every entry of node2 to get a single scalar.
node3 = node2.sum()
node3

82.749026166001784

#### backprop

In [9]:
# Seed of the backward pass: d(node3)/d(node3) = 1.
grad_node3 = 1.0

In [10]:
# node3 is the sum of all entries of node2, so each entry has local
# derivative 1 and simply receives the upstream gradient grad_node3.
grad_node2 = np.full((rowdim, coldim), grad_node3)
grad_node2

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [11]:
# node2 = node1 + Z: addition passes the upstream gradient through unchanged.
# A copy is taken so grad_node1 does not share memory with grad_node2.
grad_node1 = np.copy(grad_node2)
grad_node1

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [12]:
# Z is the other operand of the addition, so it receives the same
# (independently copied) upstream gradient as node1.
grad_Z = np.array(grad_node2, copy=True)
grad_Z

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [13]:
# node1 = X * Y (element-wise), so d(node1)/dY = X; chain it with grad_node1.
grad_Y = np.multiply(Xdata, grad_node1)
grad_Y

array([[ 1.52886114, -0.22661619,  0.68308932],
       [-1.73819668, -0.21263948,  1.5615604 ],
       [-0.68697506,  1.52023603,  1.18332271],
       [-0.20727797, -0.39272456,  1.73638653]])

In [14]:
# node1 = X * Y (element-wise), so d(node1)/dX = Y; chain it with grad_node1.
grad_X = np.multiply(Ydata, grad_node1)
grad_X

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

## 2) TensorFlow

In [15]:
import tensorflow as tf

#### graph

In [16]:
# Three float32 graph inputs with no static shape; the actual arrays are
# supplied through feed_dict when the graph is run.
# Created in X, Y, Z order, one placeholder per name.
X, Y, Z = (tf.placeholder(tf.float32) for _ in range(3))

In [17]:
# Symbolic element-wise product of the X and Y placeholders.
# This only adds an op to the graph; no numbers are computed yet.
node1 = X * Y
node1

<tf.Tensor 'mul:0' shape=<unknown> dtype=float32>

In [18]:
# Symbolic element-wise sum of node1 and the Z placeholder.
node2 = node1 + Z
node2

<tf.Tensor 'add:0' shape=<unknown> dtype=float32>

In [19]:
# Symbolic reduction of node2 to a scalar by summing all of its entries.
node3 = tf.reduce_sum(node2)
node3

<tf.Tensor 'Sum:0' shape=<unknown> dtype=float32>

#### gradients

In [20]:
# tf.gradients(ys, xs, grad_ys=None, name='gradients', colocate_gradients_with_ops=False, gate_gradients=False, aggregation_method=None)
# Constructs symbolic partial derivatives of sum of `ys` w.r.t. x in `xs`.
# The result is unpacked into one gradient tensor per input, in the order
# the inputs are listed ([X, Y, Z]). These are still symbolic; they are
# evaluated later inside a session.

grad_X, grad_Y, grad_Z = tf.gradients(node3, [X,Y,Z])

#### run

In [23]:
# Evaluate the scalar output and the three symbolic gradients in one run,
# feeding the numpy arrays built in the numpy section into the placeholders.
fetches = [node3, grad_X, grad_Y, grad_Z]
feeds = {X: Xdata, Y: Ydata, Z: Zdata}
with tf.Session() as sess:
    node3_eval, grad_X_eval, grad_Y_eval, grad_Z_eval = sess.run(fetches, feed_dict=feeds)

# Print in the same order as before: output first, then dX, dY, dZ.
for evaluated in (node3_eval, grad_X_eval, grad_Y_eval, grad_Z_eval):
    print(evaluated)

82.749
[[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]]
[[ 1.52886117 -0.22661619  0.68308932]
 [-1.73819673 -0.21263948  1.56156039]
 [-0.68697506  1.52023602  1.18332267]
 [-0.20727797 -0.39272454  1.73638654]]
[[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]]


## 3) PyTorch

In [1]:
import torch
from torch.autograd import Variable

#### Variables hold data (Tensors) and gradients

In [8]:
# Leaf Variables for the PyTorch graph. requires_grad=True asks autograd to
# track them so that backward() fills in their .grad.
# rowdim / coldim are the shape constants defined in the numpy section.
# NOTE: X is sampled afresh with torch.randn rather than reusing Xdata, so the
# numbers in this section are not expected to match the numpy/TensorFlow ones.
X = Variable(torch.randn(rowdim, coldim), requires_grad = True)
Y = Variable(torch.ones(rowdim, coldim), requires_grad = True)
# Z holds 1..rowdim*coldim row by row. The upper bound is derived from the
# shape instead of the literal 13, and .float() guarantees a floating-point
# tensor: newer PyTorch releases infer an integer dtype for integer arange
# bounds, and integer tensors cannot have requires_grad=True.
Z = Variable(torch.arange(1, rowdim * coldim + 1).float().view(rowdim, coldim), requires_grad = True)

X, Y, Z

(Variable containing:
 -0.3541 -0.4335  0.2140
  1.3350 -0.9717 -0.7343
 -0.0088 -1.2783  0.1823
 -2.5544 -1.2672  1.3542
 [torch.FloatTensor of size 4x3], Variable containing:
  1  1  1
  1  1  1
  1  1  1
  1  1  1
 [torch.FloatTensor of size 4x3], Variable containing:
   1   2   3
   4   5   6
   7   8   9
  10  11  12
 [torch.FloatTensor of size 4x3])

#### forward computation

In [9]:
# Element-wise product of X and Y; computed immediately and recorded by autograd.
node1 = torch.mul(X, Y)
node1

Variable containing:
-0.3541 -0.4335  0.2140
 1.3350 -0.9717 -0.7343
-0.0088 -1.2783  0.1823
-2.5544 -1.2672  1.3542
[torch.FloatTensor of size 4x3]

In [10]:
# Element-wise sum of node1 and Z.
node2 = torch.add(node1, Z)
node2

Variable containing:
  0.6459   1.5665   3.2140
  5.3350   4.0283   5.2657
  6.9912   6.7217   9.1823
  7.4456   9.7328  13.3542
[torch.FloatTensor of size 4x3]

In [11]:
# Sum every entry of node2 into a single value.
node3 = node2.sum()
node3

Variable containing:
 73.4833
[torch.FloatTensor of size 1]

#### autograd!

In [12]:
# Run reverse-mode autodiff from node3; the resulting gradients are read
# from X.grad, Y.grad and Z.grad in the next cell.
node3.backward()

In [14]:
# Show d(node3)/dX, d(node3)/dY and d(node3)/dZ, in that order.
for leaf in (X, Y, Z):
    print(leaf.grad.data)


 1  1  1
 1  1  1
 1  1  1
 1  1  1
[torch.FloatTensor of size 4x3]


-0.3541 -0.4335  0.2140
 1.3350 -0.9717 -0.7343
-0.0088 -1.2783  0.1823
-2.5544 -1.2672  1.3542
[torch.FloatTensor of size 4x3]


 1  1  1
 1  1  1
 1  1  1
 1  1  1
[torch.FloatTensor of size 4x3]

