In [None]:
import torch
# Read the installed PyTorch version string so the notebook can gate on it.
tv = torch.__version__
print('Using PyTorch version: ', tv)
# check we have PyTorch 0.2.x
# The test compares single characters of the version string: index 0 must be
# '0' and index 2 must be '2' (i.e. it assumes a 'major.minor...' layout with a
# one-character major part). On failure the version string itself is used as
# the assertion message.
# NOTE(review): presumably the gate exists because the cells below use the
# 0.2-era API (e.g. torch.autograd.Variable) -- confirm before relaxing it.
# NOTE(review): since only one character per component is inspected, a string
# such as '0.20.0' would also pass; comparing tv.split('.')[:2] would be stricter.
assert tv[0] == '0' and tv[2] == '2', tv

import numpy as np

# First things first: The world becomes tensorized

In [None]:
# Tensors are the multi-dimensional containers every deep learning framework
# is built on. PyTorch can construct them from plain Python lists as well as
# from NumPy arrays.
my_list = list(range(4))
my_array = np.array(my_list)
my_list_T, my_array_T = torch.LongTensor(my_list), torch.LongTensor(my_array)
# Whatever the source container was, both results have the same tensor type
assert type(my_list_T) is type(my_array_T)

# A 3-D example built from a list of lists of lists:
# two blocks, each holding two rows of three values.
T_3 = np.array([
    [[0, 1, 2.], [5, 6, 7]],
    [[0.2, 0.4, 2.2], [4.5, -6, -9]],
])

n_dims = T_3.ndim
assert n_dims == 3, n_dims
print('Number of dimensions: ', n_dims)
print('Shape of each dimension: ', T_3.shape)
# the resulting NumPy array has shape (2, 2, 3)

### Congrats on your marvelous Tensors, but now what?
Tensors have:
1. Info about the data type and the size of each dimension (NumPy arrays have this too!)
2. GPU capabilities (NumPy arrays DO NOT)

In [None]:
# We can operate with Tensors of course.
# Layer dimensions: 25 input features are mapped to 100 output units.
n_inputs, n_outputs = 25, 100
# weights matrix with [outputs x inputs] = [100 x 25]; this orientation is what
# makes the matrix-vector product W @ x below well defined for x of size [25]
W = torch.randn(n_outputs, n_inputs)
# bias vector [100]
b = torch.zeros(n_outputs)
# input vector [25]
x = torch.randn(n_inputs)
# Yes, this is a single layer fully connected neural network
y = torch.matmul(W, x) + b
# y ~ [100] output vector: one value per output unit
assert tuple(y.size()) == (n_outputs,), y.size()
print('x size: ', x.size())
print('W size: ', W.size())
print('b size: ', b.size())
print('y = Wx + b, size: ', y.size())

### Some PyTorch notation for Tensor properties:

In [None]:
# NumPy --> PyTorch translation
# --------------------------------
# Relies on `y`, the [100] output vector computed in the previous cell.

# 1) shape --> size()
print('y size: ', y.size())

# 2) reshape() --> view()
# The requested shape must hold the same number of elements: 100 == 10 * 10
z = y.view(10, 10)
print('z size (y reshaped to 10x10): ', z.size())

# 3) expand_dims() --> unsqueeze()
# -1 appends the new axis of size 1 at the end: [100] -> [100, 1]
Y = y.unsqueeze(-1)
print('Y size (y unsqueezed in last dim): ', Y.size())

# 4) transpose(0, 1) --> t()
# t() swaps the two dimensions of a 2-D tensor: [100, 1] -> [1, 100]
Y_t = Y.t()
print('Y transposed size: ', Y_t.size())

### The "magic" behind AUTOGRAD

**Variable:** It wraps a Tensor and supports nearly all of the operations defined on it. Once you finish your computation, you can call `.backward()` and have all the gradients computed automatically.

You can access the raw tensor through the `.data` attribute, while the gradient w.r.t. this variable is accumulated into `.grad`[[1]](http://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html).

In [None]:
from torch.autograd import Variable

# A random square tensor to use as the payload
side = 10
T = torch.randn(side, side)
# Wrapping the Tensor in a Variable is all it takes to create one
V = Variable(T)
# Printing it shows a Variable containing a FloatTensor
print(V)

### The reason to create Variables: the Graph

Tensors are nodes in the graph. Edges are the computations relating Tensors (as in TensorFlow). However, the main difference between PyTorch and TensorFlow is: **DYNAMIC GRAPH!**

<img src="dynamic_graph.gif" width="600px">

[comment]: (Reference_for_the_figure:https://medium.com/intuitionmachine/pytorch-dynamic-computational-graphs-and-modular-deep-learning-7e7f89f18d1)

The Graph is built operation by operation, i.e. at runtime!

In [None]:
# Example of a graph creation: out = sum(x + y)
# NOTE: x and y are rebound here; they no longer refer to the tensors of the
# single-layer example from the earlier cells.
# requires_grad tells the framework we want the gradient w.r.t. that variable to be computed
x = Variable(torch.ones(10), requires_grad=True)
y = Variable(torch.ones(10), requires_grad=True)
# z is the intermediate node produced by the element-wise addition
z = x + y
# out reduces z to a single value: with all-ones inputs, 10 * (1 + 1) = 20
out = z.sum()

In [None]:
# Back-propagate from `out` through the graph built in the previous cell
out.backward()
print(z)
# Gradients in order: z, then x and y.
# z is an intermediate result (z = x + y) rather than a Variable we created
# ourselves, so autograd is not expected to keep its gradient -- this
# typically prints None. x and y were created with requires_grad=True, and
# since out = sum(x + y) each of their gradient entries is 1.
for node in (z, x, y):
    print(node.grad)

For further reference: http://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html