## Learning Pytorch
References:
* https://towardsdatascience.com/pytorch-tutorial-distilled-95ce8781a89c
* http://adventuresinmachinelearning.com/pytorch-tutorial-deep-learning/
* https://hsaghir.github.io/data_science/pytorch_starter/
* https://jdhao.github.io/2017/11/12/pytorch-computation-graph/
* https://discuss.pytorch.org/t/print-autograd-graph/692/4
* https://medium.com/intuitionmachine/pytorch-dynamic-computational-graphs-and-modular-deep-learning-7e7f89f18d1
* https://medium.com/init27-labs/pytorch-basics-in-4-minutes-c7814fa5f03d
* https://github.com/PaddlePaddle/VisualDL
* https://www.youtube.com/watch?v=rrekAv9Fml4
* https://www.youtube.com/watch?v=LAMwEJZqesU
* https://github.com/lanpa/tensorboard-pytorch
* https://github.com/lanpa/tensorboard-pytorch-examples

In [1]:
# Pytorch libraries
import torch
# Neural network support
import torch.nn as nn
# Optimizer (SGD, Adam, Etc...)
import torch.optim as optim

# Library that gives support for tensorboard and pytorch
from tensorboardX import SummaryWriter

# Set enviroment variable for make only the first GPU visible
import os
os.environ["CUDA_VISIBLE_DEVICES"] = str(0)

### Create Tensors
You can consider pytorch as a linear algebra library (like numpy) with support for calculations on the GPU and Neural Networks

In [2]:
# Create a matrix 2x2
x = torch.tensor([1, 2])
print('1x2 tensor:')
print(x)
print(x.size())

# Create a matrix 2x2
x = torch.tensor([[1, 2],[3, 4]])
print('\n2x2 tensor:')
print(x)
print(x.size())
print('All rows from first collumn')
# We can have all numpy cool matrix sub-indexing
print(x[:, 0])
print(x[:, 0].size())

## Create unitialized 5x3 tensor
x = torch.empty(5, 3, dtype=torch.long)
print('\nNon-initialized 5x3 tensor:')
print(x)

# Create random 5x3 tensor
x = torch.rand(5, 3)
print('\nRandom 5x3 tensor:')
print(x)

# Create 5x3 zeros tensor of type 
print('\nZeros 5x3 tensor:')
x = torch.zeros(5, 3, dtype=torch.long)
print(x)

# Create 5x3 zeros tensor of type 
print('\nZeros 5x3 tensor:')
x = torch.ones(5, 3, dtype=torch.double)
print(x)

# Create 5x3 zeros tensor of type 
print('\nZeros 5x3 tensor:')
y= torch.ones(5, 3, dtype=torch.double) * 2
print(y)

1x2 tensor:
tensor([ 1,  2])
torch.Size([2])

2x2 tensor:
tensor([[ 1,  2],
        [ 3,  4]])
torch.Size([2, 2])
All rows from first collumn
tensor([ 1,  3])
torch.Size([2])

Non-initialized 5x3 tensor:
tensor([[ 1.3988e+14,  1.3988e+14,  1.0000e+00],
        [ 5.6592e+07,  1.0000e+00,  4.2950e+09],
        [ 6.4000e+01,  6.4000e+01,  5.6598e+07],
        [ 2.0000e+00,  3.0065e+10,  1.3988e+14],
        [ 0.0000e+00,  1.3964e+14,  1.2800e+02]])

Random 5x3 tensor:
tensor([[ 0.6224,  0.9536,  0.8155],
        [ 0.1915,  0.9667,  0.6261],
        [ 0.8117,  0.1787,  0.2665],
        [ 0.1328,  0.7333,  0.2689],
        [ 0.5241,  0.0260,  0.2483]])

Zeros 5x3 tensor:
tensor([[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]])

Zeros 5x3 tensor:
tensor([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]], dtype=torch.float64)

Zeros 5x3 tensor:
tensor([[ 2.,  2.,  2.],
    

### Operations
Pytorch implement most of the numpy operations

In [3]:
print(x+y)
print(x*y)
print((x+6)/y)

# Reshape
print(((x+6)/y).view(15))
print(((x+6)/y).view(15).size())
print(((x+6)/y).view(3,5))
print(((x+6)/y).view(3,5).size())

tensor([[ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.]], dtype=torch.float64)
tensor([[ 2.,  2.,  2.],
        [ 2.,  2.,  2.],
        [ 2.,  2.,  2.],
        [ 2.,  2.,  2.],
        [ 2.,  2.,  2.]], dtype=torch.float64)
tensor([[ 3.5000,  3.5000,  3.5000],
        [ 3.5000,  3.5000,  3.5000],
        [ 3.5000,  3.5000,  3.5000],
        [ 3.5000,  3.5000,  3.5000],
        [ 3.5000,  3.5000,  3.5000]], dtype=torch.float64)
tensor([ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000,
         3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000,
         3.5000], dtype=torch.float64)
torch.Size([15])
tensor([[ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
        [ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
        [ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000]], dtype=torch.float64)
torch.Size([3, 5])


### Numpy Bridge
Allows converting tensors from pytorch to numpy and from numpy to pytorch

In [4]:
# Convert torch tensor to numpy
y_numpy = y.numpy()
print(y_numpy)
print(y_numpy.shape)
print(type(y_numpy))

# Convert numpy ndarray to torch
y_torch = torch.from_numpy(y_numpy)
print(y_torch)
print(y_torch.size())
print(type(y_torch))

[[2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]]
(5, 3)
<class 'numpy.ndarray'>
tensor([[ 2.,  2.,  2.],
        [ 2.,  2.,  2.],
        [ 2.,  2.,  2.],
        [ 2.,  2.,  2.],
        [ 2.,  2.,  2.]], dtype=torch.float64)
torch.Size([5, 3])
<class 'torch.Tensor'>


### Working with GPU
Send data and do calculations on the GPU

In [5]:
print(torch.cuda.is_available())
if torch.cuda.is_available():
    # Point to your GPU
    device = torch.device("cuda:0") 
    # Move tensor on GPU
    x_gpu = x.to(device)
    y_gpu = y.to(device)
    # Create tensor on GPU
    b_gpu = torch.ones_like(x, device=device) + 3.5
    # Result and the calculation will be on the GPU
    result = x_gpu + y_gpu - b_gpu
    print(result)
    # Move to CPU
    result_cpu = result.to("cpu", torch.double)
    print(result_cpu)

True
tensor([[-1.5000, -1.5000, -1.5000],
        [-1.5000, -1.5000, -1.5000],
        [-1.5000, -1.5000, -1.5000],
        [-1.5000, -1.5000, -1.5000],
        [-1.5000, -1.5000, -1.5000]], dtype=torch.float64, device='cuda:0')
tensor([[-1.5000, -1.5000, -1.5000],
        [-1.5000, -1.5000, -1.5000],
        [-1.5000, -1.5000, -1.5000],
        [-1.5000, -1.5000, -1.5000],
        [-1.5000, -1.5000, -1.5000]], dtype=torch.float64)


### Autograd
Automatically calculate the effect of a function parameter into the output (Chain-Rule)

In [6]:
a = torch.ones(1, requires_grad=True)
b = torch.ones(1, requires_grad=True)
c = torch.ones(1, requires_grad=True)
y = ((2*a) * (3*b)) + (4*c)
print(y)
y.backward()
print(y.grad_fn)

tensor([ 10.])
<AddBackward1 object at 0x7f3744f89be0>


In [7]:
print(a.grad)
print(b.grad)
print(c.grad)

tensor([ 6.])
tensor([ 6.])
tensor([ 4.])


### Simple MLP for XOR problem

In [8]:
trainingdataX = [[[0.01, 0.01], [0.01, 0.90], [0.90, 0.01], [0.95, 0.95]], 
                 [[0.02, 0.03], [0.04, 0.95], [0.97, 0.02], [0.96, 0.95]]]
trainingdataY = [[[0.01], [0.90], [0.90], [0.01]], [[0.04], [0.97], [0.98], [0.1]]]
num_epoch = 100

In [9]:
class Net(nn.Module):
    # Add layers
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(2, 50)  # 2 Input noses, 50 in middle layers
        self.fc2 = nn.Linear(50, 1)  # 50 middle layer, 1 output nodes
        self.rl1 = nn.ReLU()
        self.rl2 = nn.ReLU()

    # Define Forward propagation graph
    def forward(self, x):
        x = self.fc1(x)
        x = self.rl1(x)
        x = self.fc2(x)
        x = self.rl2(x)
        return x

In [10]:
# Tensorboard writer at logs directory
writer = SummaryWriter('logs')

# Create network
net = Net()
print(net)

# Send graph to tensorboard
writer.add_graph(net, torch.rand(4,2))

# Loss (Mean squared error)
criterion = nn.MSELoss()

# Stochastic Gradient Descent Optimizer 
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.1)

Net(
  (fc1): Linear(in_features=2, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=1, bias=True)
  (rl1): ReLU()
  (rl2): ReLU()
)


In [11]:
# For each epoch (epoch==>Run on the entire dataset)
for epoch in range(num_epoch):
    running_loss = 0.0
    # For each element on the dataset
    for i, data in enumerate(trainingdataX, 0):
        # Get data
        inputs = data
        labels = trainingdataY[i]
        
        # Convert input/labels to torch tensors
        inputs = torch.FloatTensor(inputs)
        labels = torch.FloatTensor(labels)
        
        # Manually set gradients to zero before the loss.backward() and optimizer.step()
        optimizer.zero_grad()
        
        # Forward Propagation
        outputs = net(inputs)
                
        # Compute loss
        loss = criterion(outputs, labels)
        
        # Compute loss backpropagation
        loss.backward()
        
        # Run optimizer
        optimizer.step()
        
        # Get loss scalar value
        running_loss += loss.item()                
        
        if i % 100 == 0:
            print("loss: ", i, running_loss)
            # Send loss to tensorboard
            writer.add_scalar('loss', running_loss, epoch)
            running_loss = 0.0

# Close summary writer
writer.close()

loss:  0 0.3227727711200714
loss:  0 0.2476765513420105
loss:  0 0.20148196816444397
loss:  0 0.18929597735404968
loss:  0 0.1779015213251114
loss:  0 0.16722047328948975
loss:  0 0.15708935260772705
loss:  0 0.14734986424446106
loss:  0 0.13827751576900482
loss:  0 0.1294705867767334
loss:  0 0.12094464898109436
loss:  0 0.11289694160223007
loss:  0 0.10484233498573303
loss:  0 0.09720581769943237
loss:  0 0.0893995612859726
loss:  0 0.08209510892629623
loss:  0 0.07511371374130249
loss:  0 0.0688881054520607
loss:  0 0.0630224198102951
loss:  0 0.057847000658512115
loss:  0 0.053466688841581345
loss:  0 0.049107626080513
loss:  0 0.04515097290277481
loss:  0 0.041556261479854584
loss:  0 0.03707286715507507
loss:  0 0.03449761122465134
loss:  0 0.03173501417040825
loss:  0 0.028862591832876205
loss:  0 0.026337821036577225
loss:  0 0.02375251241028309
loss:  0 0.02213069424033165
loss:  0 0.020114663988351822
loss:  0 0.0180179625749588
loss:  0 0.01695568859577179
loss:  0 0.0151734