Recommended materials
====

1. PyTorch Official Tutorials \[[Link](https://pytorch.org/tutorials/)\]
2. Deep Learning Zero to All \[[English](https://www.youtube.com/playlist?list=PLlMkM4tgfjnJ3I-dbhO9JTw7gNty6o_2m)\] \[[Korean](https://www.youtube.com/playlist?list=PLQ28Nx3M4JrhkqBVIXg-i5_CVVoS1UzAv)\]
3. Neural Network Programming - Deep Learning with PyTorch \[[English](https://www.youtube.com/playlist?list=PLZbbT5o_s2xrfNyHZsM6ufI0iZENK9xgG)\]

Building Neural Networks and Constructing Optimizers
=====



## Key Modules & Functions for building neural networks
- `torch.nn`
- `torch.nn.functional`
- MLP : `torch.nn.Linear`
- Convolutional Layer : `torch.nn.Conv2d`, `torch.nn.ConvTranspose2d`
- Dropout : `torch.nn.Dropout`, `torch.nn.Dropout1d`, `torch.nn.Dropout2d`
- Batch normalization : `torch.nn.BatchNorm1d`, `torch.nn.BatchNorm2d`
- Sigmoid : `torch.nn.Sigmoid`, `torch.nn.functional.sigmoid`
- Tanh : `torch.nn.Tanh`, `torch.nn.functional.tanh`
- Softmax : `torch.nn.Softmax`, `torch.nn.functional.softmax`
- ReLU : `torch.nn.ReLU`, `torch.nn.functional.relu`
- LeakyReLU : `torch.nn.LeakyReLU`, `torch.nn.functional.leaky_relu`

## Example 1. A simple MLP  
![](https://drive.google.com/uc?export=view&id=1_CUhZor5dcxcL9K1g-vnExZ04vVt7SV5)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# (hyper-)parameters: layer widths and the mini-batch size
batch_size = 5
input_dim = 10
hidden1_dim, hidden2_dim, hidden3_dim, hidden4_dim = 20, 30, 40, 50
output_dim = 1

# five fully-connected layers, created in input-to-output order
mlp1 = nn.Linear(in_features=input_dim, out_features=hidden1_dim)
mlp2 = nn.Linear(in_features=hidden1_dim, out_features=hidden2_dim)
mlp3 = nn.Linear(in_features=hidden2_dim, out_features=hidden3_dim)
mlp4 = nn.Linear(in_features=hidden3_dim, out_features=hidden4_dim)
mlp5 = nn.Linear(in_features=hidden4_dim, out_features=output_dim)

# a random mini-batch standing in for real data
inputs = torch.randn(batch_size, input_dim)

# push the batch through the layers one at a time, keeping every activation
hidden1 = mlp1(inputs)
hidden2 = mlp2(hidden1)
hidden3 = mlp3(hidden2)
hidden4 = mlp4(hidden3)
outputs = mlp5(hidden4)

# print the shape of the input, of each hidden activation and of the output
print(
  inputs.shape,
  hidden1.shape,
  hidden2.shape,
  hidden3.shape,
  hidden4.shape,
  outputs.shape,
  sep='\n',
)

## Example 2. An MLP with ReLU, Dropout(p), and Batch Normalization  
![](https://drive.google.com/uc?export=view&id=1UDa3oBU_fPl6ZX8GY17l_sao63pHuNox)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# (hyper-)parameters: layer widths, dropout probability and mini-batch size
batch_size = 5
input_dim, hidden1_dim, output_dim = 10, 20, 1
p = 0.2

# the individual building blocks, created in the order they are applied
mlp1 = nn.Linear(in_features=input_dim, out_features=hidden1_dim)
relu = nn.ReLU()
dropout = nn.Dropout(p)
batchnorm = nn.BatchNorm1d(hidden1_dim)
mlp2 = nn.Linear(in_features=hidden1_dim, out_features=output_dim)

# a random mini-batch standing in for real data
inputs = torch.randn(batch_size, input_dim)

# linear -> ReLU -> dropout -> batch norm -> linear, keeping every activation
hidden1 = mlp1(inputs)
hidden2 = relu(hidden1)
hidden3 = dropout(hidden2)
hidden4 = batchnorm(hidden3)
outputs = mlp2(hidden4)

# print the shape of the input, of each intermediate tensor and of the output
print(
  inputs.shape,
  hidden1.shape,
  hidden2.shape,
  hidden3.shape,
  hidden4.shape,
  outputs.shape,
  sep='\n',
)

## `torch.nn.Sequential` makes your life much easier

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# (hyper-)parameters: layer widths, dropout probability and mini-batch size
batch_size = 5
input_dim, hidden1_dim, output_dim = 10, 20, 1
p = 0.2

# chain the layers into one module; they are applied in the order listed
my_network = nn.Sequential(
  nn.Linear(in_features=input_dim, out_features=hidden1_dim),
  nn.ReLU(),
  nn.Dropout(p),
  nn.BatchNorm1d(hidden1_dim),
  nn.Linear(in_features=hidden1_dim, out_features=output_dim),
)

# a random (fake) mini-batch
inputs = torch.randn(batch_size, input_dim)

# a single call runs the batch through every layer of the container
outputs = my_network(inputs)

# print the input and output shapes
print(inputs.shape, outputs.shape, sep='\n')

## The most standard way of building neural networks
- Modularize your network using `torch.nn.Module` **(highly recommended)**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F


# pre-define your modularized network class
class MyNetwork(nn.Module):
  """MLP block: Linear -> ReLU -> Dropout -> BatchNorm1d -> Linear.

  Args:
    input_dim: number of features of each input sample.
    hidden1_dim: width of the hidden layer (also the BatchNorm1d feature count).
    p: dropout probability handed to `nn.Dropout`.
    output_dim: number of features of each prediction.
  """

  def __init__(self, input_dim: int, hidden1_dim: int, p: float, output_dim: int) -> None:
    # Zero-argument super() resolves the class from the method itself, so
    # construction keeps working even if the global name `MyNetwork` is later
    # rebound (this notebook defines a class called `MyNetwork` more than once).
    super().__init__()

    # Registered as `self.layers`, so parameters are named `layers.<index>.*`.
    self.layers = nn.Sequential(
      nn.Linear(input_dim, hidden1_dim),
      nn.ReLU(),
      nn.Dropout(p),
      nn.BatchNorm1d(hidden1_dim),
      nn.Linear(hidden1_dim, output_dim),
    )

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Run `x` through the layer stack and return the prediction.

    In this notebook `x` is a `(batch_size, input_dim)` tensor and the result
    is `(batch_size, output_dim)`.
    """
    prediction = self.layers(x)
    return prediction
  
  
# (hyper-)parameters: layer widths, dropout probability and mini-batch size
batch_size = 5
input_dim, hidden1_dim, output_dim = 10, 20, 1
p = 0.2

# instantiate the modularized network defined above
my_network = MyNetwork(input_dim, hidden1_dim, p, output_dim)

# a random (fake) mini-batch
inputs = torch.randn(batch_size, input_dim)

# calling the module runs its forward pass on the batch
outputs = my_network(inputs)

# print the input and output shapes
print(inputs.shape, outputs.shape, sep='\n')

## Modules for constructing optimizers
- `torch.optim`
- SGD : `torch.optim.SGD`
- ADAM : `torch.optim.Adam`
- RMSProp : `torch.optim.RMSprop`

Please refer to the [API documentation](https://pytorch.org/docs/stable/optim.html) for more information on the supported optimizers.

## Train a toy neural network regressor that learns the function y = 2x

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# pre-define your modularized network class
class MyNetwork(nn.Module):
  """MLP regressor: four Linear layers with a ReLU after each of the first three.

  Args:
    input_dim: number of features of each input sample.
    hidden_dim: width shared by all three hidden layers.
    output_dim: number of features of each prediction.
  """

  def __init__(self, input_dim: int, hidden_dim: int, output_dim: int) -> None:
    # Zero-argument super() resolves the class from the method itself, so
    # construction keeps working even if the global name `MyNetwork` is later
    # rebound (this notebook defines a class called `MyNetwork` more than once).
    super().__init__()

    # Registered as `self.layers`, so parameters are named `layers.<index>.*`.
    self.layers = nn.Sequential(
      nn.Linear(input_dim, hidden_dim),
      nn.ReLU(),
      nn.Linear(hidden_dim, hidden_dim),
      nn.ReLU(),
      nn.Linear(hidden_dim, hidden_dim),
      nn.ReLU(),
      nn.Linear(hidden_dim, output_dim),
    )

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Run `x` through the layer stack and return the prediction.

    In this notebook `x` is a `(batch_size, input_dim)` tensor and the result
    is `(batch_size, output_dim)`.
    """
    prediction = self.layers(x)
    return prediction
  
  
# (hyper-)parameters of the model and of the training run
input_dim = 1
output_dim = 1
hidden_dim = 100
batch_size = 100
max_iteration = 10000
learning_rate = 0.0001

# instantiate the regressor
my_network = MyNetwork(input_dim, hidden_dim, output_dim)

# plain SGD over all of the network's parameters
optimizer = optim.SGD(my_network.parameters(), lr=learning_rate)

# training loop: every iteration draws a fresh random mini-batch
for i in range(max_iteration):
  # clear the gradients left over from the previous iteration
  optimizer.zero_grad()

  # sample inputs and build the matching targets for y = 2x
  inputs = torch.randn(batch_size, input_dim)
  labels = inputs*2

  # forward pass, then mean-squared error against the targets
  predictions = my_network(inputs)
  loss = F.mse_loss(predictions, labels)

  # backward pass fills in the gradients; step() applies one SGD update
  loss.backward()
  optimizer.step()

  # report the loss once every 1000 iterations
  if not i % 1000:
    print('[{}] train loss : {:.3f}'.format(i, loss))


# test: evaluate the trained regressor on a few hand-picked inputs
test_inputs = torch.tensor([[0.0], [0.25], [0.5], [0.75], [1.0]])
test_labels = test_inputs*2

# Inference only: no_grad() stops autograd from recording the forward pass, so
# no graph is built and the printed predictions carry no `grad_fn`.
with torch.no_grad():
  predictions = my_network(test_inputs)

print('test inputs :\n', test_inputs)
print('test labels :\n', test_labels)
print('predictions :\n', predictions)