<a href="https://colab.research.google.com/github/kangwonlee/pytorch-ibm-coursera/blob/main/week05.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Hello PyTorch 👋🏻



references
* https://www.coursera.org/learn/deep-neural-networks-with-pytorch/
* https://github.com/damounayman/Deep-Neural-Networks-with-PyTorch/blob/main/Week1/1D_tensors.ipynb



## week 5



### 8.1 Deep Neural Networks



#### MNIST dataset



In [None]:
import functools
import os
from typing import Dict, List

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn
import torch.optim
import torch.utils.data
import torchvision



Model


In [None]:
class Net(torch.nn.Module):
  '''
  Fully connected network described by a list of layer widths.

  Consecutive entries of ``layers`` give the input / output width of each
  ``torch.nn.Linear``.  ``act`` is applied after every layer except the last,
  whose raw output is returned by ``forward``.
  '''

  def __init__(self, layers:List[int], act=torch.nn.ReLU(), n_input:int=None):
    '''
    layers : layer widths; must contain more than two entries
    act : activation module applied after each layer but the last
    n_input : stored as ``self.n_input``; ``layers[0]`` is used when None
    '''
    super(Net, self).__init__()

    # check the length first so a too-short list fails here, not on layers[0]
    assert len(layers) > 2, f'len(layers) = {len(layers)}'

    self.n_input = layers[0] if n_input is None else n_input

    # ModuleList (not a plain list) so the layers are registered as sub-modules
    self.hidden = torch.nn.ModuleList([
      torch.nn.Linear(n_in, n_out)
      for n_in, n_out in zip(layers[:-1], layers[1:])
    ])

    self.act = act

  def forward(self, x):
    '''
    Apply every layer in order; the activation follows all but the last layer.
    '''
    for layer in self.hidden[:-1]:
      x = self.act(layer(x))

    return self.hidden[-1](x)

  def plot_activation(self, Y, X):
    '''
    Scatter the first two sigmoid outputs of the first layer for inputs X,
    coloured by Y.

    Note that ``torch.sigmoid`` is used here regardless of ``self.act``.
    '''
    # the layers live in self.hidden; there is no ``linears`` attribute
    a1 = torch.sigmoid(self.hidden[0](X)).detach().numpy()
    plt.scatter(
      a1[:, 0],
      a1[:, 1],
      c=Y.numpy().reshape(-1)
    )
    plt.title('activations')
    plt.grid(True)


# https://pytorch.org/docs/stable/generated/torch.nn.ModuleList.html
# https://stackoverflow.com/questions/50463975/pytorch-how-to-properly-create-a-list-of-nn-linear
# https://discuss.pytorch.org/t/when-should-i-use-nn-modulelist-and-when-should-i-use-nn-sequential/5463



Data


In [None]:
@functools.lru_cache
def load_MNIST_torchvision(root:str='./data'):
  '''
  Build the torchvision MNIST datasets stored under ``root``.

  Both datasets are created with ``download=True`` and yield
  [image, class] pairs whose images went through ``ToTensor``.
  ``functools.lru_cache`` memoises the result per ``root`` value.

  Returns a dict: 'train' holds the ``train=True`` dataset and
  'validation' holds the ``train=False`` dataset.
  '''

  def make_split(is_train:bool):
    # the two splits differ only in the ``train`` flag
    return torchvision.datasets.MNIST(
        root=root,
        train=is_train,
        download=True,
        transform=torchvision.transforms.ToTensor(),
    )

  training_split = make_split(True)
  held_out_split = make_split(False)

  return {'train': training_split, 'validation': held_out_split}



In [None]:
# Load both MNIST splits. The loader is wrapped in functools.lru_cache, so
# re-running this cell with the same root returns the cached dict.
d_mnist = load_MNIST_torchvision()



Train


In [None]:
# Layer widths used to build the MNIST models in the cells below.
# input_dim : width of the first layer (one flattened 28 x 28 image)
input_dim = 28 * 28
# n_hidden : hidden width of the one-hidden-layer model
n_hidden = 100
# n_hidden2 : hidden width of the two-hidden-layer models
n_hidden2 = 50
# n_class : width of the output layer
n_class = 10

# Loss passed to train_validate() by the training cells below.
criterion = torch.nn.CrossEntropyLoss()



In [None]:
# Mini-batch loader over the MNIST training split (2000 samples per batch).
# No shuffle argument is passed, so the DataLoader default applies.
train_loader = torch.utils.data.DataLoader(
    dataset=d_mnist['train'], batch_size=2000,
)

# Loader over the validation split (5000 samples per batch).
validation_loader = torch.utils.data.DataLoader(
    dataset=d_mnist['validation'], batch_size=5000,
)



In [None]:
def train_validate(
    model, optimizer, criterion,
    train_loader,
    validation_loader=(),
    n_epoch:int=1000,
  ) -> Dict[str, List[float]]:
  '''
  Train ``model`` and, after every epoch, measure its validation accuracy.

  model : module exposing ``n_input``; each batch is reshaped with
    ``x.view(-1, model.n_input)`` before the forward pass
  optimizer, criterion : optimizer stepping the model parameters and loss
  train_loader : iterable of (x, y) training batches
  validation_loader : iterable of (x, y) validation batches; may be empty
  n_epoch : number of epochs; forced to 1 when the CI environment variable is set

  Returns a dict with
    'training_loss' : loss of every training batch, in order
    'validation_accuracy' : accuracy in percent per epoch (stays empty when
      the validation loader yields no samples)
  '''
  record = {
    'training_loss': [],
    'validation_accuracy': [],
  }

  # keep automated runs short
  if os.getenv('CI', False):
    n_epoch = 1

  for epoch in range(n_epoch):
    for x, y in train_loader:
      yhat = model(x.view(-1, model.n_input))
      loss = criterion(yhat, y)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      record['training_loss'].append(loss.item())
    # end train_loader loop

    correct = 0
    population = 0

    # evaluate in eval mode (e.g. dropout disabled) and without autograd
    # bookkeeping, then restore whatever mode the model was in
    was_training = model.training
    model.eval()
    with torch.no_grad():
      for x, y in validation_loader:
        # use the model's own input width instead of a hard-coded 28*28
        z = model(x.view(-1, model.n_input))
        _, label = torch.max(z, 1)
        correct += (label == y).sum().item()
        population += len(y)
    model.train(was_training)
    # end validation_loader loop

    if population:
      accuracy = 100.0 * (correct / population)
      record['validation_accuracy'].append(accuracy)

  # end epoch loop
  return record



ReLU with one hidden layer



In [None]:
# One hidden layer: input_dim -> n_hidden -> n_class, ReLU after the hidden layer.
model_iho = Net(
    [input_dim, n_hidden, n_class],
    act=torch.nn.ReLU(),
)

# Plain SGD over this model's parameters.
# NOTE: the name `optimizer` is rebound by later cells for other models.
optimizer = torch.optim.SGD(model_iho.parameters(), lr=0.01)



In [None]:
%%time
# Train the one-hidden-layer model and keep its loss / accuracy record.
training_results_iho = train_validate(
    model_iho, optimizer, criterion,
    train_loader, validation_loader,
    n_epoch=1 # set 30 to train
)



In [None]:
# Legend label read by plot_result().
training_results_iho['label'] = 'i-h-o'



ReLU with two hidden layers



In [None]:
# Two hidden layers: input_dim -> n_hidden2 -> n_hidden2 -> n_class, ReLU activations.
model_ihho = Net(
    [input_dim, n_hidden2, n_hidden2, n_class],
    act=torch.nn.ReLU(),
)

# Rebinds `optimizer` to a fresh SGD instance for this model.
optimizer = torch.optim.SGD(model_ihho.parameters(), lr=0.01)



In [None]:
%%time
# Train the two-hidden-layer ReLU model and keep its loss / accuracy record.
training_results_ihho = train_validate(
    model_ihho, optimizer, criterion,
    train_loader, validation_loader,
    n_epoch=1 # set 30 to train
)



In [None]:
# Legend label read by plot_result().
training_results_ihho['label'] = 'i-h-h-o'



A more reusable plotting function



In [None]:
def plot_result(d_list:List[Dict[str, object]]):
  '''
  Compare several training records in two stacked panels.

  d_list : list of dicts, each holding
    'training_loss' : sequence plotted in the upper panel (x axis: 'iter')
    'validation_accuracy' : sequence plotted in the lower panel (x axis: 'epoch')
    'label' : legend entry used in both panels
  '''
  fig, axs = plt.subplots(2, 1)
  loss_ax, accuracy_ax = axs[0], axs[1]

  for d in d_list:
    loss_ax.plot(d['training_loss'], label=d['label'])
    accuracy_ax.plot(d['validation_accuracy'], label=d['label'])

  loss_ax.set_xlabel('iter')
  loss_ax.set_ylabel('training loss')
  loss_ax.legend(loc=0)
  loss_ax.grid(True)

  accuracy_ax.set_xlabel('epoch')
  accuracy_ax.set_ylabel('validation accuracy')
  accuracy_ax.legend(loc=0)
  accuracy_ax.grid(True)

  # keep the upper panel's x label clear of the lower panel
  fig.tight_layout()



In [None]:
# Compare the one- and two-hidden-layer ReLU runs.
plot_result([training_results_iho, training_results_ihho])



Tanh



In [None]:
# Same two-hidden-layer layout as model_ihho, with Tanh activations.
model_tanh = Net(
    [input_dim, n_hidden2, n_hidden2, n_class],
    act=torch.nn.Tanh(),
)

# Rebinds `optimizer` to a fresh SGD instance for this model.
optimizer = torch.optim.SGD(model_tanh.parameters(), lr=0.01)



In [None]:
%%time
# Train the Tanh model, then attach the legend label read by plot_result().
training_results_tanh = train_validate(
    model_tanh, optimizer, criterion,
    train_loader, validation_loader,
    n_epoch=1 # set 30 to train
)
training_results_tanh['label'] = 'tanh'



In [None]:
# Add the Tanh run to the comparison.
plot_result([training_results_iho, training_results_ihho, training_results_tanh])



Sigmoid



In [None]:
# Same two-hidden-layer layout as model_ihho, with Sigmoid activations.
model_sigmoid = Net(
    [input_dim, n_hidden2, n_hidden2, n_class],
    act=torch.nn.Sigmoid(),
)

# Rebinds `optimizer` to a fresh SGD instance for this model.
optimizer = torch.optim.SGD(model_sigmoid.parameters(), lr=0.01)



In [None]:
%%time
# Train the Sigmoid model, then attach the legend label read by plot_result().
training_results_sigmoid = train_validate(
    model_sigmoid, optimizer, criterion,
    train_loader, validation_loader,
    n_epoch=1 # set 30 to train
)
training_results_sigmoid['label'] = 'sigmoid'



In [None]:
# Compare all four MNIST runs (two ReLU layouts, Tanh, Sigmoid).
plot_result([
    training_results_iho, training_results_ihho,
    training_results_tanh, training_results_sigmoid
])



#### Spiral dataset



In [None]:
# Create Data Class

class Data(torch.utils.data.Dataset):
    '''
    Spiral toy dataset: K classes with N two-dimensional points each.

    For class j the radius runs linearly from 0 to 1 and the angle from
    4 * j to 4 * (j + 1), plus Gaussian noise (std 0.2) on the angle; a point
    is (r * sin(t), r * cos(t)).  Rows are ordered class by class.

    Attributes
      x : FloatTensor of shape (N * K, 2)
      y : LongTensor of shape (N * K,) with the class index of each row
      len : number of samples
    '''

    #  modified from: http://cs231n.github.io/neural-networks-case-study/
    # Constructor
    def __init__(self, K=3, N=500):
        D = 2
        X = np.zeros((N * K, D)) # data matrix (each row = single example)
        # int64 so that the labels cannot wrap around for large K
        y = np.zeros(N * K, dtype=np.int64) # class labels
        r = np.linspace(0.0, 1, N) # radius, identical for every class
        for j in range(K):
            ix = range(N * j, N * (j + 1))
            t = np.linspace(j * 4, (j + 1) * 4, N) + np.random.randn(N) * 0.2 # theta
            X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
            y[ix] = j
        self.y = torch.from_numpy(y).type(torch.LongTensor)
        self.x = torch.from_numpy(X).type(torch.FloatTensor)
        self.len = y.shape[0]

    # Getter
    def __getitem__(self, index):
        return self.x[index], self.y[index]

    # Get Length
    def __len__(self):
        return self.len

    # Plot the diagram
    def plot_data(self):
        '''
        Plot every class present in ``self.y`` as its own marker series.

        The first three classes keep the formats 'o', 'ro' and 'go'; further
        classes use 'o' with matplotlib's default colours.
        '''
        legacy_formats = ('o', 'ro', 'go')
        for k in torch.unique(self.y).tolist():
            fmt = legacy_formats[k] if k < len(legacy_formats) else 'o'
            members = self.y == k
            plt.plot(self.x[members, 0].numpy(), self.x[members, 1].numpy(), fmt, label=f"y = {k}")
        plt.legend()



In [None]:
# Build the spiral dataset with the default K=3 classes and N=500 points per
# class, then show it.
data_set = Data()
data_set.plot_data()



In [None]:
# Mini-batch loader over the spiral dataset.
# The rows of `data_set` are ordered class by class, so without shuffling
# every 20-sample batch would contain a single class; shuffle to mix them.
# NOTE: this rebinds `train_loader`, which previously held the MNIST loader.
train_loader = torch.utils.data.DataLoader(
    dataset=data_set, batch_size=20, shuffle=True,
)



In [None]:
# 2 inputs -> 10 -> 10 -> 3 outputs, Tanh activations, for the spiral data.
model_spiral = Net(
    [2, 10, 10, 3],
    act=torch.nn.Tanh(),
)

# Rebinds `optimizer` to a fresh SGD instance for this model.
optimizer = torch.optim.SGD(model_spiral.parameters(), lr=0.01)



In [None]:
def pcolor_class(model, data_set, n_grid:int=101):
  '''
  Plot the data and colour the plane by the class the model predicts.

  model : callable mapping an (n, 2) float tensor to per-class scores
  data_set : object with a tensor ``x`` (two columns are read) and a
    ``plot_data()`` method
  n_grid : number of grid points along each axis

  The grid spans the min / max of each of the two columns of ``data_set.x``;
  the predicted class is the argmax of the model output at each grid point.
  The model is called as-is, so its train / eval mode is left to the caller.
  '''
  data_set.plot_data()

  xmin = data_set.x[:, 0].min().item()
  xmax = data_set.x[:, 0].max().item()

  ymin = data_set.x[:, 1].min().item()
  ymax = data_set.x[:, 1].max().item()

  x = torch.linspace(xmin, xmax, n_grid)
  y = torch.linspace(ymin, ymax, n_grid)

  # explicit 'ij' keeps the historical layout (X varies along rows) and
  # avoids the warning about the implicit default
  X, Y = torch.meshgrid(x, y, indexing='ij')

  x_flat = torch.stack([X.reshape(-1), Y.reshape(-1)], dim=1)

  # pure inference: no need to record the autograd graph
  with torch.no_grad():
    _, zhat = torch.max(model(x_flat), 1)

  Zhat = zhat.numpy().reshape(n_grid, n_grid)

  ax = plt.gca()
  ax.pcolor(X.numpy(), Y.numpy(), Zhat)



In [None]:
%%time
# Train on the spiral data. No validation loader is passed, so
# result['validation_accuracy'] stays empty.
result = train_validate(
    model=model_spiral, criterion=criterion, optimizer=optimizer,
    train_loader=train_loader, n_epoch=1, # 200 to train
)



In [None]:
# Show the class regions predicted by the spiral model together with the data.
pcolor_class(model_spiral, data_set)



### 8.2 Dropout



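* Dropout is a regularization technique used to prevent overfitting
* It behaves differently in the training and evaluation steps: neurons are dropped only during training (`model.train()`), while evaluation (`model.eval()`) uses all of them
* During training, each neuron is randomly dropped (its output is set to zero) with probability $p$
* $p = 1$ shuts down all neurons
* $p$ too small : possible overfitting
* $p$ too high : possible underfitting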



In [None]:
class DropoutNet(Net):
  '''
  ``Net`` variant that applies dropout after the activation of every layer
  except the last one.
  '''

  def __init__(self, layers:List[int], act=torch.nn.ReLU(), p:float=0, n_input:int=None):
    '''
    layers, act, n_input : forwarded unchanged to ``Net.__init__``
    p : probability handed to ``torch.nn.Dropout``
    '''
    super().__init__(layers, act, n_input)
    self.drop = torch.nn.Dropout(p=p)

  def forward(self, x):
    '''
    Linear -> activation -> dropout for all layers but the last, whose raw
    output is returned.
    '''
    *inner_layers, output_layer = self.hidden

    for linear in inner_layers:
      x = self.drop(self.act(linear(x)))

    return output_layer(x)



In [None]:
# Create Data Class

class Data(torch.utils.data.Dataset):
  '''
  Noisy two-class toy dataset on the unit square.

  NOTE: this rebinds the name ``Data``, which the spiral-dataset cell earlier
  in this notebook also defines.

  Points are drawn uniformly from [0, 1) x [0, 1).  With x0, x1 the two
  coordinates and the coefficient column ``a``,

    f = a0 + a1 * x0 + a2 * x1 + a4 * x0 * x1 + a5 * x0**2 + a6 * x1**2

  (``a[3]`` is not used) and the label is 1 where f > 0, else 0.  The labels
  come from the noise-free points; Gaussian noise scaled by ``noise_std`` is
  then added to ``x`` once, and a second time when ``train`` is true.

  Side effect: with ``train=True`` the global torch seed is set to 1 for the
  extra noise and then reset to 0.

  Attributes
    x : FloatTensor (N_SAMPLES, 2), noisy inputs
    y : LongTensor (N_SAMPLES,), class labels
    f : float64 tensor (N_SAMPLES,), decision function at the clean points
    a : ndarray (7, 1), polynomial coefficients
    len : number of samples
  '''

  def __init__(self, N_SAMPLES=1000, noise_std=0.15, train=True):
    a = np.array([[-1, 1, 2, 1, 1, -3, 1]]).T

    # plain ndarray with explicit matrix products instead of the
    # no-longer-recommended np.matrix; every term has shape (N_SAMPLES, 1)
    x = np.random.rand(N_SAMPLES, 2)
    f = (
      a[0]
      + x @ a[1:3]
      + (x[:, [0]] * x[:, [1]]) * a[4]
      + (x * x) @ a[5:7]
    ).flatten()
    self.a = a

    self.y = torch.from_numpy((f > 0).astype(np.int64)).type(torch.LongTensor)
    self.x = torch.from_numpy(x).type(torch.FloatTensor)
    self.x += noise_std * torch.randn(self.x.size())
    self.f = torch.from_numpy(f)

    if train:
      # fixed seed for the additional training noise, then reset the seed
      torch.manual_seed(1)
      self.x += noise_std * torch.randn(self.x.size())
      torch.manual_seed(0)

    self.len = len(self.x)

  def __getitem__(self, index):
    return self.x[index], self.y[index]

  def __len__(self):
    return self.len

  # Plot the diagram
  def plot_data(self):
    '''
    Plot each class between the smallest and largest label as a marker series.
    '''
    lowest, highest = int(self.y.min()), int(self.y.max())
    for y in range(lowest, highest + 1):
      members = self.y == y
      plt.plot(self.x[members, 0].numpy(), self.x[members, 1].numpy(), 'o', label=f'y = {y}')
    plt.legend(loc=0)
    plt.grid(True)




In [None]:
# Two networks with the same layout (2 -> 300 -> 2): one without dropout
# (p=0.0) and one with p=0.5.
model = DropoutNet([2, 300, 2], p=0.0)
model_drop = DropoutNet([2, 300, 2], p=0.5)
# put the dropout model in training mode
model_drop.train()
# model_drop.eval()

# One Adam optimizer per model.
optimizer_ofit = torch.optim.Adam(model.parameters(), lr=0.01)
optimizer_drop = torch.optim.Adam(model_drop.parameters(), lr=0.01)

# Rebinds `criterion` to a new CrossEntropyLoss instance.
criterion = torch.nn.CrossEntropyLoss()



In [None]:
# Training set (train=True by default). This rebinds `data_set`, which held
# the spiral dataset before.
data_set = Data()

# Seed torch before building the validation set, which is created with
# train=False.
torch.manual_seed(0)
validation_set = Data(train=False)

def init_loss():
  '''
  Return a fresh loss record: one empty list per (data split, model) pair,
  to be filled by train_dropout().
  '''
  series_names = (
    'training data no dropout',
    'validation data no dropout',
    'training data dropout',
    'validation data dropout',
  )
  return {name: [] for name in series_names}

# Empty loss record; the training cell below creates a fresh one again.
LOSS = init_loss()



In [None]:
def train_dropout(
    model, model_dropout,
    data_set, validation_set,
    criterion,
    optimizer_ofit, optimizer_drop,
    LOSS, n_epoch=500,
  ):
  '''
  Train a plain model and a dropout model side by side on the full data set.

  model, model_dropout : the two networks; each epoch is one full-batch step
  data_set, validation_set : objects exposing tensors ``x`` and ``y``
  criterion : loss function
  optimizer_ofit, optimizer_drop : optimizers of ``model`` / ``model_dropout``
  LOSS : dict of lists (see init_loss); one value is appended to each of its
    four lists per epoch
  n_epoch : number of epochs

  The dropout model is evaluated on the validation data in eval mode and is
  left in training mode on return.
  '''
  for epoch in range(n_epoch):
    # make sure dropout is active for the training forward pass
    model_dropout.train()

    yhat = model(data_set.x)
    yhat_drop = model_dropout(data_set.x)
    loss = criterion(yhat, data_set.y)
    loss_drop = criterion(yhat_drop, data_set.y)

    LOSS['training data no dropout'].append(loss.item())
    LOSS['training data dropout'].append(loss_drop.item())

    # validation only reads the models: skip autograd bookkeeping
    with torch.no_grad():
      v = criterion(model(validation_set.x), validation_set.y)
      LOSS['validation data no dropout'].append(v.item())

      # switch the model passed in (not a notebook global) to eval mode
      model_dropout.eval()
      v = criterion(model_dropout(validation_set.x), validation_set.y)
      LOSS['validation data dropout'].append(v.item())
      model_dropout.train()

    optimizer_ofit.zero_grad()
    optimizer_drop.zero_grad()
    loss.backward()
    loss_drop.backward()
    optimizer_ofit.step()
    optimizer_drop.step()



In [None]:
%%time
# Start from an empty record so re-running this cell does not append to
# earlier results, then train both models with the default n_epoch.
LOSS = init_loss()
train_dropout(
    model, model_drop,
    data_set, validation_set,
    criterion,
    optimizer_ofit, optimizer_drop,
    LOSS
)



In [None]:
# Class regions predicted by the model without dropout (p=0.0).
pcolor_class(model, data_set)



In [None]:
# Switch the dropout model to eval mode for plotting its class regions,
# then put it back in training mode.
model_drop.eval()
pcolor_class(model_drop, data_set)
model_drop.train()



In [None]:
def plot_loss_dict(loss):
  '''
  Draw every series of ``loss`` (a dict: name -> sequence of values) on a
  logarithmic y axis, using the dict key as the legend entry.
  '''
  for series_name in loss:
    plt.semilogy(loss[series_name], label=series_name)

  plt.xlabel('iter')
  plt.legend(loc=0)
  plt.grid(True)



In [None]:
# Training / validation loss of both models on a log scale.
plot_loss_dict(LOSS)



### 8.3 Neural Network initialization weights



### 8.4 Gradient Descent with Momentum



### 8.5 Batch Normalization

