<a href="https://colab.research.google.com/github/kangwonlee/pytorch-ibm-coursera/blob/main/week04_10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Hello PyTorch 👋🏻



references
* https://www.coursera.org/learn/deep-neural-networks-with-pytorch/
* https://github.com/damounayman/Deep-Neural-Networks-with-PyTorch/blob/main/Week1/1D_tensors.ipynb



## week 4



## Shallow Neural Networks



### 7.1 Neural Networks in One Dimension



In [None]:
import functools
import os
import random
from typing import Callable


import matplotlib.pyplot as plt
import torch
import torch.nn
import torchvision.datasets



In [None]:
torch.manual_seed(0)


class Net(torch.nn.Module):
  """Fully connected network that applies a sigmoid after every linear layer.

  The positional arguments are the layer widths in order, so ``Net(1, 2, 1)``
  builds ``Linear(1, 2)`` followed by ``Linear(2, 1)``.  Keyword arguments are
  accepted and ignored here so subclasses can forward their own options.
  """

  def __init__(self, *argv, **kwarg):
    super(Net, self).__init__()

    # Pair every width with its successor: (w0, w1), (w1, w2), ...
    # Iterating over the bare tuple ``(argv[:-1], argv[1:])`` only happens to
    # give the right pairs for exactly three widths; ``zip`` works for any
    # number of layers.
    self.linears = torch.nn.ModuleList([
      torch.nn.Linear(n_in, n_out)
      for n_in, n_out in zip(argv[:-1], argv[1:])
    ])

  def forward(self, x):
    """Return the output of the last layer after its sigmoid."""
    for layer in self.linears:
      x = torch.sigmoid(layer(x))

    return x

  def plot_activation(self, Y, X):
    """Scatter the first two activations of the first layer, colored by ``Y``.

    Only columns 0 and 1 of the first layer's sigmoid output are drawn, so
    the first layer needs at least two output units.
    """
    a1 = torch.sigmoid(self.linears[0](X)).detach().numpy()
    plt.scatter(
      a1[:, 0],
      a1[:, 1],
      c=Y.numpy().reshape(-1)
    )
    plt.title('activations')
    plt.grid(True)


# https://pytorch.org/docs/stable/generated/torch.nn.ModuleList.html
# https://stackoverflow.com/questions/50463975/pytorch-how-to-properly-create-a-list-of-nn-linear
# https://discuss.pytorch.org/t/when-should-i-use-nn-modulelist-and-when-should-i-use-nn-sequential/5463



* 1 input node
* two nodes in the hidden layer
* one node on the output layer



$$
\begin{align}
z_{N \times 2}^1&=x_{N \times 1}^1W_{1 \times 2}^1+b_{N \times 2}^1 \\
x_{N \times 2}^2&=\sigma(z_{N \times 2}^1) \\
z_{N \times 1}^2&=x_{N \times 2}^2W_{2 \times 1}^2+b_{N \times 1}^2 \\
y_{N \times 1}&=\sigma(z_{N \times 1}^2)
\end{align}
$$



In [None]:
# Build a 1-2-1 network and push a single scalar input through it.
model = Net(1, 2, 1)

x = torch.tensor([0.0])

yhat = model(x)
# Bare expression: shown as the notebook cell's output.
yhat



In [None]:
# Show the weights and biases currently registered on the model.
model.state_dict()



Sequential version



In [None]:
# Reseed so the initial weights of the model below are reproducible.
torch.manual_seed(1)

# Same 1-2-1 layout as Net(1, 2, 1), written with torch.nn.Sequential.
model = torch.nn.Sequential(
    torch.nn.Linear(1, 2),
    torch.nn.Sigmoid(),
    torch.nn.Linear(2, 1),
    torch.nn.Sigmoid(),
)

x = torch.tensor([0.0])

yhat = model(x)

# Bare expression: shown as the notebook cell's output.
yhat



In [None]:
# Three samples with one feature each (a 3 x 1 tensor), evaluated in one call.
x = torch.tensor([
    [0.0],
    [1.0],
    [2.0],
  ])

yhat = model(x)

# Bare expression: shown as the notebook cell's output.
yhat



In [None]:
# Show the parameters of the Sequential model.
model.state_dict()



Train the model



In [None]:
# 41 evenly spaced inputs on [-20, 20], stored as a float column vector.
X = torch.linspace(-20, 20, 41).view(-1, 1).type(torch.FloatTensor)
# Target is 1 strictly inside (-4, 4) and 0 elsewhere, with the same column
# shape as X and the dtype torch.zeros would have produced.
Y = ((X > -4.0) & (X < 4.0)).to(torch.get_default_dtype())



In [None]:
def plot_y_yhat(X, Y, model, epoch, legend=True):
    """Draw the model's prediction over ``X`` together with the target ``Y``.

    The prediction curve is labelled ``'epoch <epoch>'`` and the target is
    drawn in red.  The legend is shown only when ``legend`` is truthy.
    """
    inputs = X.numpy()
    prediction = model(X).detach().numpy()
    curve_label = 'epoch %s' % (epoch,)

    plt.plot(inputs, prediction, label=curve_label)
    plt.plot(inputs, Y.numpy(), 'r')
    plt.xlabel('x')
    if legend:
      plt.legend(loc=0)
    plt.grid(True)



In [None]:
def train(Y, X, model, optimizer, criterion, n_epoch=1000):
  """Train ``model`` one sample at a time; return the summed loss per epoch.

  Args:
    Y: targets, iterated in lockstep with ``X``.
    X: inputs, one sample per iteration.
    model: callable mapping a sample to a prediction.
    optimizer: optimizer built over ``model``'s parameters.
    criterion: loss called as ``criterion(yhat, y)``.
    n_epoch: number of passes over the data.  Forced to 1 whenever the
      ``CI`` environment variable is set to a non-empty value.

  Returns:
    A list with one entry per epoch: the sum of the per-sample losses.
  """
  cost = []

  # Keep automated runs short.
  if os.getenv('CI', False):
    n_epoch = 1

  for _ in range(n_epoch):
    total = 0.0

    for y, x in zip(Y, X):
      # Clear gradients *before* backward so that gradients left on the
      # parameters by earlier code cannot leak into the first update.
      optimizer.zero_grad()

      yhat = model(x)

      # https://stackoverflow.com/questions/57798033/valueerror-target-size-torch-size16-must-be-the-same-as-input-size-torch
      loss = criterion(yhat, y)
      loss.backward()

      optimizer.step()

      total += loss.item()

    cost.append(total)

  return cost



In [None]:
# Fresh, untrained 1-2-1 network; its prediction is plotted as "epoch 0".
model = Net(1, 2, 1)

plot_y_yhat(X, Y, model, 0, legend=True)



In [None]:
# Scatter the two first-layer activations of every sample, colored by label.
model.plot_activation(Y, X)



In [None]:
# Run the per-sample trainer with SGD and binary cross-entropy.
# Only one epoch is requested here so the notebook executes quickly.
loss_list = train(
    Y=Y, X=X, model=model,
    optimizer=torch.optim.SGD(model.parameters(), lr=0.1),
    criterion=torch.nn.BCELoss(),
    n_epoch=1 # use 1000 to train
)



In [None]:
# loss_list has one entry per epoch; skip the plot for very short runs.
if len(loss_list) > 10:
  plot_y_yhat(X, Y, model, len(loss_list), legend=True)



In [None]:
# Show the first-layer activations again, but only after a real training run.
if len(loss_list) > 10:
  model.plot_activation(Y, X)



In [None]:
# Learning curve: summed loss per epoch (skipped for very short runs).
if len(loss_list) > 10:
  plt.plot(loss_list)
  plt.xlabel('epoch')
  plt.ylabel('loss')
  plt.grid(True)



### 7.2 Neural Networks More Hidden Neurons



More complicated data



In [None]:
# 41 evenly spaced inputs on [-20, 20], stored as a float column vector.
X = torch.linspace(-20, 20, 41).view(-1, 1).type(torch.FloatTensor)
# Target is 1 inside either open band (-10, -5) or (5, 10) and 0 elsewhere,
# with the same column shape as X and the dtype torch.zeros would have produced.
Y = (
    ((X > -10.0) & (X < -5.0))
    | ((X > 5.0) & (X < 10.0))
).to(torch.get_default_dtype())



How would the previous model do?



In [None]:
# Same 1-2-1 architecture as before, now evaluated on the two-band target.
model = Net(1, 2, 1)

plot_y_yhat(X, Y, model, 0, legend=True)



In [None]:
# First-layer activations of the untrained model, colored by the new labels.
model.plot_activation(Y, X)



In [None]:
# Per-sample SGD training with binary cross-entropy on the two-band data.
# Only one epoch is requested here so the notebook executes quickly.
loss_list = train(
    Y=Y, X=X, model=model,
    optimizer=torch.optim.SGD(model.parameters(), lr=0.1),
    criterion=torch.nn.BCELoss(),
    n_epoch=1, # use 1000 to train
)



In [None]:
# loss_list has one entry per epoch; skip the plot for very short runs.
if len(loss_list) > 10:
  plot_y_yhat(X, Y, model, len(loss_list), legend=True)



In [None]:
# Show the first-layer activations again, but only after a real training run.
if len(loss_list) > 10:
  model.plot_activation(Y, X)



In [None]:
# Learning curve: summed loss per epoch (skipped for very short runs).
if len(loss_list) > 10:
  plt.plot(loss_list)
  plt.xlabel('epoch')
  plt.ylabel('loss')
  plt.grid(True)



Add more neurons to the hidden layer



In [None]:
# Reseed for reproducible initial weights, then widen the hidden layer to nine neurons.
torch.manual_seed(0)
model = Net(1, 9, 1)



Define the dataset


In [None]:
class Data(torch.utils.data.Dataset):
  """1-D dataset of 100 points on [-20, 20] with a two-band binary target.

  ``y`` is 1 where ``x`` lies strictly inside (-10, -5) or (5, 10) and 0
  elsewhere.  Both ``x`` and ``y`` are stored as column vectors (100 x 1).
  """

  def __init__(self):
    self.x = torch.linspace(-20, 20, 100).view(-1, 1).type(torch.FloatTensor)

    self.y = torch.zeros(self.x.shape[0])
    self.y[
        ((self.x[:, 0] > -10.0) & (self.x[:, 0] < -5.0))
        | ((self.x[:, 0] > 5.0) & (self.x[:, 0] < 10.0))
    ] = 1.0

    self.y = self.y.view(-1, 1)
    self.len = self.x.shape[0]

  def __getitem__(self, index):
    """Return the ``(x, y)`` pair stored at ``index``."""
    return self.x[index], self.y[index]

  def __len__(self):
    """Return the number of samples."""
    return self.len

  def plot_y_yhat(self, model, epoch, legend=True):
    """Plot ``model``'s prediction against this dataset's target.

    Delegates to the module-level ``plot_y_yhat``.  The ``legend`` argument
    is forwarded; it used to be ignored in favour of a hard-coded ``True``.
    """
    plot_y_yhat(self.x, self.y, model, epoch, legend=legend)



In [None]:
# Build the 100-point dataset and plot the untrained model against it ("epoch 0").
data_set = Data()
data_set.plot_y_yhat(model, 0, legend=True)



In [None]:
# plot_activation draws only the first two of the nine hidden activations.
# NOTE(review): this uses the module-level X and Y from the earlier cell rather
# than data_set.x / data_set.y — confirm that is intended.
model.plot_activation(Y, X)



`train()` function that takes a `train_loader`



In [None]:
def train(model, optimizer, criterion, train_loader, n_epoch=1000):
  """Train ``model`` on batches from ``train_loader``; return the loss per epoch.

  Args:
    model: callable mapping a batch ``x`` to predictions.
    optimizer: optimizer built over ``model``'s parameters.
    criterion: loss called as ``criterion(yhat, y)``.
    train_loader: iterable yielding ``(x, y)`` batches.
    n_epoch: number of passes over ``train_loader``.  Forced to 1 whenever
      the ``CI`` environment variable is set to a non-empty value.

  Returns:
    A list with one entry per epoch: the sum of the batch losses.
  """
  cost = []

  # Keep automated runs short.
  if os.getenv('CI', False):
    n_epoch = 1

  for _ in range(n_epoch):
    total = 0.0

    for x, y in train_loader:
      # One reset per step is enough; the second call that used to sit
      # between the loss and backward() had nothing left to clear.
      optimizer.zero_grad()

      loss = criterion(model(x), y)
      loss.backward()
      optimizer.step()

      total += loss.item()

    cost.append(total)

  return cost



In [None]:
%%time
# Train with Adam and binary cross-entropy; batch_size=100 equals the dataset
# size, so each epoch is a single full-batch step.
loss_list = train(
    model=model,
    optimizer=torch.optim.Adam(model.parameters(), lr=0.01),
    criterion=torch.nn.BCELoss(),
    train_loader=torch.utils.data.DataLoader(dataset=data_set, batch_size=100),
    n_epoch=1 # use 1000 to train
)



In [None]:
# loss_list has one entry per epoch; skip the plot for very short runs.
# NOTE(review): the model was trained on data_set, but this plots against the
# module-level X and Y from the earlier cell — confirm that is intended.
if len(loss_list) > 10:
  plot_y_yhat(X, Y, model, len(loss_list), legend=True)



In [None]:
# Learning curve: summed loss per epoch (skipped for very short runs).
if len(loss_list) > 10:
  plt.plot(loss_list)
  plt.xlabel('epoch')
  plt.ylabel('loss')
  plt.grid(True)



### 7.3 Neural Networks with Multiple Dimensional Input



In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mc


import torch
import torch.nn
import torch.utils.data



In [None]:
torch.manual_seed(1)
class XOR_Data(torch.utils.data.Dataset):
  """Noisy XOR dataset: four clusters around the corners of the unit square.

  The first three quarters of the samples sit at (0, 0), (0, 1) and (1, 0);
  every remaining sample sits at (1, 1).  The label is 1 for the two mixed
  corners and 0 otherwise.  Gaussian noise scaled by ``noise`` is added to
  the inputs only.
  """

  def __init__(self, N_s=100, noise=0.1):
    quarter = N_s // 4
    # (x0, x1, label) for each corner, in storage order.
    corners = (
      (0.0, 0.0, 0.0),
      (0.0, 1.0, 1.0),
      (1.0, 0.0, 1.0),
      (1.0, 1.0, 0.0),
    )
    # The last segment runs to N_s so it absorbs any remainder of N_s / 4.
    edges = (0, quarter, 2 * quarter, 3 * quarter, N_s)

    self.x = torch.zeros((N_s, 2))
    self.y = torch.zeros((N_s, 1))

    for (x0, x1, label), start, stop in zip(corners, edges[:-1], edges[1:]):
      self.x[start:stop, :] = torch.Tensor([x0, x1])
      self.y[start:stop, 0] = label

    self.x += noise * torch.randn((N_s, 2))
    self.len = N_s

  def __getitem__(self, index):
    """Return the ``(x, y)`` pair stored at ``index``."""
    sample = self.x[index]
    target = self.y[index]
    return sample, target

  def __len__(self):
    """Return the number of samples."""
    return self.len

  def plot(self, model=None):
    """Draw the data (and optionally ``model``'s output) in 3-D; return the axes."""
    _, ax = plt.subplots(subplot_kw={"projection": "3d"})

    first = self.x[:, 0].numpy()
    second = self.x[:, 1].numpy()
    ax.plot(first, second, self.y[:, 0].numpy(), '.', label='data')

    if model is None:
      return ax

    yhat = model(self.x)
    ax.plot(first, second, yhat[:, 0].detach().numpy(), '.', label='model')
    return ax



In [None]:
# Default XOR dataset: 100 samples with noise scale 0.1.
data_set = XOR_Data()



Plot of data



In [None]:
# 3-D scatter of the raw data only (no model passed).
ax = data_set.plot()



Model



In [None]:
# Two inputs, four hidden neurons, one sigmoid output.
model = Net(2, 4, 1)



In [None]:
# from 7.3 lab
import matplotlib.colors as mc
import numpy as np


def plot_decision_regions_2class(model,data_set):
    """Shade the model's two-class decision regions and overlay the samples.

    The model is evaluated on a regular grid (spacing 0.02) covering the data
    range padded by 0.1 on each side, and its first output column is
    thresholded at 0.5.

    Args:
        model: callable mapping an (n, 2) float tensor to an (n, k) tensor.
        data_set: object exposing tensors ``x`` (n, 2) and ``y`` (n, 1).
    """
    cmap_light = mc.ListedColormap(['#FFAAAA', '#AAFFAA', '#00AAFF'])
    X = data_set.x.numpy()
    y = data_set.y.numpy()
    h = .02  # grid spacing
    x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
    y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    XX = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])

    # Pure inference: no autograd graph is needed for the grid evaluation.
    with torch.no_grad():
        above_threshold = (model(XX)[:, 0] > 0.5).numpy()

    # The mesh stores the *negated* decision, exactly as the original did.
    yhat = np.logical_not(above_threshold).reshape(xx.shape)
    plt.pcolormesh(xx, yy, yhat, cmap=cmap_light)
    plt.plot(X[y[:, 0] == 0, 0], X[y[:, 0] == 0, 1], 'o', label='y=0')
    plt.plot(X[y[:, 0] == 1, 0], X[y[:, 0] == 1, 1], 'ro', label='y=1')
    plt.title("decision region")
    plt.legend()



Before training



In [None]:
# Data together with the untrained model's output.
ax = data_set.plot(model)



In [None]:
# Decision regions of the untrained model.
plot_decision_regions_2class(model,data_set)



In [None]:
%%time
# SGD with binary cross-entropy, one sample per step (batch_size=1).
loss_list = train(
    model=model,
    optimizer=torch.optim.SGD(model.parameters(), lr=0.1),
    criterion=torch.nn.BCELoss(),
    train_loader=torch.utils.data.DataLoader(dataset=data_set, batch_size=1),
    n_epoch=1 # use 500 to train
)



After training



In [None]:
# Learning curve: summed loss per epoch (skipped for very short runs).
if len(loss_list) > 10:
  plt.plot(loss_list)
  plt.xlabel('epoch')
  plt.ylabel('loss')
  plt.grid(True)



In [None]:
# Data together with the model's output after the training cell above.
ax = data_set.plot(model)



In [None]:
# Decision regions after the training cell above.
plot_decision_regions_2class(model,data_set)



### 7.4 Multi-Class Neural Networks



Return the output of the last layer as-is, without applying `sigmoid()`



In [None]:
class MultilcassNet(Net):
  """``Net`` variant whose last linear layer is returned without a sigmoid.

  Every layer except the last is followed by a sigmoid; the last layer's raw
  output is returned unchanged.
  """

  def __init__(self, *argv, **kwarg):
    super().__init__(*argv, **kwarg)

  def forward(self, x):
    # Fold the input through all hidden layers, then apply the output layer.
    hidden_out = functools.reduce(
      lambda signal, layer: torch.sigmoid(layer(signal)),
      self.linears[:-1],
      x,
    )
    return self.linears[-1](hidden_out)



In [None]:
# Two inputs, three hidden neurons, two raw output scores.  This section is
# about the network that skips the final sigmoid, so instantiate that class
# rather than the plain sigmoid-output Net.
model = MultilcassNet(2, 3, 2)



Sequential version



In [None]:
def build_sequential(in_dim:int, h:int, out_dim:int):
  """Return a Linear -> Sigmoid -> Linear stack with no final activation.

  ``in_dim``, ``h`` and ``out_dim`` are the input, hidden and output widths.
  """
  hidden = torch.nn.Linear(in_dim, h)
  squash = torch.nn.Sigmoid()
  head = torch.nn.Linear(h, out_dim)
  layers = [hidden, squash, head]
  return torch.nn.Sequential(*layers)



Data



In [None]:
@functools.lru_cache
def load_MNIST_torchvision(root:str='./data'):
  '''
  Return the MNIST splits as ``{'train': ..., 'validation': ...}``.

  Each split is requested with ``download=True`` under ``root`` and converts
  images with ``ToTensor``.  Items have the form [[image, class], ...].
  The result is cached per ``root``.
  '''

  def fetch(is_train):
    # One torchvision dataset per split, each with its own transform object.
    return torchvision.datasets.MNIST(
        root=root,
        train=is_train,
        download=True,
        transform=torchvision.transforms.ToTensor(),
    )

  train_dataset = fetch(True)
  test_dataset = fetch(False)

  return {
      'train': train_dataset,
      'validation': test_dataset,
  }



In [None]:
# Load (or reuse the cached) MNIST splits and display one random training image.
d_mnist = load_MNIST_torchvision()
x0, y0 = random.choice(d_mnist['train'])
# Drop size-1 dimensions — presumably (1, 28, 28) -> (28, 28); TODO confirm.
x0 = np.squeeze(x0)
# NOTE(review): x0 was already squeezed on the line above, so this second
# np.squeeze looks redundant; `ax` holds whatever plt.imshow returns.
ax = plt.imshow(np.squeeze(x0))
plt.title(f'x0.shape = {x0.shape}, y = {y0}');



In [None]:
# Batches of 2000 training samples; no shuffle argument is passed.
train_loader = torch.utils.data.DataLoader(
    dataset=d_mnist['train'], batch_size=2000,
)

# Batches of 5000 validation samples.
validation_loader = torch.utils.data.DataLoader(
    dataset=d_mnist['validation'], batch_size=5000,
)



In [None]:
def train_validate(
    model, optimizer, criterion,
    train_loader, validation_loader,
    n_epoch:int=1000,
  ):
  """Train on ``train_loader`` and measure accuracy on ``validation_loader``.

  Every batch is reshaped to ``(-1, 28*28)`` before it is fed to ``model``.

  Args:
    model: callable mapping a ``(batch, 784)`` tensor to per-class scores.
    optimizer: optimizer built over ``model``'s parameters.
    criterion: loss called as ``criterion(yhat, y)``.
    train_loader: iterable yielding ``(x, y)`` training batches.
    validation_loader: iterable yielding ``(x, y)`` validation batches.
    n_epoch: number of epochs.  Forced to 1 whenever the ``CI`` environment
      variable is set to a non-empty value.

  Returns:
    A dict with ``'training_loss'`` (one value per training batch) and
    ``'validation_accuracy'`` (one percentage per epoch).

  Raises:
    ZeroDivisionError: if ``validation_loader`` yields no samples.
  """
  record = {
    'training_loss': [],
    'validation_accuracy': [],
  }

  # Keep automated runs short.
  if os.getenv('CI', False):
    n_epoch = 1

  for _ in range(n_epoch):
    for x, y in train_loader:
      # A single gradient reset per step is sufficient.
      optimizer.zero_grad()

      yhat = model(x.view(-1, 28*28))

      loss = criterion(yhat, y)
      loss.backward()

      optimizer.step()

      record['training_loss'].append(loss.item())
    # end train_loader loop

    correct = 0
    population = 0
    # Validation only reads predictions, so skip autograd bookkeeping.
    with torch.no_grad():
      for x, y in validation_loader:
        z = model(x.view(-1, 28*28))
        _, label = torch.max(z, 1)
        correct += (label == y).sum().item()
        population += len(y)
    # end validation_loader loop
    accuracy = 100.0 * (correct / population)
    record['validation_accuracy'].append(accuracy)

  # end epoch loop
  return record



In [None]:
# Network sizes: 784 inputs, 100 hidden neurons, 10 output classes.
input_dim = 28 * 28
n_hidden = 100
n_class = 10
# Sigmoid hidden layer, raw scores at the output.
model = MultilcassNet(input_dim, n_hidden, n_class)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Kept as the sigmoid baseline for the activation comparison plots below.
training_results = train_validate(
    model, optimizer, criterion,
    train_loader, validation_loader,
    n_epoch=30
)



In [None]:
# Top panel: training loss, one point per training batch.
plt.subplot(2, 1, 1)
plt.plot(training_results['training_loss'])
plt.xlabel('iter')
plt.grid(True)

# Bottom panel: validation accuracy (percent), one point per epoch.
plt.subplot(2, 1, 2)
plt.plot(training_results['validation_accuracy'])
plt.xlabel('epoch')
plt.grid(True)



### 7.5 Backpropagation



### 7.6 Activation Function



In [None]:
class MultilcassNetAct(MultilcassNet):
  """``MultilcassNet`` with a configurable hidden activation.

  The activation is taken from the ``act`` keyword argument and defaults to
  a new ``torch.nn.ReLU()``.  The last layer's output is returned without an
  activation.
  """

  def __init__(self, *argv, **kwarg):
    super().__init__(*argv, **kwarg)
    self.act = kwarg['act'] if 'act' in kwarg else torch.nn.ReLU()

  def forward(self, x):
    activations = x
    for hidden in self.linears[:-1]:
      activations = self.act(hidden(activations))

    output_layer = self.linears[-1]
    return output_layer(activations)



In [None]:
def build_sequential_act(in_dim:int, h:int, out_dim:int, act:Callable=torch.nn.ReLU):
  """Return a Linear -> activation -> Linear stack with no final activation.

  Args:
    in_dim: input width.
    h: hidden width.
    out_dim: output width.
    act: either a zero-argument factory returning a module (for example the
      class ``torch.nn.Tanh``) or an already constructed module instance.
      The default used to be the function ``torch.relu``, which cannot be
      called without arguments and so raised ``TypeError``.
  """
  # Accept instances too, matching how MultilcassNetAct receives ``act``.
  activation = act if isinstance(act, torch.nn.Module) else act()
  return torch.nn.Sequential(
    torch.nn.Linear(in_dim, h),
    activation,
    torch.nn.Linear(h, out_dim),
  )



In [None]:
# Sizes and loss shared by the activation experiments below.
input_dim = 28 * 28
n_hidden = 100
n_class = 10

criterion = torch.nn.CrossEntropyLoss()



In [None]:
# Same layer sizes as the sigmoid baseline, with tanh as the hidden activation.
model_tanh = MultilcassNetAct(
    input_dim, n_hidden, n_class,
    act=torch.nn.Tanh(),
)

# Each model gets its own optimizer, bound to its own parameters.
optimizer = torch.optim.SGD(model_tanh.parameters(), lr=0.01)

training_results_tanh = train_validate(
    model_tanh, optimizer, criterion,
    train_loader, validation_loader,
    n_epoch=30
)



In [None]:
# Top panel: training loss per batch, sigmoid baseline versus tanh.
plt.subplot(2, 1, 1)
plt.plot(training_results['training_loss'], label='sigmoid')
plt.plot(training_results_tanh['training_loss'], label='tanh')
plt.xlabel('iter')
plt.legend(loc=0)
plt.grid(True)


# Bottom panel: validation accuracy per epoch for the same two runs.
plt.subplot(2, 1, 2)
plt.plot(training_results['validation_accuracy'], label='sigmoid')
plt.plot(training_results_tanh['validation_accuracy'], label='tanh')
plt.xlabel('epoch')
plt.legend(loc=0)
plt.grid(True)



In [None]:
# Same layer sizes again, with ReLU as the hidden activation.
model_relu = MultilcassNetAct(
    input_dim, n_hidden, n_class,
    act=torch.nn.ReLU(),
)

# Each model gets its own optimizer, bound to its own parameters.
optimizer = torch.optim.SGD(model_relu.parameters(), lr=0.01)

training_results_relu = train_validate(
    model_relu, optimizer, criterion,
    train_loader, validation_loader,
    n_epoch=30
)



In [None]:
# Top panel: training loss per batch for the sigmoid, tanh and ReLU runs.
plt.subplot(2, 1, 1)
plt.plot(training_results['training_loss'], label='sigmoid')
plt.plot(training_results_tanh['training_loss'], label='tanh')
plt.plot(training_results_relu['training_loss'], label='relu')
plt.xlabel('iter')
plt.legend(loc=0)
plt.grid(True)

# Bottom panel: validation accuracy per epoch for the same three runs.
plt.subplot(2, 1, 2)
plt.plot(training_results['validation_accuracy'], label='sigmoid')
plt.plot(training_results_tanh['validation_accuracy'], label='tanh')
plt.plot(training_results_relu['validation_accuracy'], label='relu')
plt.xlabel('epoch')
plt.legend(loc=0)
plt.grid(True)

