<a href="https://colab.research.google.com/github/maicg01/python-image-processing/blob/main/MLP_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import unittest
import math

# The unit tests

## Tests for Fully connected layer

In [None]:
class TestFCMethods(unittest.TestCase):
  """Unit tests for the fully connected layer ``FC``."""

  def test_fc_init(self):
    """The constructor stores sizes/activation and allocates W and dW."""
    fc = FC(n_in=10, n_out=5, activation="sigmoid")
    self.assertEqual(fc.n_in, 10)
    self.assertEqual(fc.n_out, 5)
    self.assertEqual(fc.W.shape, (10,5))
    self.assertEqual(fc.dW.shape, (10,5))
    self.assertEqual(fc.activation, "sigmoid")

  def test_fc_forward(self):
    """A zero input gives sigmoid(0) = 0.5 everywhere, whatever W is."""
    fc = FC(n_in=10, n_out=5, activation="sigmoid")
    x = np.zeros((3, 10), dtype=np.float32)
    y = fc.forward(x)
    error = np.sum(np.abs(y-np.ones((3,5))*0.5))
    self.assertEqual(y.shape, (3, 5))
    self.assertLess(error, 1e-6)

  def test_fc_forward_identity(self):
    """With no activation a zero input maps to a zero output."""
    fc = FC(n_in=10, n_out=5, activation=None)
    x = np.zeros((3, 10), dtype=np.float32)
    y = fc.forward(x)
    error = np.sum(np.abs(y-np.zeros((3,5))))
    self.assertEqual(y.shape, (3, 5))
    self.assertLess(error, 1e-6)

  def test_fc_backward_identity(self):
    """Backward pass of a linear layer: check shapes and gradient values."""
    fc = FC(n_in=10, n_out=5, activation=None)
    x = np.zeros((3, 10), dtype=np.float32)
    y = fc.forward(x)
    dx = fc.backward(np.ones_like(y))
    self.assertEqual(dx.shape, x.shape)
    self.assertEqual(fc.dW.shape, fc.W.shape)

    # With an all-ones upstream gradient, dx = 1 @ W.T, so every row of dx
    # is the vector of row sums of W.
    expected_dx = np.tile(fc.W.sum(axis=1), (3, 1))
    self.assertLess(np.sum(np.abs(dx - expected_dx)), 1e-6)

    # dW = x.T @ 1 and x is all zeros, so the weight gradient must vanish.
    self.assertLess(np.sum(np.abs(fc.dW)), 1e-6)

## Tests for MLP

In [None]:
class TestMLPMethods(unittest.TestCase):
  """Wiring and shape checks for the ``MLP`` container."""

  def test_mlp_init(self):
    """Layer sizes must chain n_in -> hiddens[0] -> hiddens[1]."""
    net = MLP(n_in=10, hiddens=[5, 2])
    first = net.layers[0]
    second = net.layers[1]
    self.assertEqual(net.n_in, 10)
    self.assertEqual(net.hiddens, [5, 2])
    self.assertEqual((first.n_in, first.n_out), (10, 5))
    self.assertEqual((second.n_in, second.n_out), (5, 2))

  def test_mlp_forward(self):
    """The output has one row per sample and hiddens[-1] columns."""
    net = MLP(n_in=10, hiddens=[5, 2])
    batch = np.zeros((3, 10), dtype=np.float32)
    out = net.forward(batch)
    self.assertEqual(out.shape, (3, 2))

  def test_mlp_backward(self):
    """The input gradient has the same shape as the input."""
    net = MLP(n_in=10, hiddens=[5, 2])
    batch = np.zeros((3, 10), dtype=np.float32)
    out = net.forward(batch)
    grad_in = net.backward(np.ones_like(out))
    self.assertEqual(grad_in.shape, batch.shape)

## Tests for ResBlock

In [None]:
class TestResBlockMethods(unittest.TestCase):
  """Unit tests for ``ResidualBlock``."""

  def test_res_init(self):
    """Differing first/last widths require a projection (skip) layer."""
    model = ResidualBlock(n_in=10, hiddens=[5, 6, 2])
    fc = model.input_fc
    layer0 = model.block.layers[0]
    layer1 = model.block.layers[1]
    skip = model.skip
    self.assertEqual((fc.n_in, fc.n_out), (10,5))
    self.assertEqual(model.block.n_in, 5)
    self.assertEqual(model.block.hiddens, [6, 2])
    self.assertEqual(layer0.n_in, 5)
    self.assertEqual(layer0.n_out, 6)
    self.assertEqual(layer1.n_in, 6)
    self.assertEqual(layer1.n_out, 2)
    self.assertEqual(skip.n_in, 5)
    self.assertEqual(skip.n_out, 2)

  def test_res_identity(self):
    """Equal first/last widths use an identity shortcut (no skip layer)."""
    model = ResidualBlock(n_in=10, hiddens=[5, 6, 5])
    fc = model.input_fc
    layer0 = model.block.layers[0]
    layer1 = model.block.layers[1]
    skip = model.skip
    self.assertEqual((fc.n_in, fc.n_out), (10,5))
    self.assertEqual(model.block.n_in, 5)
    self.assertEqual(model.block.hiddens, [6, 5])
    self.assertEqual(layer0.n_in, 5)
    self.assertEqual(layer0.n_out, 6)
    self.assertEqual(layer1.n_in, 6)
    self.assertEqual(layer1.n_out, 5)
    # assertIsNone takes (obj, msg=None); the second positional argument is
    # the failure message, not an expected value, so it is omitted here.
    self.assertIsNone(skip)

  def test_res_forward(self):
    """The output has one row per sample and hiddens[-1] columns."""
    model = ResidualBlock(n_in=10, hiddens=[5, 2])
    x = np.zeros((3, 10), dtype=np.float32)
    y = model.forward(x)
    self.assertEqual(y.shape, (3, 2))

  def test_res_backward(self):
    """The input gradient has the same shape as the input."""
    model = ResidualBlock(n_in=10, hiddens=[5, 2])
    x = np.zeros((3, 10), dtype=np.float32)
    y = model.forward(x)
    dx = model.backward(np.ones_like(y))
    self.assertEqual(dx.shape, x.shape)

## Tests for Cross Entropy

In [None]:
class TestCEMethods(unittest.TestCase):
  """Unit tests for ``CrossEntropyLoss`` on uniform (all-zero) logits."""

  def test_ce_forward(self):
    """Uniform logits over 5 classes cost -log(1/5) per sample."""
    logits = np.zeros((10, 5))
    labels = np.array([0, 1, 2, 3, 4] * 2, dtype=int)
    criterion = CrossEntropyLoss()
    value = criterion.forward(logits, labels)
    self.assertAlmostEqual(value, -10*math.log(1/5))

  def test_ce_backward(self):
    """The gradient is softmax minus the one-hot encoding of the labels."""
    logits = np.zeros((10, 5))
    labels = np.array([0, 1, 2, 3, 4] * 2, dtype=int)
    criterion = CrossEntropyLoss()
    criterion.forward(logits, labels)
    grad = criterion.backward()

    # softmax of zero logits is 0.2 everywhere; subtract 1 at the true class
    expected = np.ones((10, 5)) * 0.2
    expected[range(10), labels] -= 1
    total_abs_diff = np.sum(np.abs(grad - expected))
    self.assertAlmostEqual(total_abs_diff, 0)

## Tests for Stochastic Gradient Descent

In [None]:
class TestSGDMethods(unittest.TestCase):
  """Unit tests for ``SGDOptimizer`` driving a small ``MLP``."""

  def test_sgd_init(self):
    """Every parameter has a same-shaped gradient; hyper-parameters are stored."""
    model = MLP(n_in=10, hiddens=[5, 2])
    sgd = SGDOptimizer(model, learning_rate=0.2, regularization=0.1)
    param = sgd.parameters()
    grad = sgd.grads()

    for p, g in zip(param, grad):
      self.assertEqual(p.shape, g.shape)
    self.assertEqual(sgd.learning_rate, 0.2)
    self.assertEqual(sgd.regularization, 0.1)

  def test_sgd_zero_grad(self):
    """zero_grad leaves every gradient buffer at exactly zero."""
    model = MLP(n_in=10, hiddens=[5, 2])
    sgd = SGDOptimizer(model, learning_rate=0.2)
    sgd.zero_grad()

    for g in sgd.grads():
      self.assertAlmostEqual(np.sum(np.abs(g)), 0)

  def test_sgd_step(self):
    """One forward/backward/step cycle runs and actually moves the weights."""
    model = MLP(n_in=10, hiddens=[5, 2])
    sgd = SGDOptimizer(model, learning_rate=0.2)
    loss_func = CrossEntropyLoss()

    x = np.zeros((3, 10), dtype=np.float32)
    ytrue = np.array([0,1,0], dtype=int)

    ypred = model.forward(x)
    loss = loss_func.forward(ypred, ytrue)

    sgd.zero_grad()
    dout = loss_func.backward()
    dx = model.backward(dout)

    # snapshot the weights so the effect of step() can be observed
    before = [p.copy() for p in sgd.parameters()]
    sgd.step()
    after = sgd.parameters()

    for old, new in zip(before, after):
      self.assertEqual(old.shape, new.shape)
    # The first layer sees an all-zero input (zero weight gradient), but the
    # output layer receives sigmoid(0) = 0.5 activations, so at least one
    # parameter must have been updated.
    self.assertTrue(any(not np.array_equal(old, new) for old, new in zip(before, after)))

  def test_sgd_n_step(self):
    """Repeated SGD steps on a fixed batch monotonically reduce the loss."""
    model = MLP(n_in=10, hiddens=[5, 2])
    sgd = SGDOptimizer(model, learning_rate=0.02)
    loss_func = CrossEntropyLoss()
    n_step = 10

    x = np.zeros((3, 10), dtype=np.float32)
    ytrue = np.array([0,1,0], dtype=int)

    old_loss = None  # no previous loss to compare against on the first step
    print()
    for step in range(n_step):
      model.train()
      ypred = model.forward(x)
      loss = loss_func.forward(ypred, ytrue)

      print(f"step {step} {loss:.4f}")
      if old_loss is not None:
        self.assertLess(loss, old_loss) ## SGD step reduces loss function
      old_loss = loss

      sgd.zero_grad()
      dout = loss_func.backward()
      dx = model.backward(dout)
      sgd.step()

## Tests for Dropout

In [None]:
class TestDropoutMethods(unittest.TestCase):
  """Unit tests for the ``Dropout`` layer (seeded for reproducibility)."""

  def test_dropout_init(self):
    """Dropout stores p and exposes no trainable parameters or gradients."""
    layer = Dropout(p=0.2)
    self.assertEqual(layer.p, 0.2)
    self.assertEqual(layer.parameters(), [])
    self.assertEqual(layer.grads(), [])

  def test_dropout_forward(self):
    """With seed 42 and p=0.3, 34 of the 100 unit inputs are zeroed."""
    layer = Dropout(p=0.30)
    np.random.seed(42)
    inputs = np.ones((10, 10))
    outputs = layer.forward(inputs)
    n_dropped = np.sum(outputs == 0)
    self.assertEqual(n_dropped, 34)

  def test_dropout_back(self):
    """The backward pass zeroes the same 34 positions as the forward pass."""
    layer = Dropout(p=0.30)
    np.random.seed(42)
    inputs = np.ones((10, 10))*2
    outputs = layer.forward(inputs)
    grad_in = layer.backward(np.ones_like(outputs))
    n_blocked = np.sum(grad_in == 0)
    self.assertEqual(n_blocked, 34)


# FC Layer, MLP, ResNet


## Fully connected *layer*


In [None]:
class FC:
  """Fully connected layer ``f = activation(x @ W)`` without a bias term.

  Supported activations: ``None`` (linear), ``"sigmoid"`` and ``"relu"``.
  ``forward`` caches its input and intermediate results; ``backward`` uses
  them to compute the gradients with respect to the input and to ``W``.
  """

  def __init__(self, n_in, n_out, activation=None):
    """Create the layer and draw the initial weights.

    n_in: number of input features.
    n_out: number of output features.
    activation: None, "sigmoid" or "relu" (validated lazily in forward).
    """
    self.n_in = n_in
    self.n_out = n_out
    self.activation = activation
    # Weights are drawn from N(0, (std / sqrt(n_in))^2). For relu the gain
    # sqrt(2) (He initialisation) compensates for relu zeroing the negative
    # pre-activations; other activations use gain 1.
    std = math.sqrt(2) if activation == "relu" else 1.0
    self.W = np.random.normal(scale=std/math.sqrt(n_in), size=(n_in, n_out))
    self.dW = np.zeros(shape=(n_in, n_out))

  @staticmethod
  def stable_sigmoid(x):
    """Return the element-wise sigmoid of x without overflowing np.exp.

    Non-negative entries use 1/(1+exp(-x)); negative entries use
    exp(x)/(1+exp(x)), so exp is only ever called on non-positive values.
    """
    z = np.zeros_like(x)
    pos = x >= 0
    neg = x < 0
    z[pos] = 1 / ( 1+np.exp(-x[pos]) )
    exp_neg = np.exp(x[neg])
    z[neg] = exp_neg / ( 1+exp_neg )
    return z

  def __activation(self, a):
    """Apply the configured activation to the pre-activation a (N x n_out)."""
    if self.activation is None:
      f = a.copy() # N x n_out
    elif self.activation == "sigmoid":
      f = self.stable_sigmoid(a)
    elif self.activation == "relu":
      f = a.copy()
      f[a < 0] = 0
    else:
      raise NotImplementedError(f"NotImplementedError FC.forward activation={self.activation}")
    return f

  def __dactivation(self, df, f, a):
    """Back-propagate df (gradient on f) through the activation.

    df: gradient on the activation output, N x n_out.
    f: activation output saved by forward.
    a: pre-activation saved by forward.
    Returns the gradient on the pre-activation, N x n_out.
    """
    if self.activation is None:
      da = df.copy() # N x n_out
    elif self.activation == "sigmoid":
      da = f*(1-f)*df # sigmoid'(a) = f * (1 - f)
    elif self.activation == "relu":
      da = df.copy()
      da[a < 0] = 0 # relu passes gradient only where a >= 0
    else:
      raise NotImplementedError(f"NotImplementedError FC.backward activation={self.activation}")
    return da

  def forward(self, x):
    """Compute the layer output for a batch x (N x n_in); returns N x n_out."""
    ## save computation for backward phase
    self.x = x.copy()
    self.a = np.matmul(x, self.W) # N x n_out
    self.f = self.__activation(self.a)
    return self.f

  def backward(self, df):
    """Back-propagate df (N x n_out) and return the input gradient (N x n_in).

    Uses the x, a and f cached by the latest forward call. The weight
    gradient is stored in self.dW, replacing (not accumulating into) the
    previous value.
    """
    da = self.__dactivation(df, self.f, self.a)
    self.dW = np.einsum('ij,ik->jk', self.x, da) # x.T @ da, n_in x n_out
    self.dx = np.matmul(da, self.W.T) # N x n_in
    self.df = df
    self.da = da
    return self.dx

  def parameters(self):
    """Return the list of trainable arrays (only W)."""
    return [self.W]

  def grads(self):
    """Return the gradients matching parameters() (only dW)."""
    return [self.dW]

  def train(self):
    """No-op: the layer behaves identically in train and eval mode."""
    pass

  def eval(self):
    """No-op: the layer behaves identically in train and eval mode."""
    pass

## Multilayer Perceptron

In [None]:
class MLP:
  """Feed-forward stack of FC layers applied one after another."""

  def __init__(self, n_in, hiddens, activation="sigmoid", last_layer_linear=True):
    """Build one FC layer per entry of hiddens.

    n_in: number of input features of the first layer.
    hiddens: output width of each layer, in order.
    activation: activation used by the layers.
    last_layer_linear: when true, the final layer gets no activation.
    """
    self.n_in = n_in
    self.hiddens = hiddens
    self.layers = []

    last_index = len(hiddens) - 1
    fan_in = n_in
    for index, width in enumerate(hiddens):
      # only the final layer may be linear, and only when requested
      if index == last_index and last_layer_linear:
        layer_activation = None
      else:
        layer_activation = activation
      self.layers.append(FC(n_in=fan_in, n_out=width, activation=layer_activation))
      fan_in = width  # the next layer consumes this layer's output

  def forward(self, x):
    """Feed x through every layer in order and return the last output."""
    result = x
    for layer in self.layers:
      result = layer.forward(result)
    return result

  def backward(self, dout):
    """Propagate dout through the layers last-to-first; return the input gradient."""
    grad = dout
    for layer in reversed(self.layers):
      grad = layer.backward(grad)
    return grad

  def parameters(self):
    """Return the parameters of all layers as one flat list."""
    return [param for layer in self.layers for param in layer.parameters()]

  def grads(self):
    """Return the gradients of all layers as one flat list."""
    return [grad for layer in self.layers for grad in layer.grads()]

  def train(self):
    """Switch every layer to training mode."""
    for layer in self.layers:
      layer.train()

  def eval(self):
    """Switch every layer to evaluation mode."""
    for layer in self.layers:
      layer.eval()

## Residual Block

In [None]:
class ResidualBlock:
  """Input FC layer followed by an MLP branch plus a shortcut branch.

  The block output is ``block(h) + shortcut(h)`` where ``h`` is the output
  of the input FC layer; optional dropout is applied to the sum.
  """

  def __init__(self, n_in, hiddens, activation="sigmoid", last_layer_linear=False, dropout=-1.0):
    """Create the sub-layers.

    n_in: number of input features.
    hiddens: hiddens[0] is the width of the input FC layer; hiddens[1:]
      are the widths of the MLP branch.
    activation: activation passed to every sub-layer.
    last_layer_linear: forwarded to the MLP branch.
    dropout: drop probability; dropout is disabled unless it is > 0.
    """
    first_width = hiddens[0]
    last_width = hiddens[-1]
    self.input_fc = FC(n_in, first_width, activation=activation)
    self.block = MLP(n_in=first_width, hiddens=hiddens[1:], activation=activation, last_layer_linear=last_layer_linear)
    # a projection is only needed when the shortcut has to change width
    if first_width == last_width:
      self.skip = None
    else:
      self.skip = FC(n_in=first_width, n_out=last_width, activation=activation)
    self.dropout = Dropout(p=dropout) if dropout > 0 else None

  def forward(self, x):
    """Return dropout(block(h) + shortcut(h)) with h = input_fc(x)."""
    hidden = self.input_fc.forward(x)
    branch = self.block.forward(hidden)
    shortcut = hidden if self.skip is None else self.skip.forward(hidden)
    total = branch + shortcut
    if self.dropout is None:
      return total
    return self.dropout.forward(total)

  def backward(self, dout):
    """Back-propagate dout through both branches and the input layer."""
    if self.dropout is not None:
      dout = self.dropout.backward(dout)
    grad_branch = self.block.backward(dout)
    # the identity shortcut passes the gradient through unchanged
    grad_shortcut = dout if self.skip is None else self.skip.backward(dout)
    return self.input_fc.backward(grad_branch + grad_shortcut)

  def parameters(self):
    """Parameters in order: input layer, MLP branch, then the skip layer if any."""
    collected = self.input_fc.parameters() + self.block.parameters()
    if self.skip is not None:
      collected = collected + self.skip.parameters()
    else:
      collected = collected + []
    return collected

  def grads(self):
    """Gradients in the same order as parameters()."""
    collected = self.input_fc.grads() + self.block.grads()
    if self.skip is not None:
      collected = collected + self.skip.grads()
    else:
      collected = collected + []
    return collected

  def train(self):
    """Switch every existing sub-layer to training mode."""
    self.input_fc.train()
    self.block.train()
    for optional in (self.skip, self.dropout):
      if optional is not None:
        optional.train()

  def eval(self):
    """Switch every existing sub-layer to evaluation mode."""
    self.input_fc.eval()
    self.block.eval()
    for optional in (self.skip, self.dropout):
      if optional is not None:
        optional.eval()


## Residual Network

In [None]:
class ResNet:
  """Chain of ResidualBlocks followed by a linear FC output layer."""

  def __init__(self, n_in, blocks_hiddens, n_out, activation="sigmoid", dropout=None):
    """Build the residual blocks and the output layer.

    n_in: number of input features.
    blocks_hiddens: one list of widths per residual block.
    n_out: number of outputs of the final FC layer.
    activation: activation used inside every block.
    dropout: optional per-block drop probabilities; None disables dropout.
    """
    self.n_in = n_in
    self.blocks_hiddens = blocks_hiddens
    self.n_out = n_out

    self.blocks = []
    fan_in = n_in
    for index, widths in enumerate(blocks_hiddens):
      # -1 is the "dropout disabled" value understood by ResidualBlock
      drop_p = -1 if dropout is None else dropout[index]
      self.blocks.append(
          ResidualBlock(
              n_in=fan_in,
              hiddens=widths,
              activation=activation,
              last_layer_linear=False,
              dropout=drop_p,
          )
      )
      fan_in = widths[-1]  # the next block consumes this block's output
    self.fc = FC(n_in=blocks_hiddens[-1][-1], n_out=n_out)

  def forward(self, x):
    """Run x through all blocks, then through the output layer."""
    hidden = x
    for block in self.blocks:
      hidden = block.forward(hidden)
    return self.fc.forward(hidden)

  def backward(self, dout):
    """Back-propagate dout through the output layer, then the blocks in reverse."""
    grad = self.fc.backward(dout)
    for block in reversed(self.blocks):
      grad = block.backward(grad)
    return grad

  def parameters(self):
    """Flat list of parameters: all blocks in order, then the output layer."""
    from_blocks = [param for block in self.blocks for param in block.parameters()]
    return from_blocks + self.fc.parameters()

  def grads(self):
    """Flat list of gradients in the same order as parameters()."""
    from_blocks = [grad for block in self.blocks for grad in block.grads()]
    return from_blocks + self.fc.grads()

  def train(self):
    """Switch all blocks and the output layer to training mode."""
    for block in self.blocks:
      block.train()
    self.fc.train()

  def eval(self):
    """Switch all blocks and the output layer to evaluation mode."""
    for block in self.blocks:
      block.eval()
    self.fc.eval()

## Dropout layer

In [None]:
class Dropout:
  """Standard (non-inverted) dropout.

  In training mode each element is zeroed with probability p. In evaluation
  mode nothing is dropped and the input is scaled by (1 - p) instead, so the
  expected activation matches the training-time one.
  """

  def __init__(self, p):
    """p: probability of zeroing an element during training."""
    self.p = p
    self.is_train = True
    # Mask of dropped positions from the latest training-mode forward pass;
    # None when the latest forward pass ran in eval mode or none has run yet.
    self.mask = None

  def parameters(self):
    """Dropout has no trainable parameters."""
    return []

  def grads(self):
    """Dropout has no gradients."""
    return []

  def forward(self, x):
    """Apply dropout (train mode) or the (1 - p) scaling (eval mode) to x."""
    if self.is_train:
      self.mask = np.random.rand(*x.shape) < self.p
      self.x = x
      y = x.copy()
      y[self.mask] = 0
    else:
      # forget any training mask so backward cannot reuse a stale one
      self.mask = None
      y = x * (1-self.p)
    return y

  def backward(self, dy):
    """Return the gradient on the input of the latest forward pass.

    After a training-mode forward the gradient is blocked at the dropped
    positions; after an eval-mode forward it is scaled by (1 - p).

    Raises RuntimeError when called in training mode before any forward
    pass has recorded a mask.
    """
    if self.mask is None:
      if self.is_train:
        raise RuntimeError("Dropout.backward called before forward")
      return dy * (1-self.p)
    dx = dy.copy()
    dx[self.mask] = 0
    return dx

  def train(self):
    """Switch to training mode (random masking)."""
    self.is_train = True

  def eval(self):
    """Switch to evaluation mode (deterministic scaling)."""
    self.is_train = False


# Cross Entropy Loss

In [None]:
class CrossEntropyLoss:
  """Softmax cross-entropy on logits, summed (not averaged) over the batch."""

  @staticmethod
  def stable_softmax(X):
    """Row-wise softmax; the row maximum is subtracted first to avoid overflow."""
    shifted = X - np.max(X, axis=1, keepdims=True)
    exps = np.exp(shifted)
    row_totals = np.sum(exps, axis=1, keepdims=True)
    return exps / row_totals

  def forward(self, ypred, ytrue):
    """Return the summed negative log-likelihood of the true classes.

    ypred: N x n_out logits.
    ytrue: N integer class labels in 0..n_out-1.
    The inputs and the softmax probabilities are cached for backward().
    """
    n_samples, _ = ypred.shape
    self.ypred = ypred
    self.ytrue = ytrue
    self.mu = self.stable_softmax(ypred)

    # Fancy indexing returns a copy, so clamping below leaves self.mu intact.
    true_class_prob = self.mu[np.arange(n_samples), ytrue]
    too_small = true_class_prob < 1e-8
    true_class_prob[too_small] = 1e-8  # keep log() finite
    return np.sum(-np.log(true_class_prob))

  def backward(self):
    """Return d(loss)/d(ypred) = softmax - one_hot(ytrue), shape N x n_out."""
    n_samples, _ = self.ypred.shape
    grad = self.mu.copy()
    grad[np.arange(n_samples), self.ytrue] -= 1
    return grad

# Stochastic Gradient Descent optimizer

In [None]:
import math
class SGDOptimizer:
  def __init__(self, model, learning_rate, regularization=0.0):
    self.model = model
    self.learning_rate = learning_rate
    self.regularization = regularization
    self.current_step = 0

  def parameters(self):
    return self.model.parameters()

  def grads(self):
    return self.model.grads()

  def zero_grad(self):
    for g in self.grads():
      g.fill(0)

  def step(self):
    ## input is the derivative of loss function on the output of the model
    ## dW has been computed by backward functions
    ## perform a gradient step W = W - 1/sqrt(t) lambda dW
    ## the learning rate is reduced over time for convergence
    self.current_step += 1
    for p, g in zip(self.parameters(), self.grads()):
      g = self.regularization*p + g
      g = np.clip(g, -1, 1)
      p -= 1.0 / math.sqrt(self.current_step)*self.learning_rate*g


In [None]:
import math
class AdaGradOptimizer:
  """AdaGrad: per-element step sizes scaled by accumulated squared gradients."""

  def __init__(self, model, learning_rate, regularization=0.0):
    """
    model: object exposing parameters() and grads() as parallel lists.
    learning_rate: base step size.
    regularization: L2 coefficient added to the gradient as regularization*p.
    """
    self.model = model
    self.learning_rate = learning_rate
    self.regularization = regularization
    self.current_step = 0
    # running sum of squared gradients, one array per parameter
    self.sum_grad = [np.zeros_like(param) for param in model.parameters()]

  def parameters(self):
    """Return the model's parameter arrays."""
    return self.model.parameters()

  def grads(self):
    """Return the model's gradient arrays."""
    return self.model.grads()

  def zero_grad(self):
    """Overwrite every gradient array of the model with zeros, in place."""
    for grad in self.grads():
      grad.fill(0)

  def step(self):
    """Perform one update W <- W - lr / sqrt(G + eps) * g.

    g is the clipped, regularised gradient and G the element-wise sum of
    g*g over all steps so far (including this one). Parameters and the
    accumulators are modified in place.
    """
    eps = 1e-8
    triples = zip(self.parameters(), self.grads(), self.sum_grad)
    for weight, grad, accum in triples:
      update = np.clip(self.regularization*weight + grad, -1, 1)
      accum += (update*update)  # in place: persists in self.sum_grad
      weight -= (self.learning_rate / np.sqrt(accum+eps) * update)


# Run the unit tests

In [None]:
# Run every TestCase defined above inside the notebook. argv=[''] supplies an
# explicit argument list instead of the process's own command line, and
# exit=False stops unittest from calling sys.exit() once the tests finish.
unittest.main(argv=[''], verbosity=2, exit=False)

test_ce_backward (__main__.TestCEMethods) ... ok
test_ce_forward (__main__.TestCEMethods) ... ok
test_dropout_back (__main__.TestDropoutMethods) ... ok
test_dropout_forward (__main__.TestDropoutMethods) ... ok
test_dropout_init (__main__.TestDropoutMethods) ... ok
test_fc_backward_identity (__main__.TestFCMethods) ... ok
test_fc_forward (__main__.TestFCMethods) ... ok
test_fc_forward_identity (__main__.TestFCMethods) ... ok
test_fc_init (__main__.TestFCMethods) ... ok
test_mlp_backward (__main__.TestMLPMethods) ... ok
test_mlp_forward (__main__.TestMLPMethods) ... ok
test_mlp_init (__main__.TestMLPMethods) ... ok
test_res_backward (__main__.TestResBlockMethods) ... ok
test_res_forward (__main__.TestResBlockMethods) ... ok
test_res_identity (__main__.TestResBlockMethods) ... ok
test_res_init (__main__.TestResBlockMethods) ... ok
test_sgd_init (__main__.TestSGDMethods) ... ok
test_sgd_n_step (__main__.TestSGDMethods) ... ok
test_sgd_step (__main__.TestSGDMethods) ... ok
test_sgd_zero_gra


step 0 1.9250
step 1 1.9240
step 2 1.9234
step 3 1.9229
step 4 1.9225
step 5 1.9221
step 6 1.9218
step 7 1.9215
step 8 1.9212
step 9 1.9210


ok

----------------------------------------------------------------------
Ran 20 tests in 0.053s

OK


<unittest.main.TestProgram at 0x7f1a1ff64510>

# A training and evaluation example
MLP network and ResNet (more layers)

In [None]:
class Accumulator:
  """Running sums of named metrics together with a total sample count."""

  def __init__(self):
    self.total_sample = 0  # number of samples seen so far
    self.key_values = {}   # metric name -> running sum

  def __call__(self, n_sample, **kwargs):
    """Add one batch.

    n_sample: number of samples in the batch.
    kwargs: metric name -> value summed over the batch.
    """
    for k, v in kwargs.items():
      if k in self.key_values:
        # Rebind instead of using +=: the first value is stored by
        # reference, and an in-place add would modify the caller's object
        # when it is mutable (for example a NumPy array).
        self.key_values[k] = self.key_values[k] + v
      else:
        self.key_values[k] = v
    self.total_sample += n_sample

  def mean(self, key):
    """Return the per-sample average of metric key.

    Raises KeyError for an unknown key and ZeroDivisionError when no
    samples have been accumulated yet.
    """
    return self.key_values[key] / self.total_sample

In [None]:
import sklearn
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
from sklearn import preprocessing
from tqdm.notebook import tqdm_notebook as tqdm

def prepare_data():
  """Load the sklearn digits data and return (X, Xtest, y, ytest).

  A third of the samples is held out for testing (fixed random_state=42).
  The min-max scaler is fitted on the training features only and then
  applied to the test features.
  """
  features, labels = datasets.load_digits(return_X_y=True)
  split = model_selection.train_test_split(features, labels, test_size=0.33, random_state=42)
  train_x, test_x, train_y, test_y = split

  scaler = preprocessing.MinMaxScaler()
  train_x = scaler.fit_transform(train_x)
  test_x = scaler.transform(test_x)
  return train_x, test_x, train_y, test_y

def prepare_trainer(model):
  """Return the (optimizer, loss function) pair used to train model.

  AdaGrad is the active optimizer; SGDOptimizer accepts the same
  constructor arguments and can be swapped in.
  """
  optimizer = AdaGradOptimizer(model, learning_rate=0.1, regularization=0.03)
  criterion = CrossEntropyLoss()
  return optimizer, criterion

def prepare_data_loader(X, y, batch_size):
  """Yield (batch_x, batch_y) mini-batches covering X and y in random order.

  A fresh permutation of the sample indices is drawn each time the
  generator starts; the last batch may hold fewer than batch_size samples.
  """
  n_samples = X.shape[0]
  order = np.random.permutation(n_samples)
  for start in range(0, n_samples, batch_size):
    stop = min(start + batch_size, n_samples)
    chosen = order[start:stop]
    yield X[chosen], y[chosen]

def get_model(n_in, n_out):
  """Build the network to train, with a fixed seed for reproducible weights.

  The active model is a four-block ResNet with relu activations and a drop
  probability of 0.2 in every block. A plain MLP or a ResNet with other
  block widths can be substituted here.
  """
  np.random.seed(101)
  block_widths = [[128,32,256],[64,32,64], [32,16,32], [16,8,16]]
  drop_probabilities = [0.2,0.2,0.2,0.2]
  return ResNet(
      n_in=n_in,
      blocks_hiddens=block_widths,
      n_out=n_out,
      activation="relu",
      dropout=drop_probabilities)

class Config:
  """Training hyper-parameters read by main()."""

  n_epoch: int = 200    # number of passes over the training set
  batch_size: int = 32  # samples per mini-batch

def main():
  """Train the model from get_model() on the digits data and print test metrics.

  Each epoch draws freshly shuffled mini-batches, runs forward/backward and
  an optimizer step per batch, and shows the running training loss and
  accuracy on the progress bar. After each epoch the accuracy on the
  held-out split is measured in evaluation mode. At the end a
  classification report and a confusion matrix are printed.
  """
  X, Xtest, y, ytest = prepare_data()
  config = Config()

  model = get_model(n_in=X.shape[1], n_out=10)
  sgd, loss_func = prepare_trainer(model)

  pbar = tqdm(range(config.n_epoch))
  val_acc = 0  # shown on the progress bar; updated at the end of every epoch
  for epoch in pbar:
    # a new generator per epoch gives a new random permutation of the data
    data_loader = prepare_data_loader(X, y, config.batch_size)

    model.train() ## set train mode
    accumulator = Accumulator()  # running totals restart every epoch
    for step, (batch_x, batch_y) in enumerate(data_loader):
      ## forward pass
      batch_yp = model.forward(batch_x)
      loss = loss_func.forward(batch_yp, batch_y)

      ## backward pass and an optimization step
      sgd.zero_grad()
      dout = loss_func.backward()
      dx = model.backward(dout)
      sgd.step()

      ## log training progress
      n_correct = np.sum(np.argmax(batch_yp, axis=1) == batch_y)
      accumulator(len(batch_y), correct=n_correct, loss=loss)
    
      pbar.set_description(f"epoch {epoch} step {step+1} train_loss {accumulator.mean('loss'):.4f}\
                train_acc {accumulator.mean('correct')*100:.2f}% val_acc {val_acc*100:.2f}")

    model.eval()  ## set evaluation mode
    val_acc = np.sum(np.argmax(model.forward(Xtest), axis=1) == ytest) / len(ytest)

  # The model is still in evaluation mode here (last statement of the loop
  # body), provided at least one epoch ran.
  ypred = np.argmax(model.forward(Xtest), axis=1)
  print(metrics.classification_report(ytest, ypred))
  print(metrics.confusion_matrix(ytest, ypred))

main()

  0%|          | 0/200 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.98      1.00      0.99        55
           1       0.95      0.96      0.95        55
           2       0.96      1.00      0.98        52
           3       0.98      0.98      0.98        56
           4       1.00      1.00      1.00        64
           5       0.96      0.97      0.97        73
           6       0.98      0.98      0.98        57
           7       1.00      0.98      0.99        62
           8       0.94      0.90      0.92        52
           9       0.98      0.96      0.97        68

    accuracy                           0.97       594
   macro avg       0.97      0.97      0.97       594
weighted avg       0.97      0.97      0.97       594

[[55  0  0  0  0  0  0  0  0  0]
 [ 0 53  1  0  0  0  0  0  1  0]
 [ 0  0 52  0  0  0  0  0  0  0]
 [ 0  0  0 55  0  1  0  0  0  0]
 [ 0  0  0  0 64  0  0  0  0  0]
 [ 1  0  0  0  0 71  1  0  0  0]
 [ 0  0  0  0  0  1 56  0  0  0]
 [ 0  0  0

## pytorch_lightning
- model
- data loader
- trainer