# mnist 데이터를 이용한 multi_layers 학습

- toc: true
- branch: master
- badges: true
- comments: true
- author: 



## 12_1_single_layer



In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

- Dense 함수와 여러 개의 초기화 값 정의하기

In [None]:
def Dense(x, w, b, activations=None):
  """Apply an affine layer ``x @ w + b`` followed by an optional activation.

  Args:
    x: input batch (anything supporting ``@`` with ``w``).
    w: weight matrix.
    b: bias added to the matrix product.
    activations: optional callable applied to the affine output; when it is
      falsy (the default ``None``) the raw affine output is returned.

  Returns:
    ``activations(x @ w + b)`` if an activation is given, else ``x @ w + b``.
  """
  z = x @ w + b
  if not activations:
    return z
  return activations(z)

def make_weight_normal(n_input, n_output):
  """Create a (weight, bias) pair of trainable variables with plain random init.

  Despite the name, both tensors are sampled with ``tf.random.uniform``
  (called with its default arguments), not from a normal distribution.

  Args:
    n_input: number of input features (rows of the weight matrix).
    n_output: number of output units (columns of the weight matrix, bias size).

  Returns:
    Tuple ``(w, b)`` of ``tf.Variable`` with shapes ``[n_input, n_output]``
    and ``[n_output]``.
  """
  weight_shape = [n_input, n_output]
  bias_shape = [n_output]

  # The weight is sampled before the bias, same order as the random draws need.
  weight = tf.Variable(tf.random.uniform(weight_shape))
  bias = tf.Variable(tf.random.uniform(bias_shape))
  return weight, bias

def make_weight_glorot(n_input, n_output):
  """Create a (weight, bias) pair using Glorot-uniform weights and zero bias.

  Args:
    n_input: number of input features (rows of the weight matrix).
    n_output: number of output units (columns of the weight matrix, bias size).

  Returns:
    Tuple ``(w, b)`` of ``tf.Variable``: ``w`` has shape
    ``[n_input, n_output]`` and is filled by ``keras.initializers.GlorotUniform``;
    ``b`` has shape ``[n_output]`` and is all zeros.
  """
  initializer = keras.initializers.GlorotUniform()
  weight = tf.Variable(initializer([n_input, n_output]))
  bias = tf.Variable(tf.zeros([n_output]))
  return weight, bias

def make_weight_he(n_input, n_output):
  """Create a (weight, bias) pair using He-uniform weights and zero bias.

  Args:
    n_input: number of input features (rows of the weight matrix).
    n_output: number of output units (columns of the weight matrix, bias size).

  Returns:
    Tuple ``(w, b)`` of ``tf.Variable``: ``w`` has shape
    ``[n_input, n_output]`` and is filled by ``keras.initializers.HeUniform``;
    ``b`` has shape ``[n_output]`` and is all zeros.
  """
  initializer = keras.initializers.HeUniform()
  weight = tf.Variable(initializer([n_input, n_output]))
  bias = tf.Variable(tf.zeros([n_output]))
  return weight, bias


- mnist_single_layer 함수 생성

In [None]:
def mnist_single_layer():
  """Train one softmax layer on MNIST with full-batch Adam and print accuracy.

  Loads MNIST through keras, flattens every 28x28 image to 784 values,
  divides the pixels by 255, then performs 100 gradient steps, each one on
  the whole training set. Prints the array shapes, the pixel range of the
  first training image, the loss of every step, the shape of the test
  predictions, the predicted labels and the test accuracy. Returns nothing.
  """
  (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
  print(x_train.shape, x_test.shape)        # (60000, 28, 28) (10000, 28, 28)
  print(y_train.shape, y_test.shape)        # (60000,) (10000,) => 1-D labels, i.e. the sparse (class index) form

  x_train = x_train.reshape(-1, 784)
  x_test = x_test.reshape(-1, 784)

  # ndarray methods instead of the Python builtins, which walk the array element by element.
  print(x_train[0].min(), x_train[0].max())   # 0 255

  # Normalize the x data to the 0-1 range.
  x_train = x_train / 255
  x_test = x_test / 255

  n_classes = 10                              # n_output value

  # Alternative initializers to compare:
  # w, b = make_weight_normal(x_train.shape[-1], n_classes)
  w, b = make_weight_glorot(x_train.shape[-1], n_classes)
  # w, b = make_weight_he(x_train.shape[-1], n_classes)

  # SGD or RMSprop also work; try several optimizers and compare performance.
  optimizer = tf.keras.optimizers.Adam(0.01)

  # The loss object does not depend on the step, so build it once.
  scce = keras.losses.SparseCategoricalCrossentropy()
  softmax = keras.activations.softmax

  for step in range(100):
    with tf.GradientTape() as tape:
      hx = Dense(x_train, w, b, activations=softmax)
      loss = scce(y_train, hx)

    gradient = tape.gradient(loss, [w, b])
    optimizer.apply_gradients(zip(gradient, [w, b]))

    print(step, loss.numpy())
  print()

  p = Dense(x_test, w, b, activations=softmax)
  print(p.numpy().shape)                # (10000, 10)

  # Predicted class = index of the largest probability in each row.
  p_arg = np.argmax(p.numpy(), axis=1)

  print(p_arg)

  print('acc:', np.mean(p_arg == y_test))

In [None]:
# Run the full-batch single-layer experiment (prints shapes, per-step loss and test accuracy).
mnist_single_layer()

(60000, 28, 28) (10000, 28, 28)
(60000,) (10000,)
0 255
0 2.4037037
1 1.9596609
2 1.599312
3 1.3130828
4 1.1049172
5 0.95262057
6 0.83855027
7 0.7554665
8 0.69375753
9 0.64428
10 0.60332835
11 0.5701764
12 0.54368937
13 0.5217784
14 0.502841
15 0.48651809
16 0.47281685
17 0.4612233
18 0.45088997
19 0.44128928
20 0.43242112
21 0.42447037
22 0.41741672
23 0.41099578
24 0.4049556
25 0.39925858
26 0.39401338
27 0.3892681
28 0.38491824
29 0.38080114
30 0.376837
31 0.37305978
32 0.36953098
33 0.36624712
34 0.36313674
35 0.36013457
36 0.35723847
37 0.35448992
38 0.35190997
39 0.34946993
40 0.34712353
41 0.34485543
42 0.34268677
43 0.34063777
44 0.33869678
45 0.33683166
46 0.33502424
47 0.33328217
48 0.33161727
49 0.33002266
50 0.32847792
51 0.32697153
52 0.32550958
53 0.32410192
54 0.32274655
55 0.32143202
56 0.32015175
57 0.3189091
58 0.317709
59 0.31654832
60 0.31541887
61 0.3143168
62 0.3132445
63 0.31220457
64 0.31119463
65 0.31020984
66 0.30924878
67 0.30831352
68 0.30740508
69 0.3065207

- mini_batch 사용해 생성

In [None]:
def mnist_single_layer_mini_batch():
  """Train one softmax layer on MNIST with mini-batch Adam and print accuracy.

  Loads MNIST through keras, flattens every 28x28 image to 784 values,
  divides the pixels by 255, then trains for 10 epochs on consecutive,
  non-overlapping batches of 100 samples. Prints the array shapes, the pixel
  range of the first training image, the mean batch loss of every epoch, the
  shape of the test predictions, the predicted labels and the test accuracy.
  Returns nothing.
  """
  (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
  print(x_train.shape, x_test.shape)        # (60000, 28, 28) (10000, 28, 28)
  print(y_train.shape, y_test.shape)        # (60000,) (10000,) => 1-D labels, i.e. the sparse (class index) form

  x_train = x_train.reshape(-1, 784)
  x_test = x_test.reshape(-1, 784)

  # ndarray methods instead of the Python builtins, which walk the array element by element.
  print(x_train[0].min(), x_train[0].max())   # 0 255

  # Normalize the x data to the 0-1 range.
  x_train = x_train / 255
  x_test = x_test / 255

  n_classes = 10                              # n_output value

  w, b = make_weight_normal(x_train.shape[-1], n_classes)

  # Alternative initializers to compare:
  # w, b = make_weight_glorot(x_train.shape[-1], n_classes)
  # previously recorded acc: 0.4583 (measured before the batch-slicing fix; re-measure)

  # w, b = make_weight_he(x_train.shape[-1], n_classes)

  # SGD or RMSprop also work; try several optimizers and compare performance.
  optimizer = tf.keras.optimizers.Adam(0.01)

  # The loss object does not depend on the batch, so build it once.
  scce = keras.losses.SparseCategoricalCrossentropy()
  softmax = keras.activations.softmax

  epoch = 10                                  # number of passes over the training set
  batch_size = 100
  n_iteration = len(x_train) // batch_size    # 600

  for i in range(epoch):
    total = 0
    for j in range(n_iteration):              # 600 updates per epoch, 6000 in total
      n1 = j * batch_size
      # End of the slice is start + batch_size. (start * batch_size would make
      # the first batch empty and every later slice far larger than a batch.)
      n2 = n1 + batch_size

      xx = x_train[n1:n2]
      yy = y_train[n1:n2]

      with tf.GradientTape() as tape:
        hx = Dense(xx, w, b, activations=softmax)
        loss = scce(yy, hx)

      total += loss.numpy()

      gradient = tape.gradient(loss, [w, b])
      optimizer.apply_gradients(zip(gradient, [w, b]))

    # Mean batch loss of this epoch.
    print(i, total / n_iteration)
  print()

  p = Dense(x_test, w, b, activations=softmax)
  print(p.numpy().shape)                # (10000, 10)

  # Predicted class = index of the largest probability in each row.
  p_arg = np.argmax(p.numpy(), axis=1)

  print(p_arg)

  print('acc:', np.mean(p_arg == y_test))


In [None]:
# Run the mini-batch single-layer experiment (prints shapes, per-epoch mean loss and test accuracy).
mnist_single_layer_mini_batch()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(60000, 28, 28) (10000, 28, 28)
(60000,) (10000,)
0 255
0 0.31139886550294854
1 0.1945932127473255
2 0.17942959094420075
3 0.17137116949539632
4 0.1662351022940129
5 0.16266893739035973
6 0.16009065205580555
7 0.15819382834791515
8 0.15678756201523356
9 0.1557471169391647

(10000, 10)
[7 2 1 ... 4 5 6]
acc: 0.899


## 12_2_multi_layers

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

- layer 3개

In [None]:
def mnist_multiple_layer_mini_batch_3():
  """Train a 3-layer network (784-256-256-10) on MNIST with mini-batch Adam.

  Loads MNIST through keras, flattens every 28x28 image to 784 values,
  divides the pixels by 255, then trains for 10 epochs on consecutive,
  non-overlapping batches of 100 samples. The two hidden layers use relu and
  the output layer uses softmax. Prints the array shapes, the pixel range of
  the first training image, the mean batch loss of every epoch, the shape of
  the test predictions and the test accuracy. Returns nothing.
  """
  (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
  print(x_train.shape, x_test.shape)        # (60000, 28, 28) (10000, 28, 28)
  print(y_train.shape, y_test.shape)        # (60000,) (10000,) => 1-D labels, i.e. the sparse (class index) form

  x_train = x_train.reshape(-1, 784)
  x_test = x_test.reshape(-1, 784)

  # ndarray methods instead of the Python builtins, which walk the array element by element.
  print(x_train[0].min(), x_train[0].max())   # 0 255

  # Normalize the x data to the 0-1 range.
  x_train = x_train / 255
  x_test = x_test / 255

  n_classes = 10                              # n_output value

  w1, b1 = make_weight_normal(x_train.shape[-1], 256)
  w2, b2 = make_weight_normal(256, 256)
  w3, b3 = make_weight_normal(256, n_classes)
  # previously recorded acc: 0.9423 (measured before the batch-slicing fix; re-measure)

  # Alternative initializers to compare:
  # w1, b1 = make_weight_glorot(x_train.shape[-1], 256)
  # w2, b2 = make_weight_glorot(256, 256)
  # w3, b3 = make_weight_glorot(256, n_classes)
  # result: (not recorded)

  # w1, b1 = make_weight_he(x_train.shape[-1], 256)
  # w2, b2 = make_weight_he(256, 256)
  # w3, b3 = make_weight_he(256, n_classes)
  # result: (not recorded)

  # Single list of trainable variables, shared by the gradient and update calls.
  variables = [w1, b1, w2, b2, w3, b3]

  # SGD or RMSprop also work; try several optimizers and compare performance.
  optimizer = tf.keras.optimizers.Adam(0.01)

  # The loss object does not depend on the batch, so build it once.
  scce = keras.losses.SparseCategoricalCrossentropy()
  relu = keras.activations.relu
  softmax = keras.activations.softmax

  epoch = 10                                  # number of passes over the training set
  batch_size = 100
  n_iteration = len(x_train) // batch_size    # 600

  for i in range(epoch):
    total = 0
    for j in range(n_iteration):              # 600 updates per epoch, 6000 in total
      n1 = j * batch_size
      # End of the slice is start + batch_size. (start * batch_size would make
      # the first batch empty and every later slice far larger than a batch.)
      n2 = n1 + batch_size

      xx = x_train[n1:n2]
      yy = y_train[n1:n2]

      with tf.GradientTape() as tape:
        # The activation between layers must be non-linear.
        # sigmoid / softmax are not used for the hidden layers; they are used
        # only at the end, to turn the outputs into probabilities.
        # relu maps values below 0 to 0 and keeps values above 0 as they are,
        # so it is hard to use on data with many negative values.
        d1 = Dense(xx, w1, b1, activations=relu)
        d2 = Dense(d1, w2, b2, activations=relu)
        hx = Dense(d2, w3, b3, activations=softmax)

        loss = scce(yy, hx)

      total += loss.numpy()

      gradient = tape.gradient(loss, variables)
      optimizer.apply_gradients(zip(gradient, variables))

    # Mean batch loss of this epoch.
    print(i, total / n_iteration)
  print()

  d1 = Dense(x_test, w1, b1, activations=relu)
  d2 = Dense(d1, w2, b2, activations=relu)
  p = Dense(d2, w3, b3, activations=softmax)
  print(p.numpy().shape)                # (10000, 10)

  # Predicted class = index of the largest probability in each row.
  p_arg = np.argmax(p.numpy(), axis=1)

  print('acc:', np.mean(p_arg == y_test))

In [None]:
# Run the 3-layer mini-batch experiment (prints shapes, per-epoch mean loss and test accuracy).
mnist_multiple_layer_mini_batch_3()

(60000, 28, 28) (10000, 28, 28)
(60000,) (10000,)
0 255
0 1461.6672620370766
1 1.1854659057674386
2 0.3881142702441624
3 0.15620899812240774
4 0.06981539531475088
5 0.03489897323093222
6 3.321667264445374
7 0.14723950815076628
8 0.09502428941739102
9 0.06494564253681649

(10000, 10)
acc: 0.9423


`Q` layer 5개로 늘려 정확도를 구하고 layer 3개를 이용했을 때와 비교하시오

In [None]:
def mnist_multiple_layer_mini_batch_5():
  """Train a 5-layer network (784-256-64-32-32-10) on MNIST with mini-batch Adam.

  Loads MNIST through keras, flattens every 28x28 image to 784 values,
  divides the pixels by 255, then trains for 10 epochs on consecutive,
  non-overlapping batches of 100 samples. The four hidden layers use relu
  and the output layer uses softmax. Prints the array shapes, the pixel
  range of the first training image, the mean batch loss of every epoch, the
  shape of the test predictions and the test accuracy. Returns nothing.
  """
  (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
  print(x_train.shape, x_test.shape)        # (60000, 28, 28) (10000, 28, 28)
  print(y_train.shape, y_test.shape)        # (60000,) (10000,) => 1-D labels, i.e. the sparse (class index) form

  x_train = x_train.reshape(-1, 784)
  x_test = x_test.reshape(-1, 784)

  # ndarray methods instead of the Python builtins, which walk the array element by element.
  print(x_train[0].min(), x_train[0].max())   # 0 255

  # Normalize the x data to the 0-1 range.
  x_train = x_train / 255
  x_test = x_test / 255

  n_classes = 10                              # n_output value

  w1, b1 = make_weight_normal(x_train.shape[-1], 256)
  w2, b2 = make_weight_normal(256, 64)
  w3, b3 = make_weight_normal(64, 32)
  w4, b4 = make_weight_normal(32, 32)
  w5, b5 = make_weight_normal(32, n_classes)
  # previously recorded acc: 0.8988 (measured before the batch-slicing fix; re-measure)

  # Alternative initializers to compare:
  # w1, b1 = make_weight_glorot(x_train.shape[-1], 256)
  # w2, b2 = make_weight_glorot(256, 64)
  # w3, b3 = make_weight_glorot(64, 32)
  # w4, b4 = make_weight_glorot(32, 32)
  # w5, b5 = make_weight_glorot(32, n_classes)
  # result: (not recorded)

  # w1, b1 = make_weight_he(x_train.shape[-1], 256)
  # w2, b2 = make_weight_he(256, 64)
  # w3, b3 = make_weight_he(64, 32)
  # w4, b4 = make_weight_he(32, 32)
  # w5, b5 = make_weight_he(32, n_classes)
  # result: (not recorded)

  # Single list of trainable variables, shared by the gradient and update calls.
  variables = [w1, b1, w2, b2, w3, b3, w4, b4, w5, b5]

  # SGD or RMSprop also work; try several optimizers and compare performance.
  optimizer = tf.keras.optimizers.Adam(0.01)

  # The loss object does not depend on the batch, so build it once.
  scce = keras.losses.SparseCategoricalCrossentropy()
  relu = keras.activations.relu
  softmax = keras.activations.softmax

  epoch = 10                                  # number of passes over the training set
  batch_size = 100
  n_iteration = len(x_train) // batch_size    # 600

  for i in range(epoch):
    total = 0
    for j in range(n_iteration):              # 600 updates per epoch, 6000 in total
      n1 = j * batch_size
      # End of the slice is start + batch_size. (start * batch_size would make
      # the first batch empty and every later slice far larger than a batch.)
      n2 = n1 + batch_size

      xx = x_train[n1:n2]
      yy = y_train[n1:n2]

      with tf.GradientTape() as tape:
        d1 = Dense(xx, w1, b1, activations=relu)
        d2 = Dense(d1, w2, b2, activations=relu)
        d3 = Dense(d2, w3, b3, activations=relu)
        d4 = Dense(d3, w4, b4, activations=relu)
        hx = Dense(d4, w5, b5, activations=softmax)

        loss = scce(yy, hx)

      total += loss.numpy()

      gradient = tape.gradient(loss, variables)
      optimizer.apply_gradients(zip(gradient, variables))

    # Mean batch loss of this epoch.
    print(i, total / n_iteration)
  print()

  d1 = Dense(x_test, w1, b1, activations=relu)
  d2 = Dense(d1, w2, b2, activations=relu)
  d3 = Dense(d2, w3, b3, activations=relu)
  d4 = Dense(d3, w4, b4, activations=relu)
  p = Dense(d4, w5, b5, activations=softmax)
  print(p.numpy().shape)                # (10000, 10)

  # Predicted class = index of the largest probability in each row.
  p_arg = np.argmax(p.numpy(), axis=1)

  print('acc:', np.mean(p_arg == y_test))

In [None]:
# Run the 5-layer mini-batch experiment (prints shapes, per-epoch mean loss and test accuracy).
mnist_multiple_layer_mini_batch_5()

(60000, 28, 28) (10000, 28, 28)
(60000,) (10000,)
0 255
0 115397.61487060547
1 88.93705628871918
2 124.64481646577518
3 8.39682581782341
4 95.10231856604418
5 5.290038377592961
6 3.5611470832582564
7 8.595668331881365
8 25.354680725370223
9 1.1293716185198477

(10000, 10)
acc: 0.8988


`Q` normal 버전 말고 glorot, he 버전도 돌려보면서 정확도를 측정해 보시오 