# Lab 06-3: Softmax classifier 를 TensorFlow 로 구현하기

## 핵심키워드
- 다항 분류(Multinomial Classification)
- 소프트맥스(Softmax)
- 크로스 엔트로피(Cross-entropy)
- 경사하강법(Gradient Descent)

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Show the library versions this lab was written against next to the ones
# actually installed, so version drift is visible in the output.
print('>>> original : tf - 2.4.0  |  np - 1.19.4')
print(f'>>> present  : tf - {tf.__version__}  |  np - {np.__version__}')

# Seed TensorFlow's global generator so the random initial parameters repeat.
tf.random.set_seed(777)

>>> original : tf - 2.4.0  |  np - 1.19.4
>>> present  : tf - 2.4.0  |  np - 1.19.5


## Data

In [2]:
# Feature matrix: one row per sample, four features each.
x_data = np.asarray(
    [
        [1, 2, 1, 1],
        [2, 1, 3, 2],
        [3, 1, 3, 4],
        [4, 1, 5, 5],
        [1, 7, 5, 5],
        [1, 2, 5, 6],
        [1, 6, 6, 6],
        [1, 7, 7, 7],
    ],
    dtype=np.float32,
)

# One-hot labels, row-aligned with x_data (column index == class id).
y_data = np.asarray(
    [
        [0, 0, 1],  # class 2
        [0, 0, 1],
        [0, 0, 1],
        [0, 1, 0],  # class 1
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0],  # class 0
        [1, 0, 0],
    ],
    dtype=np.float32,
)

# The width of the one-hot encoding is the number of classes (3 here).
nb_classes = y_data.shape[-1]

x_data.shape, y_data.shape, nb_classes

((8, 4), (8, 3), 3)

## Softmax Classification

In [3]:
# Weight and bias setting
# W maps the input features to one score per class; b adds one offset per class.
# Both start from standard-normal draws. W is drawn before b so the seeded
# sequence stays the same.
W = tf.Variable(tf.random.normal(shape=(x_data.shape[1], nb_classes)))
b = tf.Variable(tf.random.normal(shape=(nb_classes,)))

print(W, b)

<tf.Variable 'Variable:0' shape=(4, 3) dtype=float32, numpy=
array([[ 0.7706481 ,  0.37335402, -0.05576323],
       [ 0.00358377, -0.5898363 ,  1.5702795 ],
       [ 0.2460895 , -0.09918973,  1.4418385 ],
       [ 0.3200988 ,  0.526784  , -0.7703731 ]], dtype=float32)> <tf.Variable 'Variable:0' shape=(3,) dtype=float32, numpy=array([-1.3080608 , -0.13253094,  0.5513761 ], dtype=float32)>


In [4]:
# Train model
def hypothesis(X, weight=W, bias=b):
  """Return softmax class probabilities for the inputs ``X``.

  Args:
    X: input batch. It is matrix-multiplied with ``weight``, so its last
      dimension must equal the first dimension of ``weight``.
    weight: weight matrix. Defaults to the notebook-level ``W``.
    bias: bias vector added to every row. Defaults to the notebook-level ``b``.

  Returns:
    ``tf.nn.softmax(tf.matmul(X, weight) + bias)``.

  Note:
    The defaults are bound when this cell is executed, so re-run this cell
    after re-creating ``W`` or ``b``.
  """
  # Use the arguments, not the globals, so caller-supplied parameters are
  # actually honoured.
  scores = tf.matmul(X, weight) + bias
  return tf.nn.softmax(scores)

def cost_fn(X, Y, weight=W, bias=b):
  """Mean cross-entropy between one-hot labels ``Y`` and the predictions.

  Args:
    X: input batch, forwarded to ``hypothesis``.
    Y: one-hot labels, multiplied element-wise with the log-probabilities.
    weight: weight matrix forwarded to ``hypothesis``.
    bias: bias vector forwarded to ``hypothesis``.

  Returns:
    Scalar tensor: the per-sample cross-entropy averaged over the batch.
  """
  probs = hypothesis(X, weight, bias)
  # NOTE(review): log() of a probability that has underflowed to 0 is -inf;
  # the computation is kept as-is so the recorded costs stay reproducible.
  per_sample = -tf.reduce_sum(tf.multiply(Y, tf.math.log(probs)), axis=1)
  return tf.reduce_mean(per_sample)

def grad_fn(X, Y, weight=W, bias=b):
  """Return the gradients of the cost with respect to ``[weight, bias]``.

  Args:
    X: input batch.
    Y: one-hot labels.
    weight: weight matrix to differentiate against. Should be a trainable
      ``tf.Variable`` so the tape watches it automatically.
    bias: bias vector to differentiate against (same requirement).

  Returns:
    List ``[d_cost/d_weight, d_cost/d_bias]``.
  """
  with tf.GradientTape() as tape:
    # Only the forward pass is recorded; the cost is the single value needed.
    cost = cost_fn(X, Y, weight, bias)
  # Ask for the gradient after leaving the tape context.
  return tape.gradient(cost, [weight, bias])

def fit(X, Y, weight=W, bias=b, epochs=2000, verbose=100, learning_rate=0.1):
  """Train ``weight`` and ``bias`` in place with plain gradient descent.

  Args:
    X: input batch.
    Y: one-hot labels.
    weight: weight variable to update. Defaults to the notebook-level ``W``.
    bias: bias variable to update. Defaults to the notebook-level ``b``.
    epochs: number of full-batch update steps.
    verbose: print the cost at step 1 and every ``verbose`` steps. A falsy
      value (0 / None) disables all printing.
    learning_rate: step size passed to the SGD optimizer.
  """
  optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
  params = [weight, bias]

  if verbose:
    print('epoch | cost')
  # Count steps from 1 so the printed number is the number of updates done.
  for step in range(1, epochs + 1):
    grads = grad_fn(X, Y, weight, bias)
    optimizer.apply_gradients(grads_and_vars=zip(grads, params))

    # Guarding on `verbose` avoids a modulo-by-zero when logging is disabled.
    if verbose and (step == 1 or step % verbose == 0):
      cost = cost_fn(X, Y, weight, bias)
      print(f'{step:5.0f} | {cost:8.6f}')


fit(x_data, y_data)

epoch | cost
    1 | 2.849417
  100 | 0.684151
  200 | 0.613813
  300 | 0.558205
  400 | 0.508306
  500 | 0.461059
  600 | 0.415072
  700 | 0.369636
  800 | 0.324533
  900 | 0.280720
 1000 | 0.246752
 1100 | 0.232798
 1200 | 0.221645
 1300 | 0.211476
 1400 | 0.202164
 1500 | 0.193606
 1600 | 0.185714
 1700 | 0.178415
 1800 | 0.171645
 1900 | 0.165350
 2000 | 0.159483


In [5]:
# Prediction Check
# The most probable class per sample (first line) should match the index of
# the 1 in each one-hot label (second line).
pred = hypothesis(x_data)
print(tf.argmax(pred, axis=1))
print(tf.argmax(y_data, axis=1))

tf.Tensor([2 2 2 1 1 1 0 0], shape=(8,), dtype=int64)
tf.Tensor([2 2 2 1 1 1 0 0], shape=(8,), dtype=int64)


## Softmax Classification as a class

In [6]:
class softmax_classifier(tf.keras.Model):
  """Softmax regression with a hand-written gradient-descent training loop.

  Attributes are created in ``__init__``:
    W: ``tf.Variable`` of shape ``(nb_features, nb_classes)``.
    b: ``tf.Variable`` of shape ``(nb_classes,)``.

  Note:
    ``fit`` here replaces ``tf.keras.Model.fit`` with a custom loop and a
    different signature; it does not use ``compile``.
  """

  def __init__(self, nb_features, nb_classes):
    """Create randomly initialised parameters.

    Args:
      nb_features: number of input features (rows of ``W``).
      nb_classes: number of output classes (columns of ``W``, length of ``b``).
    """
    super(softmax_classifier, self).__init__()
    # W is drawn before b so seeded runs keep the same initial values.
    self.W = tf.Variable(tf.random.normal((nb_features, nb_classes)))
    self.b = tf.Variable(tf.random.normal((nb_classes,)))

  def softmax_regression(self, X):
    """Return softmax class probabilities ``softmax(X @ W + b)`` for ``X``."""
    scores = tf.matmul(X, self.W) + self.b
    return tf.nn.softmax(scores)

  def call(self, X):
    """Forward pass used when the model is called directly, i.e. ``model(X)``."""
    return self.softmax_regression(X)

  def cost_fn(self, X, Y):
    """Mean cross-entropy between one-hot labels ``Y`` and the predictions.

    Returns:
      Scalar tensor: the per-sample cross-entropy averaged over the batch.
    """
    probs = self.softmax_regression(X)
    # NOTE(review): log() of a probability that has underflowed to 0 is -inf;
    # the computation is kept as-is so the recorded costs stay reproducible.
    per_sample = -tf.reduce_sum(tf.multiply(Y, tf.math.log(probs)), axis=1)
    return tf.reduce_mean(per_sample)

  def grad_fn(self, X, Y):
    """Return ``[d_cost/d_W, d_cost/d_b]`` for the batch ``(X, Y)``."""
    with tf.GradientTape() as tape:
      # Only the forward pass is recorded; the cost is the single value needed.
      cost = self.cost_fn(X, Y)
    # Ask for the gradient after leaving the tape context.
    return tape.gradient(cost, [self.W, self.b])

  def fit(self, X, Y, epochs=2000, verbose=100, learning_rate=0.1):
    """Train ``W`` and ``b`` in place with plain gradient descent.

    Args:
      X: input batch.
      Y: one-hot labels.
      epochs: number of full-batch update steps.
      verbose: print the cost at step 1 and every ``verbose`` steps. A falsy
        value (0 / None) disables all printing.
      learning_rate: step size passed to the SGD optimizer.
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    params = [self.W, self.b]

    if verbose:
      print('epoch | cost')
    # Count steps from 1 so the printed number is the number of updates done.
    for step in range(1, epochs + 1):
      grads = self.grad_fn(X, Y)
      optimizer.apply_gradients(grads_and_vars=zip(grads, params))

      # Guarding on `verbose` avoids a modulo-by-zero when logging is disabled.
      if verbose and (step == 1 or step % verbose == 0):
        cost = self.cost_fn(X, Y)
        print(f'{step:5.0f} | {cost:8.6f}')


# Size the model from the data rather than repeating the literals 4 and 3,
# so this cell stays correct if the dataset above is changed.
model = softmax_classifier(nb_features=x_data.shape[1], nb_classes=nb_classes)
model.fit(x_data, y_data)

epoch | cost
    1 | 2.472669
  100 | 0.639449
  200 | 0.524868
  300 | 0.463929
  400 | 0.416868
  500 | 0.375229
  600 | 0.335701
  700 | 0.296750
  800 | 0.261106
  900 | 0.242689
 1000 | 0.229923
 1100 | 0.218464
 1200 | 0.208102
 1300 | 0.198676
 1400 | 0.190059
 1500 | 0.182148
 1600 | 0.174857
 1700 | 0.168117
 1800 | 0.161866
 1900 | 0.156053
 2000 | 0.150633


In [7]:
# Prediction Check
# The class model's most probable class per sample (first line) should match
# the index of the 1 in each one-hot label (second line).
pred = model.softmax_regression(x_data)
print(tf.argmax(pred, axis=1))
print(tf.argmax(y_data, axis=1))

tf.Tensor([2 2 2 1 1 1 0 0], shape=(8,), dtype=int64)
tf.Tensor([2 2 2 1 1 1 0 0], shape=(8,), dtype=int64)
