# Softmax Regression(Multinomial Classification) - Eager Execution

In [22]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Report which TensorFlow version this notebook is running against.
print(tf.__version__)

2.1.0


## Train Data


In [23]:
# Training data: 8 samples, 4 features each.
x_train = np.array([
    [1., 2., 1., 1.],
    [2., 1., 3., 2.],
    [3., 1., 3., 4.],
    [4., 1., 5., 5.],
    [1., 7., 5., 5.],
    [1., 2., 5., 6.],
    [1., 6., 6., 6.],
    [1., 7., 7., 7.]
], dtype=np.float32) # 8 rows x 4 columns

# One-hot class labels for the 8 training samples (3 classes).
y_train = np.array([
    [0., 0., 1.],
    [0., 0., 1.],
    [0., 0., 1.],
    [0., 1., 0.],
    [0., 1., 0.],
    [0., 1., 0.],
    [1., 0., 0.],
    [1., 0., 0.]
], dtype=np.float32) # 8 rows x 3 columns

# Test data: a single sample. NOTE: it is identical to the first training row
# (x_train[0] / y_train[0]), so accuracy on it is not an independent estimate.
x_test = np.array([[1.,2.,1.,1.]], dtype=np.float32)
y_test = np.array([[0.,0.,1.]], dtype=np.float32)

##  Tensorflow Eager
### 위 Data를 기준으로 가설의 검증을 통해 Softmax Classification 모델을 만들도록 하겠습니다
* Tensorflow data API를 통해 학습시킬 값들을 담는다 (Batch Size는 한번에 학습시킬 Size로 정한다)
* features,labels는 실제 학습에 쓰일 Data (연산을 위해 Type을 맞춰준다)

In [24]:
# Slice the training arrays into (features, labels) pairs and group them into a
# single batch: the batch size is len(x_train), so one pass over `dataset`
# yields all samples at once.
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))#.repeat()
print(dataset)

<BatchDataset shapes: ((None, 4), (None, 3)), types: (tf.float32, tf.float32)>


### 위 Data를 기준으로 가설의 검증을 통해 Softmax Classification 모델을 만들도록 하겠습니다
* W와 b는 학습을 통해 생성되는 모델에 쓰이는 Weight와 Bias (초기값은 0이나 Random 값으로 줄 수 있다. 예: tf.random.normal((4, 3)) )

In [25]:
tf.random.set_seed(0)  # for reproducibility

# Alternative: initialise the parameters with zeros
# (W must have 4 rows to match the 4 input features).
#W = tf.Variable(tf.zeros([4,3]), name='weight')
#b = tf.Variable(tf.zeros([3]), name='bias')
# Used here: initialise the parameters with random normal values
W = tf.Variable(tf.random.normal((4, 3)), name='weight') # 4 rows x 3 columns (features x classes)
b = tf.Variable(tf.random.normal((3,)), name='bias')

## Step 1: Hypothesis using matrix(가설 or 모델)
### Softmax 함수를 가설로 선언합니다
* Softmax는 tf.nn.softmax(tf.matmul(X, W) + b)와 같습니다

$$
\begin{align}
Softmax(x)_j & = \frac{e^{x_j}}{\sum _{i=1}^{m}{e^{x_i}}}
\end{align}
$$

In [26]:
# Hypothesis (model)
def softmax_regression(features):
    """Return the class probabilities predicted for `features`.

    The scores are the affine map ``features @ W + b`` using the
    notebook-level variables `W` and `b`; softmax then turns each row of
    scores into a probability distribution.
    """
    logits = tf.matmul(features, W) + b
    return tf.nn.softmax(logits)

print(softmax_regression(x_train))

tf.Tensor(
[[6.7622626e-01 5.7505764e-02 2.6626796e-01]
 [8.9231026e-01 9.3884438e-02 1.3805281e-02]
 [9.9712002e-01 2.5964240e-03 2.8364777e-04]
 [9.9732119e-01 2.6285697e-03 5.0274964e-05]
 [7.1394024e-03 1.4758843e-04 9.9271303e-01]
 [9.5985550e-01 1.4678108e-02 2.5466355e-02]
 [4.6822127e-02 1.0678009e-03 9.5211011e-01]
 [1.6258705e-02 2.7375892e-04 9.8346752e-01]], shape=(8, 3), dtype=float32)


## Step 2: Cost Function (손실 함수)
### 크로스 엔트로피 함수
$$
\begin{align}
cost(h(x),y) & = -\sum _{i=1}^{m} y_i \log(h(x)_i)
\end{align}
$$

In [27]:
def loss_fn(features, labels):
    """Mean cross-entropy between the model output and `labels`.

    Args:
        features: input batch, forwarded to `softmax_regression`.
        labels: per-row target distribution with the same shape as the model
            output (one-hot rows in this notebook).

    Returns:
        Scalar tensor: the batch mean of ``-sum(labels * log(p), axis=1)``.
    """
    hypothesis = softmax_regression(features)
    # Keep probabilities strictly positive before taking the log. A softmax
    # output can underflow to exactly 0 in float32; log(0) is -inf and
    # 0 * -inf is NaN, which would poison the loss and every gradient.
    hypothesis = tf.clip_by_value(hypothesis, 1e-7, 1.0)
    cost = -tf.reduce_sum(labels * tf.math.log(hypothesis), axis=1)
    return tf.reduce_mean(cost)

print(loss_fn(x_train, y_train))

tf.Tensor(4.992257, shape=(), dtype=float32)


## Step 3: Optimizer (Minimize Cost Function)
### Gradient descent
$$ W := W-\alpha \frac { \partial  }{ \partial W } cost(W) $$

In [28]:
# Gradient descent, with gradients obtained through tf.GradientTape()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

def grad(features, labels):
    """Return the gradients of the loss on one batch, ordered as [dW, db]."""
    trainable = [W, b]
    with tf.GradientTape() as tape:
        # The forward pass must run inside the tape so it is recorded.
        batch_loss = loss_fn(features, labels)
    return tape.gradient(batch_loss, trainable)

### 추론한 값 중에 확률이 높은 값의 인덱스를 리턴합니다.
* 가설을 통해 실제 값과 비교한 정확도를 측정합니다

In [29]:
def accuracy_fn(hypothesis, labels):
    """Fraction of rows whose predicted class matches the labelled class.

    Args:
        hypothesis: per-row class scores/probabilities; the arg-max along
            axis 1 is taken as the predicted class.
        labels: per-row targets in the same layout; the arg-max along axis 1
            is taken as the true class.

    Returns:
        Scalar float32 tensor in [0, 1].
    """
    predicted = tf.argmax(hypothesis, 1)
    expected = tf.argmax(labels, 1)
    # Cast to float32, not int32: tf.reduce_mean keeps the input dtype, so an
    # integer mean would truncate any accuracy below 100% down to 0.
    hits = tf.cast(tf.equal(predicted, expected), dtype=tf.float32)
    return tf.reduce_mean(hits)

## 훈련

In [30]:
# Number of training epochs (the loop runs steps 0..epoch inclusive)
epoch = 2000
for step in range(epoch + 1):
    for features, labels in dataset:
        grads = grad(features, labels)
        optimizer.apply_gradients(zip(grads, [W, b]))
        # Only every 100th step reports the loss (measured after the update).
        if step % 100:
            continue
        print("Iter: {}, Loss: {:.4f}".format(step, loss_fn(features,labels)))

Iter: 0, Loss: 3.2291
Iter: 100, Loss: 0.7888
Iter: 200, Loss: 0.6915
Iter: 300, Loss: 0.6310
Iter: 400, Loss: 0.5802
Iter: 500, Loss: 0.5330
Iter: 600, Loss: 0.4873
Iter: 700, Loss: 0.4423
Iter: 800, Loss: 0.3975
Iter: 900, Loss: 0.3526
Iter: 1000, Loss: 0.3078
Iter: 1100, Loss: 0.2651
Iter: 1200, Loss: 0.2378
Iter: 1300, Loss: 0.2260
Iter: 1400, Loss: 0.2155
Iter: 1500, Loss: 0.2059
Iter: 1600, Loss: 0.1970
Iter: 1700, Loss: 0.1888
Iter: 1800, Loss: 0.1813
Iter: 1900, Loss: 0.1743
Iter: 2000, Loss: 0.1678


## Predict (예측)

In [31]:
y_train # labels (ground-truth values)

array([[0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.]], dtype=float32)

In [32]:
softmax_regression(x_train).numpy() # predictions (model output for the training inputs)

array([[3.5121222e-06, 1.2661221e-03, 9.9873036e-01],
       [7.8488269e-04, 8.5700996e-02, 9.1351408e-01],
       [7.7574995e-08, 1.7224593e-01, 8.2775402e-01],
       [8.9749898e-07, 8.4324306e-01, 1.5675601e-01],
       [2.7409819e-01, 7.1326292e-01, 1.2638896e-02],
       [1.4462650e-01, 8.5536098e-01, 1.2500031e-05],
       [7.3665386e-01, 2.6331601e-01, 3.0136525e-05],
       [9.1254658e-01, 8.7452896e-02, 5.2975105e-07]], dtype=float32)

In [33]:
# Accuracy on the training data. This is evaluated on x_train / y_train, so it
# is stored and reported as train-set accuracy rather than test-set accuracy.
train_acc = accuracy_fn(softmax_regression(x_train),y_train)
print("Trainset Accuracy: {:.4f}".format(train_acc))

Testset Accuracy: 1.0000


In [34]:
# Accuracy on the test data
test_pred = softmax_regression(x_test)
test_acc = accuracy_fn(test_pred, y_test)
print("Testset Accuracy: {:.4f}".format(test_acc))

Testset Accuracy: 1.0000
