# Softmax Regression
$$ S(y_i) = \frac{e^{y_i}}{\sum_{j} e^{y_j}} $$

## Cost function : cross entropy
$S$ : Softmax Regression 함수

$L$ : label(실제 값)

$$ D(S,L) = -{\sum_{i} L_i\log{S_i}} $$ 

## Softmax Classifier

In [6]:
import tensorflow as tf
import numpy as np

# Training inputs: 8 samples with 4 features each, stored as float32.
x_data = np.array(
    [
        [1, 2, 1, 1],
        [2, 1, 3, 2],
        [3, 1, 3, 4],
        [4, 1, 5, 5],
        [1, 7, 5, 5],
        [1, 2, 5, 6],
        [1, 6, 6, 6],
        [1, 7, 7, 7],
    ],
    dtype=np.float32,
)

# One-hot encoding: with 3 classes, the class indices 0, 1, 2 are written as
# [1, 0, 0], [0, 1, 0], [0, 0, 1], so exactly one position is "hot".
y_data = np.array(
    [
        [0, 0, 1],
        [0, 0, 1],
        [0, 0, 1],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
    ],
    dtype=np.float32,
)

# The width of y equals the number of classes (= number of labels).
# The batch size is the full sample count, so iterating the dataset yields
# a single batch containing every sample.
dataset = tf.data.Dataset.from_tensor_slices((x_data, y_data)).batch(len(x_data))

# Model parameters: 4 input features -> 3 class scores, randomly initialised.
W = tf.Variable(tf.random.normal([4, 3]), name='weight')
b = tf.Variable(tf.random.normal([3]), name='bias')
variable = [W, b]

# Notebook-style inspection of the dataset's element structure.
dataset.element_spec

def softmax_fn(features):
    """Return softmax class probabilities for ``features``.

    The logits are ``features @ W + b`` using the module-level ``W`` and
    ``b`` variables; ``tf.nn.softmax`` then normalises them.
    """
    logits = tf.matmul(features, W) + b
    return tf.nn.softmax(logits)

def loss_fn(features, labels):
    """Return the mean cross-entropy loss of the softmax model on one batch.

    Implements D(S, L) = -sum_i L_i * log(S_i), averaged over the batch.

    Args:
        features: Batch of inputs; must be matmul-compatible with the
            module-level ``W``.
        labels: Targets for the same batch, row-aligned with ``features``
            (one-hot rows in this file).

    Returns:
        A scalar tensor holding the batch-averaged cross-entropy.
    """
    hypothesis = tf.nn.softmax(tf.matmul(features, W) + b)
    # Use the labels passed in for this batch rather than the global y_data:
    # the global only matches when the batch is the entire dataset in its
    # original order, and gives wrong results (or a shape error) otherwise.
    per_sample = -tf.reduce_sum(labels * tf.math.log(hypothesis), axis=1)
    return tf.reduce_mean(per_sample)

def grad(hypothesis, features, labels):
    """Return the gradients of the batch loss with respect to ``[W, b]``.

    Args:
        hypothesis: Not used by this function; kept in the signature so
            existing call sites keep working.
        features: Batch of inputs forwarded to ``loss_fn``.
        labels: Batch of targets forwarded to ``loss_fn``.

    Returns:
        A list ``[dL/dW, dL/db]`` in the same order as ``[W, b]``.
    """
    trainable = [W, b]
    # The loss must be evaluated inside the tape so the ops are recorded.
    with tf.GradientTape() as tape:
        batch_loss = loss_fn(features, labels)
    return tape.gradient(batch_loss, trainable)

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

n_epochs = 3000
for step in range(n_epochs + 1):
    # One SGD update per batch yielded by the dataset.
    for features, labels in dataset:
        hypothesis = softmax_fn(features)
        grads = grad(hypothesis, features, labels)
        optimizer.apply_gradients(grads_and_vars=zip(grads, [W, b]))

    # Every 300 steps, report the loss on the last batch seen, evaluated
    # after the parameter update.
    if step % 300 == 0:
        current_loss = loss_fn(features, labels)
        print("iter: {}, Loss: {:.4f}".format(step, current_loss))

# Class probabilities for every training sample under the trained model.
a = softmax_fn(x_data)
# Print the freshly computed probabilities. The leftover `hypothesis` from
# the training loop was computed before the final parameter update, so it is
# not what the argmax predictions below are based on.
print(a)  # x_data passed through the softmax function

# argmax returns the index of the largest value along the given axis.
print(tf.argmax(a, 1))  # predicted class per sample
print(tf.argmax(y_data, 1))  # true class per sample

iter: 0, Loss: 2.7990
iter: 300, Loss: 0.8312
iter: 600, Loss: 0.6635
iter: 900, Loss: 0.6005
iter: 1200, Loss: 0.5601
iter: 1500, Loss: 0.5296
iter: 1800, Loss: 0.5050
iter: 2100, Loss: 0.4844
iter: 2400, Loss: 0.4667
iter: 2700, Loss: 0.4510
iter: 3000, Loss: 0.4369
tf.Tensor(
[[9.9414093e-03 5.8100652e-02 9.3195790e-01]
 [1.6830681e-02 2.0065969e-01 7.8250962e-01]
 [4.1880864e-03 4.3612772e-01 5.5968416e-01]
 [3.4023379e-03 5.9760123e-01 3.9899644e-01]
 [5.5367953e-01 3.9984158e-01 4.6478894e-02]
 [2.7209947e-01 7.2752315e-01 3.7744248e-04]
 [6.0211343e-01 3.9546511e-01 2.4214946e-03]
 [7.0973784e-01 2.8983405e-01 4.2811382e-04]], shape=(8, 3), dtype=float32)
tf.Tensor([2 2 2 1 0 1 0 0], shape=(8,), dtype=int64)
tf.Tensor([2 2 2 1 1 1 0 0], shape=(8,), dtype=int64)
