In [28]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.__version__

'2.10.0'

In [29]:
# from google.colab import drive
# drive.mount('/content/drive')

# ZOO classification

### Data list

1. 동물 이름  animal name:     (deleted)
2. 털  hair     Boolean
3. 깃털  feathers     Boolean
4. 알  eggs     Boolean
5. 우유 milk     Boolean
6. 날 수있는지  airborne     Boolean
7. 수중 생물  aquatic      Boolean
8. 포식자  predator     Boolean
9. 이빨이 있는지 toothed      Boolean
10. 척추 동물  backbone     Boolean
11. 호흡 방법  breathes     Boolean
12. 독  venomous     Boolean
13. 물갈퀴  fins     Boolean
14. 다리  legs     Numeric (set of values: {0, 2, 4, 5, 6, 8})
15. 꼬리  tail     Boolean
16. 사육 가능한 지 domestic     Boolean
17. 고양이 크기인지 catsize      Boolean
18. 동물 타입 type     Numeric (integer values in range [0, 6])

In [30]:
# Read the whole zoo table as 32-bit integers.
xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.int32)

nb_classes = 7  # class ids 0 ~ 6

# Hold out the last 10 rows for testing; everything before them is training data.
train_rows, test_rows = xy[:-10], xy[-10:]

# Every column except the last is a feature; the last column is the label.
# Indexing with the list [-1] keeps the label column 2-D, i.e. shape (rows, 1).
x_train = tf.cast(train_rows[:, :-1], tf.float32)
y_train = train_rows[:, [-1]]

x_test = tf.cast(test_rows[:, :-1], tf.float32)
y_test = test_rows[:, [-1]]

# One-hot encoding, illustrated with 3 classes [0, 1, 2]:
#   labels : 0, 0, 1, 2, 0, 1, 2 ....
#   1 => [0, 1, 0]
#   0 => [1, 0, 0]
#   2 => [0, 0, 1]

print(y_train[15])  # raw integer label
# one_hot appends a class axis to the (rows, 1) labels; the reshape then
# collapses the result to (rows, nb_classes).
y_train = tf.reshape(tf.one_hot(y_train, nb_classes), [-1, nb_classes])
print(y_train[15])  # the same sample after one-hot encoding

y_test = tf.one_hot(y_test, nb_classes)  # convert to one-hot
print(f"before reshape:{y_test.shape}")
# -1 keeps the explicitly given dimension (nb_classes) fixed and lets
# TensorFlow work out the remaining dimension on its own.
y_test = tf.reshape(y_test, [-1, nb_classes])
print(f"after reshape:{y_test.shape}")

# Final sanity check of shapes and dtypes for both splits.
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

print(x_train.dtype, y_train.dtype)
print(x_test.dtype, y_test.dtype)


[6]
tf.Tensor([0. 0. 0. 0. 0. 0. 1.], shape=(7,), dtype=float32)
before reshape:(10, 1, 7)
after reshape:(10, 7)
(91, 16) (91, 7)
(10, 16) (10, 7)
<dtype: 'float32'> <dtype: 'float32'>
<dtype: 'float32'> <dtype: 'float32'>


In [31]:
# Serve the training set as a single batch: the batch size equals the number
# of training samples, so one pass over `dataset` yields one (features, labels) pair.
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))

# Take the input width from the data instead of hard-coding it, so the
# parameter shapes stay correct if the number of feature columns changes.
nb_features = x_train.shape[1]

# Parameters of the linear layer, drawn from a normal distribution with
# mean 0 and standard deviation 1.
# NOTE(review): no random seed is set here, so the initial values (and the
# exact training curve) differ between runs -- confirm whether that is intended.
W = tf.Variable(tf.random.normal([nb_features, nb_classes], 0, 1), name='weight')
b = tf.Variable(tf.random.normal([nb_classes], 0, 1), name='bias')

print(W.shape, b.shape)

(16, 7) (7,)


# 가설 설정

* 주어진 동물의 데이터들로 분류하는 가설 모델을 생성한다

## $$ y_k = \frac{exp(H(x_k))}{\sum_{i=1}^{n}exp(H(x_i))}  $$

In [32]:
def logistic_regression(features): # hypothesis_softmax
    """Return softmax class probabilities for a batch of feature rows.

    Args:
        features: 2-D float tensor whose column count matches the row count
            of the module-level weight matrix ``W``.

    Returns:
        Tensor with one row per input row and one column per class, obtained
        by applying softmax to ``features @ W + b``.
    """
    logits = tf.add(tf.matmul(features, W), b)
    return tf.nn.softmax(logits)

print(logistic_regression(x_train))

tf.Tensor(
[[4.10938100e-07 1.39875337e-03 1.85227429e-04 2.16101602e-04
  1.86940633e-06 9.98000562e-01 1.97088782e-04]
 [1.59132935e-06 5.75020676e-04 2.43113172e-04 1.00991529e-04
  7.00534247e-06 9.98667598e-01 4.04730410e-04]
 [7.83748031e-02 8.18328142e-01 1.52283655e-02 2.52695531e-02
  8.26386642e-03 1.44316591e-02 4.01035249e-02]
 [4.10938100e-07 1.39875337e-03 1.85227429e-04 2.16101602e-04
  1.86940633e-06 9.98000562e-01 1.97088782e-04]
 [8.65881873e-07 1.41055486e-03 1.71979947e-04 4.03089325e-05
  2.61753598e-06 9.98294532e-01 7.91173661e-05]
 [1.59132935e-06 5.75020676e-04 2.43113172e-04 1.00991529e-04
  7.00534247e-06 9.98667598e-01 4.04730410e-04]
 [4.32815284e-07 1.26512023e-03 3.18771927e-04 1.08118380e-04
  3.33464850e-05 9.97328877e-01 9.45373205e-04]
 [2.66861413e-02 4.99959111e-01 1.92274172e-02 4.61701304e-02
  7.17142597e-02 9.82112810e-03 3.26421678e-01]
 [7.83748031e-02 8.18328142e-01 1.52283655e-02 2.52695531e-02
  8.26386642e-03 1.44316591e-02 4.01035249e-02]

## Loss Function

## $$ cost(H(x), y) = -\sum_{i=1}^{n} y_i \log(H(x)_i) $$

In [33]:
def loss_fn(hypothesis, labels):
    """Categorical cross-entropy between predictions and one-hot labels.

    Note the argument order: predictions come first here, whereas the Keras
    function takes the ground truth first. The keywords below make the
    mapping explicit.

    Args:
        hypothesis: predicted class probabilities.
        labels: one-hot encoded ground-truth classes.

    Returns:
        The tensor returned by ``tf.keras.losses.categorical_crossentropy``;
        no reduction is applied in this function.
    """
    return tf.keras.losses.categorical_crossentropy(y_true=labels, y_pred=hypothesis)

# Plain stochastic gradient descent with a fixed step size.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

In [34]:
epochs = 10001

# Variables updated by the optimizer on every step.
trainable_vars = [W, b]

for step in range(epochs):
    for features, labels in dataset:
        # Record the forward pass so the loss can be differentiated
        # with respect to the trainable variables.
        with tf.GradientTape() as tape:
            pred = logistic_regression(features)
            loss_value = loss_fn(pred, labels)
        # NOTE(review): loss_value is not reduced before differentiation; if it
        # is a per-sample vector the gradient is presumably that of its sum -- confirm.
        grads = tape.gradient(loss_value, trainable_vars)
        optimizer.apply_gradients(zip(grads, trainable_vars))

        if step % 200 == 0:
            # The logged value is recomputed after the update, so it reflects
            # the parameters that were just written.
            current_loss = tf.reduce_mean(loss_fn(logistic_regression(features), labels))
            print("Iter: {}, Loss: {:.4f}".format(step, current_loss))


loc("mps_select"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/0032d1ee-80fd-11ee-8227-6aecfccc70fe/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":294:0)): error: 'anec.gain_offset_control' op result #0 must be 4D/5D memref of 16-bit float or 8-bit signed integer or 8-bit unsigned integer values, but got 'memref<1x91x1x7xi1>'


Iter: 0, Loss: 2.7610
Iter: 100, Loss: 0.1172
Iter: 200, Loss: 0.0608
Iter: 300, Loss: 0.0408
Iter: 400, Loss: 0.0307
Iter: 500, Loss: 0.0246
Iter: 600, Loss: 0.0205
Iter: 700, Loss: 0.0176
Iter: 800, Loss: 0.0154
Iter: 900, Loss: 0.0137
Iter: 1000, Loss: 0.0124
Iter: 1100, Loss: 0.0113
Iter: 1200, Loss: 0.0103
Iter: 1300, Loss: 0.0095
Iter: 1400, Loss: 0.0089
Iter: 1500, Loss: 0.0083
Iter: 1600, Loss: 0.0078
Iter: 1700, Loss: 0.0073
Iter: 1800, Loss: 0.0069
Iter: 1900, Loss: 0.0065
Iter: 2000, Loss: 0.0062
Iter: 2100, Loss: 0.0059
Iter: 2200, Loss: 0.0057
Iter: 2300, Loss: 0.0054
Iter: 2400, Loss: 0.0052
Iter: 2500, Loss: 0.0050
Iter: 2600, Loss: 0.0048
Iter: 2700, Loss: 0.0046
Iter: 2800, Loss: 0.0044
Iter: 2900, Loss: 0.0043
Iter: 3000, Loss: 0.0042
Iter: 3100, Loss: 0.0040
Iter: 3200, Loss: 0.0039
Iter: 3300, Loss: 0.0038
Iter: 3400, Loss: 0.0037
Iter: 3500, Loss: 0.0036
Iter: 3600, Loss: 0.0035
Iter: 3700, Loss: 0.0034
Iter: 3800, Loss: 0.0033
Iter: 3900, Loss: 0.0032
Iter: 4000, 

In [35]:
def accuracy_fn(hypothesis, labels):
    """Fraction of rows whose arg-max class matches the arg-max of the label row.

    Both arg-max vectors are printed as a side effect so that they can be
    compared by eye.

    Args:
        hypothesis: 2-D tensor of per-class scores, one row per sample.
        labels: 2-D tensor of one-hot targets with the same layout.

    Returns:
        Scalar float32 tensor: the mean of the element-wise equality between
        predicted and expected class indices.
    """
    predicted = tf.cast(tf.argmax(hypothesis, axis=1), tf.float32)
    print(predicted)

    expected = tf.cast(tf.argmax(labels, axis=1), tf.float32)
    print(expected)

    matches = tf.equal(predicted, expected)
    return tf.reduce_mean(tf.cast(matches, tf.float32))

In [36]:
# Score the trained parameters on the held-out test rows.
test_probs = logistic_regression(x_test)
test_acc = accuracy_fn(test_probs, y_test)
print("Testset Accuracy: {:.4f}".format(test_acc))

tf.Tensor([2. 3. 0. 0. 1. 0. 5. 0. 6. 1.], shape=(10,), dtype=float32)
tf.Tensor([2. 3. 0. 0. 1. 0. 5. 0. 6. 1.], shape=(10,), dtype=float32)
Testset Accuracy: 1.0000
