In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.__version__

'2.10.0'

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

# ZOO classification

### Data list

1. 동물 이름  animal name:     (deleted)
2. 털  hair     Boolean
3. 깃털  feathers     Boolean
4. 알  eggs     Boolean
5. 우유 milk     Boolean
6. 날 수있는지  airborne     Boolean
7. 수중 생물  aquatic      Boolean
8. 포식자  predator     Boolean
9. 이빨이 있는지 toothed      Boolean
10. 척추 동물  backbone     Boolean
11. 호흡 방법  breathes     Boolean
12. 독  venomous     Boolean
13. 물갈퀴  fins     Boolean
14. 다리  legs     Numeric (set of values: {0",2,4,5,6,8})
15. 꼬리  tail     Boolean
16. 사육 가능한 지 domestic     Boolean
17. 고양이 크기인지 catsize      Boolean
18. 동물 타입 type     Numeric (integer values in range [0",6])

In [2]:
xy = np.loadtxt('data-04-zoo.csv',
                delimiter=',',
                dtype=np.int32)
x_train = xy[0:-10, 0:-1]
y_train = xy[0:-10, [-1]]

x_train = tf.cast(x_train, tf.float32)

x_test = xy[-10:, 0:-1]
y_test = xy[-10:, [-1]]

x_test = tf.cast(x_test, tf.float32)

nb_classes = 7  # 0 ~ 6

# [0, 1, 2] 총 class가 3개일때,
# label : 0, 0, 1, 2, 0, 1, 2 ....
# 1 => [0, 1, 0]
# 0 => [1, 0, 0]
# 2 => [0, 0, 1]

print(y_train[15])
y_train = tf.one_hot(list(y_train), nb_classes)
y_train = tf.reshape(y_train, [-1, nb_classes])
print(y_train[15])

y_test = tf.one_hot(list(y_test), nb_classes)
y_test = tf.reshape(y_test, [-1, nb_classes])

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

print(x_train.dtype, y_train.dtype)
print(x_test.dtype, y_test.dtype)


2023-12-20 15:07:14.125808: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-20 15:07:14.126956: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1
[6]
tf.Tensor([0. 0. 0. 0. 0. 0. 1.], shape=(7,), dtype=float32)
(91, 16) (91, 7)
(10, 16) (10, 7)
<dtype: 'float32'> <dtype: 'float32'>
<dtype: 'float32'> <dtype: 'float32'>


In [3]:
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))

W = tf.Variable(tf.random.normal([16,7],0,1), name='weight')
b = tf.Variable(tf.random.normal([1],0,1), name='bias')

print(W.shape, b.shape)

(16, 7) (1,)


# 가설 설정

* 주어진 동물의 데이터들로 분류하는 가설 모델을 생성한다

## $$ y_k = \frac{exp(H(x_k))}{\sum_{i=1}^{n}exp(H(x_i))}  $$

In [4]:
def logistic_regression(features): # hypothesis_softmax
    return tf.nn.softmax(tf.matmul(features, W) + b)

print(logistic_regression(x_train))

tf.Tensor(
[[3.50468617e-05 1.46588475e-01 3.02867392e-07 1.59002331e-04
  3.15243209e-09 2.92947218e-02 8.23922455e-01]
 [2.11834576e-05 4.19307590e-01 2.01354646e-06 1.20025405e-04
  1.54743107e-09 6.75667971e-02 5.12982428e-01]
 [4.10990033e-04 1.65545044e-03 2.81437319e-02 7.27509081e-01
  3.21609550e-05 1.21626854e-01 1.20621718e-01]
 [3.50468617e-05 1.46588475e-01 3.02867392e-07 1.59002331e-04
  3.15243209e-09 2.92947218e-02 8.23922455e-01]
 [1.48623121e-05 2.48878762e-01 4.65122525e-07 9.17248661e-04
  8.02183708e-10 1.49704620e-01 6.00484133e-01]
 [2.11834576e-05 4.19307590e-01 2.01354646e-06 1.20025405e-04
  1.54743107e-09 6.75667971e-02 5.12982428e-01]
 [5.88102557e-06 6.32969797e-01 5.23096901e-07 1.55069447e-05
  3.16872861e-10 1.58700533e-02 3.51138264e-01]
 [1.23423897e-03 3.19530442e-02 2.40213513e-01 9.33423266e-02
  9.64143474e-05 9.78529155e-02 5.35307527e-01]
 [4.10990033e-04 1.65545044e-03 2.81437319e-02 7.27509081e-01
  3.21609550e-05 1.21626854e-01 1.20621718e-01]

## Loss Function

##$$
\begin{align}
cost(H(x),y) & = −\sum_{n=1}^{n} Y log(H(x))
\end{align}
$$

In [5]:
def loss_fn(hypothesis, labels):
    loss = -tf.reduce_mean(tf.reduce_sum(labels * tf.math.log(hypothesis), axis=1))
    return loss

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

In [10]:
epochs = 10001

for step in range(epochs):
  for features, labels in dataset:
    with tf.GradientTape() as tape:
        pred = logistic_regression(features)
        loss_value = loss_fn(pred,labels)
    grads = tape.gradient(loss_value, [W,b])
    optimizer.apply_gradients(grads_and_vars=zip(grads,[W,b]))
    if step % 100 == 0:
        print("Iter: {}, Loss: {:.4f}".format(step, tf.reduce_mean(loss_fn(logistic_regression(features),labels))))


Iter: 0, Loss: 0.4350
Iter: 100, Loss: 0.4222
Iter: 200, Loss: 0.4101
Iter: 300, Loss: 0.3985
Iter: 400, Loss: 0.3876
Iter: 500, Loss: 0.3772
Iter: 600, Loss: 0.3673
Iter: 700, Loss: 0.3579
Iter: 800, Loss: 0.3489
Iter: 900, Loss: 0.3404
Iter: 1000, Loss: 0.3323
Iter: 1100, Loss: 0.3245
Iter: 1200, Loss: 0.3171
Iter: 1300, Loss: 0.3101
Iter: 1400, Loss: 0.3033
Iter: 1500, Loss: 0.2969
Iter: 1600, Loss: 0.2907
Iter: 1700, Loss: 0.2848
Iter: 1800, Loss: 0.2792
Iter: 1900, Loss: 0.2737
Iter: 2000, Loss: 0.2685
Iter: 2100, Loss: 0.2635
Iter: 2200, Loss: 0.2586
Iter: 2300, Loss: 0.2540
Iter: 2400, Loss: 0.2495
Iter: 2500, Loss: 0.2452
Iter: 2600, Loss: 0.2410
Iter: 2700, Loss: 0.2370
Iter: 2800, Loss: 0.2331
Iter: 2900, Loss: 0.2293
Iter: 3000, Loss: 0.2256
Iter: 3100, Loss: 0.2221
Iter: 3200, Loss: 0.2187
Iter: 3300, Loss: 0.2153
Iter: 3400, Loss: 0.2121
Iter: 3500, Loss: 0.2090
Iter: 3600, Loss: 0.2059
Iter: 3700, Loss: 0.2029
Iter: 3800, Loss: 0.2000
Iter: 3900, Loss: 0.1972
Iter: 4000, 

In [11]:
def accuracy_fn(hypothesis, labels):
    hypothesis = tf.argmax(hypothesis, 1)
    predicted = tf.cast(hypothesis, dtype=tf.float32)
    print(predicted)

    labels = tf.argmax(labels, 1)
    labels = tf.cast(labels, dtype=tf.float32)
    print(labels)

    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, labels), dtype=tf.float32))

    return accuracy

In [12]:
test_acc = accuracy_fn(logistic_regression(x_test),y_test)
print("Testset Accuracy: {:.4f}".format(test_acc))

tf.Tensor([2. 3. 0. 0. 1. 0. 5. 0. 1. 1.], shape=(10,), dtype=float32)
tf.Tensor([2. 3. 0. 0. 1. 0. 5. 0. 6. 1.], shape=(10,), dtype=float32)
Testset Accuracy: 0.9000
