# 特徵轉換

## One-hot encoding

In [3]:
import tensorflow as tf

In [4]:
# One-hot encoding demo: turn integer class labels into one-hot rows.
# num_classes is the number of classes (the width of each row); it is optional.
sample_labels = [0, 1, 2, 3]
tf.keras.utils.to_categorical(sample_labels, num_classes=9)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)

## MNIST 手寫阿拉伯數字辨識

In [5]:
mnist = tf.keras.datasets.mnist

# Number of digit classes (0-9); shared by the one-hot labels and the output layer
# so the two can never disagree.
NUM_CLASSES = 10

# Load the MNIST handwritten-digit dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Feature scaling with min-max normalization: (x - min) / (max - min).
# With min = 0 and max = 255 this reduces to x / 255.
x_train_norm, x_test_norm = x_train / 255.0, x_test / 255.0

# One-hot encode the labels. num_classes is passed explicitly so the label width
# always matches the output layer instead of being inferred from the largest
# label that happens to be present in each array.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=NUM_CLASSES)

# Build the model: flatten the 28x28 input, one hidden layer with dropout,
# then a softmax output with one unit per class
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
])

# Configure the optimizer, the loss function, and the performance metrics
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model, holding out 20% of the training data for validation
history = model.fit(x_train_norm, y_train, epochs=5, validation_split=0.2)

# Score the model on the test set
score = model.evaluate(x_test_norm, y_test, verbose=0)

# Print each metric name next to its value
for metric_name, metric_value in zip(model.metrics_names, score):
    print(f'{metric_name}: {metric_value:.4f}')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
loss: 0.0734
accuracy: 0.9779


## Normalization

In [25]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing

# Test data: a 3x3 matrix
data = np.array([
    [0.1, 0.2, 0.3],
    [0.8, 0.9, 1.0],
    [1.5, 1.6, 1.7],
])

# Normalization layer: adapt() fits the layer to the data,
# and calling the layer then applies the transformation.
layer = preprocessing.Normalization()
layer.adapt(data)
normalized_data = layer(data)

# Show the mean and standard deviation taken over all normalized values
normalized_values = normalized_data.numpy()
print(f"平均數: {normalized_values.mean():.2f}")
print(f"標準差: {normalized_values.std():.2f}")

平均數: 0.00
標準差: 1.00


In [23]:
# Display the normalized tensor as this cell's output
normalized_data

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[-1.2247449, -1.2247449, -1.2247449],
       [ 0.       ,  0.       ,  0.       ],
       [ 1.2247449,  1.224745 ,  1.224745 ]], dtype=float32)>