In [7]:
import numpy as np
import tensorflow as tf

# Data Pipeline

In [3]:
mnist = tf.keras.datasets.fashion_mnist
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [6]:
print(train_images.shape, train_labels.shape, type(train_images))
print(test_images.shape, test_labels.shape, type(test_images))

(60000, 28, 28) (60000,) <class 'numpy.ndarray'>
(10000, 28, 28) (10000,) <class 'numpy.ndarray'>


In [9]:
unique, counts = np.unique(train_labels, axis=-1, return_counts=True)
dict(zip(unique, counts))

{0: 6000,
 1: 6000,
 2: 6000,
 3: 6000,
 4: 6000,
 5: 6000,
 6: 6000,
 7: 6000,
 8: 6000,
 9: 6000}

In [10]:
train_images = train_images.astype(np.float32) / 255.
test_images = test_images.astype(np.float32) / 255.

train_labels = tf.keras.utils.to_categorical(train_labels, 10)
test_labels = tf.keras.utils.to_categorical(test_labels, 10)

In [11]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(buffer_size=100000).batch(64)
test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(64)                

# Model

## Keras Sequential API

In [12]:
def create_seq_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
    model.add(tf.keras.layers.Dense(128, activation="relu"))
    model.add(tf.keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.Dense(10, activation="softmax"))

    return model

In [13]:
seq_model = create_seq_model()
seq_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 128)               100480    
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 10)                1290      
                                                                 
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


## Keras Functional API

In [17]:
def create_func_model():
    inputs = tf.keras.Input(shape=(28, 28))
    flatten = tf.keras.layers.Flatten()(inputs)
    dense = tf.keras.layers.Dense(128, activation="relu")(flatten)
    drop = tf.keras.layers.Dropout(0.2)(dense)
    outputs = tf.keras.layers.Dense(10, activation="softmax")(drop)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    return model

In [16]:
func_model = create_func_model()
func_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28)]          0         
                                                                 
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_2 (Dense)             (None, 128)               100480    
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_3 (Dense)             (None, 10)                1290      
                                                                 
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


## Model subclassing

In [22]:
class SubClassModel(tf.keras.Model):
    def __init__(self):
        super(SubClassModel, self).__init__() ## tf.keras.Model의 init
        self.flatten = tf.keras.layers.Flatten(input_shape=(28, 28))
        self.dense1 = tf.keras.layers.Dense(128, activation="relu")
        self.drop = tf.keras.layers.Dropout(0.2)
        self.dense2 = tf.keras.layers.Dense(10, activation="softmax")

    def call(self, x, training=False): ## training 여부에 대해 반드시 적어야한다.
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.drop(x)
        x = self.dense2(x)

        return x

In [23]:
subclass_model = SubClassModel()

In [24]:
inputs = tf.zeros((1, 28, 28))
subclass_model(inputs)
subclass_model.summary()

Model: "sub_class_model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_4 (Flatten)         multiple                  0         
                                                                 
 dense_6 (Dense)             multiple                  100480    
                                                                 
 dropout_4 (Dropout)         multiple                  0         
                                                                 
 dense_7 (Dense)             multiple                  1290      
                                                                 
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


# Training

## Keras API

In [26]:
learning_rate = 0.001
seq_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])

In [27]:
history = seq_model.fit(train_dataset, epochs=10, validation_data=test_dataset)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## GradientTape

In [29]:
loss_object = tf.keras.losses.CategoricalCrossentropy()
learning_rate = 0.001
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [30]:
train_loss = tf.keras.metrics.Mean(name="train_loss") ## 1epoch에 발생한 loss들의 평균
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name="train_accuracy")

test_loss = tf.keras.metrics.Mean(name="test_loss")
test_accuracy = tf.keras.metrics.CategoricalAccuracy(name="test_accuracy")

In [31]:
@tf.function
def train_step(model, images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)

    gradients = tape.gradient(loss, model.trainable_variables) ## 미분
    optimizer.apply_gradients(zip(gradients, model.trainable_variables)) ## 미분값으로 파라미터들을 업데이트

    train_loss(loss)
    train_accuracy(labels, predictions)

In [32]:
@tf.function
def test_step(model, images, labels):
    predictions = model(images, training=False)
    loss = loss_object(labels, predictions)

    test_loss(loss)
    test_accuracy(labels, predictions)

In [33]:
EPOCHS = 10

for epoch in range(EPOCHS):
    train_loss.reset_states() ## 1 epoch 돌고 0으로 reset
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for images, labels in train_dataset:
        train_step(func_model, images, labels)

    for test_images, test_labels in test_dataset:
        test_step(func_model, test_images, test_labels)

    print(f"EPOCH {epoch + 1}",
          f"Loss : {train_loss.result()}",
          f"Acc : {train_accuracy.result() * 100}",
          f"Test Loss : {test_loss.result()}",
          f"Test Acc : {test_accuracy.result() * 100}")

EPOCH 1 Loss : 0.554685652256012 Acc : 80.57333374023438 Test Loss : 0.43196266889572144 Test Acc : 84.68000030517578
EPOCH 2 Loss : 0.4089944064617157 Acc : 85.34833526611328 Test Loss : 0.40316319465637207 Test Acc : 85.27999877929688
EPOCH 3 Loss : 0.3729635179042816 Acc : 86.5633316040039 Test Loss : 0.3778151571750641 Test Acc : 86.54999542236328
EPOCH 4 Loss : 0.3491861820220947 Acc : 87.38333892822266 Test Loss : 0.3672628402709961 Test Acc : 86.75
EPOCH 5 Loss : 0.3331354558467865 Acc : 87.94666290283203 Test Loss : 0.3542054295539856 Test Acc : 87.19000244140625
EPOCH 6 Loss : 0.32074499130249023 Acc : 88.22333526611328 Test Loss : 0.3494870960712433 Test Acc : 87.8499984741211
EPOCH 7 Loss : 0.31001123785972595 Acc : 88.59500122070312 Test Loss : 0.3402135968208313 Test Acc : 87.70999908447266
EPOCH 8 Loss : 0.29981744289398193 Acc : 88.86666870117188 Test Loss : 0.34892159700393677 Test Acc : 87.62000274658203
EPOCH 9 Loss : 0.2944219708442688 Acc : 89.05332946777344 Test Lo

# Model save

## parameter만 저장

In [37]:
seq_model.save_weights("seq_model.ckpt")
seq_model_2 = create_seq_model()
seq_model_2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
                    loss="categorical_crossentropy",
                    metrics=["accuracy"])

In [38]:
seq_model_2.evaluate(test_dataset)



[2.3467483520507812, 0.08609999716281891]

## model 전체 저장

In [39]:
seq_model.save("seq_model")



In [40]:
seq_model_3 = tf.keras.models.load_model("seq_model")

In [41]:
seq_model_3.evaluate(test_dataset)



[0.34272581338882446, 0.8797000050544739]