In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras.layers as L
# Let TensorFlow allocate GPU memory on demand instead of reserving it all.
# The setting is applied to every visible GPU because TensorFlow requires
# memory growth to be configured identically across devices; on a CPU-only
# machine the list is empty and the loop is simply skipped.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

import matplotlib.pyplot as plt
# Use the 'ggplot' style sheet for matplotlib figures and render them inline
# in the notebook output.
plt.style.use('ggplot')
%matplotlib inline

In [2]:
# Training configuration
num_epochs = 10        # passes over the training set
num_classes = 10       # number of label classes
batch_size = 256       # examples per training mini-batch
learning_rate = 0.001  # step size handed to the optimizer

# CIFAR-10 train/test arrays as returned by Keras
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Report the shape of every split, one caption per array
for caption, array in (
    ("training_data\n", x_train),
    ("test_data\n", x_test),
    ("training_label\n", y_train),
    ("test_label\n", y_test),
):
    print(caption, array.shape)

training_data
 (50000, 32, 32, 3)
test_data
 (10000, 32, 32, 3)
training_label
 (50000, 1)
test_label
 (10000, 1)


In [3]:
# Whole training set as tensors: float32 images and one-hot labels.
x_train_ = tf.convert_to_tensor(x_train, dtype=tf.float32)
# one_hot on the (N, 1) label array yields (N, 1, 10); flatten it to (N, 10).
y_train_ = tf.reshape(tf.one_hot(y_train, depth=10), shape=(-1, 10))

for tensor in (x_train_, y_train_):
    print(tensor.shape)

(50000, 32, 32, 3)
(50000, 10)


In [5]:
# Training input pipeline.
# Examples are shuffled *before* batching so that every epoch draws freshly
# composed mini-batches. Shuffling after `.batch()` would only permute whole
# batches whose contents never change from epoch to epoch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=len(x_train))  # buffer covers the full set -> uniform shuffle
    .batch(batch_size)
)

# Per batch: cast images to float32 and divide by 255; turn the (batch, 1)
# integer labels into (batch, num_classes) one-hot rows.
train_dataset = (
    train_dataset.map(lambda x, y:
                      (tf.math.divide(tf.cast(x, tf.float32), 255.0),
                       tf.reshape(tf.one_hot(y, num_classes), (-1, num_classes))))
)

print(train_dataset)

<MapDataset shapes: ((None, 32, 32, 3), (None, 10)), types: (tf.float32, tf.float32)>


In [6]:
# Evaluation input pipeline.
# No shuffling here: the evaluation loss/accuracy do not depend on example
# order, so shuffling would only add work and make batch order non-deterministic.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(1000)
)
# Same preprocessing as the training pipeline: images cast to float32 and
# divided by 255, (batch, 1) integer labels turned into one-hot rows.
test_dataset = (
    test_dataset.map(lambda x, y:
                      (tf.math.divide(tf.cast(x, tf.float32), 255.0),
                       tf.reshape(tf.one_hot(y, num_classes), (-1, num_classes))))
)

print(test_dataset)

<MapDataset shapes: ((None, 32, 32, 3), (None, 10)), types: (tf.float32, tf.float32)>


In [7]:
def conv3x3(out_channels, strides=1):
    """Create a 3x3 convolution layer with 'same' padding and no bias term.

    Args:
        out_channels: number of output filters.
        strides: stride of the convolution (default 1).

    Returns:
        A new `L.Conv2D` layer.
    """
    conv_options = dict(kernel_size=3, strides=strides,
                        padding='same', use_bias=False)
    return L.Conv2D(filters=out_channels, **conv_options)

### training flag
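The `call` method of `L.BatchNormalization` needs the `training` flag because the layer behaves differently during training and evaluation: it normalizes with the current batch's statistics while training and with its moving averages during evaluation.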

In [9]:
# Residual block
class ResidualBlock(tf.keras.Model):
    """Two 3x3 conv + batch-norm layers with an additive shortcut connection.

    Args:
        out_channels: number of filters of both convolutions.
        strides: stride of the first convolution (the second always uses 1).
        downsample: optional callable applied to the block input to produce
            the shortcut tensor; when None the input itself is the shortcut.
        **kwargs: forwarded to `tf.keras.Model` (e.g. `name`). No fixed name is
            set so that Keras assigns a unique name to every instance, which
            is required when several blocks live in one `tf.keras.Sequential`.
    """

    def __init__(self, out_channels, strides=1, downsample=None, **kwargs):
        super(ResidualBlock, self).__init__(**kwargs)
        self.conv1 = conv3x3(out_channels, strides)
        self.bn1 = L.BatchNormalization(axis=-1)
        self.relu = L.ReLU()
        self.conv2 = conv3x3(out_channels)
        self.bn2 = L.BatchNormalization(axis=-1)
        self.downsample = downsample

    def call(self, x, training=False):
        """Run the block on `x`.

        Args:
            x: input tensor.
            training: passed to the batch-normalization layers and to the
                shortcut branch so they all run in the same mode.

        Returns:
            ReLU(main_branch(x) + shortcut(x)).
        """
        residual = x
        out = self.conv1(x)
        out = self.bn1(out, training=training)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out, training=training)
        # Explicit None check: do not rely on the truthiness of a layer object.
        if self.downsample is not None:
            # Forward the flag explicitly, like for bn1/bn2 above.
            residual = self.downsample(x, training=training)
        out = out + residual
        out = self.relu(out)
        return out

### tf.keras.Sequential
The `call` method of `tf.keras.Sequential` has a `training` flag. This flag is forwarded to every layer contained in the `tf.keras.Sequential` instance.

In [26]:
class ResNet(tf.keras.Model):
    """Small ResNet: a 3x3 conv stem, three residual stages and a dense head.

    Args:
        block: callable that builds one residual block. It is invoked as
            `block(out_channels, strides, downsample)` for the first block of
            a stage and as `block(out_channels)` for the remaining ones.
        layers: sequence holding the number of blocks of each of the three stages.
        num_classes: number of units of the final dense layer.
    """

    def __init__(self, block, layers, num_classes=10):
        super().__init__(name='ResNet')
        # Channel count entering the next stage; updated by make_layer.
        self.in_channels = 16
        # Stem: conv -> batch norm -> ReLU
        self.conv = conv3x3(16)
        self.bn = L.BatchNormalization(axis=-1)
        self.relu = L.ReLU()
        # Residual stages with 16, 32 and 64 channels; the last two use stride 2.
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], strides=2)
        self.layer3 = self.make_layer(block, 64, layers[2], strides=2)
        # Head: average pooling -> flatten -> dense (no activation)
        self.avg_pool = L.AvgPool2D(pool_size=8)
        self.flatten = L.Flatten()
        self.fc = L.Dense(units=num_classes)

    def make_layer(self, block, out_channels, blocks, strides=1):
        """Build one stage made of `blocks` residual blocks.

        The first block receives `strides` and, when the stride is not 1 or the
        channel count changes, a conv + batch-norm shortcut; the remaining
        blocks are created with `block(out_channels)` only.

        Returns:
            A `tf.keras.Sequential` wrapping the blocks in order.
        """
        keeps_shape = strides == 1 and self.in_channels == out_channels
        if keeps_shape:
            downsample = None
        else:
            downsample = tf.keras.Sequential([
                conv3x3(out_channels, strides=strides),
                L.BatchNormalization(axis=-1),
            ])
        stage = [block(out_channels, strides, downsample)]
        self.in_channels = out_channels
        stage.extend(block(out_channels) for _ in range(1, blocks))
        return tf.keras.Sequential(stage)

    def call(self, x, training=False):
        """Forward pass; returns the output of the final dense layer.

        `training` is handed to the stem batch norm and to every stage.
        """
        features = self.relu(self.bn(self.conv(x), training=training))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features, training=training)
        pooled = self.avg_pool(features)
        return self.fc(self.flatten(pooled))

In [27]:
# ResNet with two residual blocks in each of its three stages.
model = ResNet(block=ResidualBlock, layers=[2, 2, 2])


def loss_fn(y, y_pre):
    """Softmax cross-entropy between labels `y` and logits `y_pre`."""
    return tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_pre)


def accuracy(y, y_pre):
    """Categorical accuracy of the predictions `y_pre` against the labels `y`."""
    return tf.keras.metrics.categorical_accuracy(y_true=y, y_pred=y_pre)


optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [40]:
# Configure training: a fresh Adam optimizer, softmax cross-entropy computed
# on the model's raw outputs, and categorical accuracy as the reported metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(
    optimizer=optimizer,
    loss=tf.nn.softmax_cross_entropy_with_logits,
    metrics=[tf.keras.metrics.categorical_accuracy],
)

model.summary()

Model: "ResNet"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_120 (Conv2D)          multiple                  432       
_________________________________________________________________
batch_normalization_120 (Bat multiple                  64        
_________________________________________________________________
re_lu_56 (ReLU)              multiple                  0         
_________________________________________________________________
sequential_40 (Sequential)   multiple                  9472      
_________________________________________________________________
sequential_42 (Sequential)   multiple                  37504     
_________________________________________________________________
sequential_44 (Sequential)   multiple                  148736    
_________________________________________________________________
average_pooling2d_8 (Average multiple                  0    

In [43]:
# Train the model. Keep the returned History object so the per-epoch loss and
# metric values remain available afterwards (e.g. for plotting), instead of
# letting it be discarded as the cell's repr output.
history = model.fit(train_dataset, epochs=num_epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1a8a5f60cc0>

In [44]:
# Evaluate on the test pipeline; with one compiled metric, evaluate() returns
# the loss followed by that metric (categorical accuracy).
test_loss, test_acc = model.evaluate(test_dataset)

print("test_accuracy: ", test_acc)

test_accracy:  0.7268
