### Select GPU

In [1]:
import os

# Both variables are set before TensorFlow is imported in the next cell.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '3',   # TensorFlow native-log verbosity setting
    "CUDA_VISIBLE_DEVICES": '0',   # expose only device index 0 to CUDA
})

### Load MNIST dataset

In [2]:
import tensorflow as tf
import numpy as np

# Fetch the train/test splits and report the raw shapes and dtypes.
(train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()
print(train_x.shape, train_y.shape)
print(train_x.dtype, train_y.dtype)

# Images: convert to float32 and divide by 255.
train_x, test_x = (
    images.astype(np.float32) / 255.0 for images in (train_x, test_x)
)

# Labels: one-hot encode over the 10 digit classes.
train_y, test_y = (
    tf.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (train_y, test_y)
)
print(train_y.shape, train_y.dtype)
print(test_y.shape, test_y.dtype)

(60000, 28, 28) (60000,)
uint8 uint8
(60000, 10) float32
(10000, 10) float32


### Define Loss

In [4]:
class MultiFocalLoss(tf.keras.losses.Loss):
    """Focal loss evaluated independently on every class and summed per sample.

    For a target ``y`` and a predicted probability ``p`` of one class, the
    element-wise term is::

        -[ y * (1 - p)**gamma * log(p + eps)
           + (1 - y) * p**gamma * log(1 - p + eps) ]

    The terms are optionally multiplied by ``class_weights`` and summed over
    axis 1, yielding one loss value per sample. Batch reduction is left to the
    ``reduction`` configured on the base class, so ``sample_weight`` (and
    ``class_weight`` passed to ``fit``) are applied per example. With the
    default reduction the result is the batch mean.

    Args:
        class_weights: Optional per-class multipliers broadcastable against a
            ``(B, n_classes)`` tensor, or ``None`` for no weighting.
        gamma: Focusing exponent applied to the modulating factor.
        eps: Small constant added inside the logarithms to avoid ``log(0)``.
        name: Name of the loss instance.
        **kwargs: Forwarded to ``tf.keras.losses.Loss`` (e.g. ``reduction``).
    """

    def __init__(self, class_weights=None, gamma=2, eps=1e-7, name="MultiFocalLoss", **kwargs):
        super().__init__(name=name, **kwargs)
        self.class_weights = class_weights
        self.gamma = gamma
        self.eps = eps

    def call(self, y_true, y_pred):
        """Return the per-sample focal loss.

        Args:
            y_true: Targets with the same shape as ``y_pred``; any numeric
                dtype (cast to the dtype of ``y_pred``).
            y_pred: Predicted probabilities, shape ``(B, n_classes)``.

        Returns:
            Tensor of shape ``(B,)`` holding one loss value per sample.
        """
        y_pred = tf.convert_to_tensor(y_pred)
        # Integer one-hot labels would otherwise raise a dtype mismatch when
        # multiplied with the floating-point predictions.
        y_true = tf.cast(y_true, y_pred.dtype)

        pos_loss = y_true * tf.pow(1 - y_pred, self.gamma) * tf.math.log(y_pred + self.eps)
        neg_loss = (1 - y_true) * tf.pow(y_pred, self.gamma) * tf.math.log(1 - y_pred + self.eps)
        loss = -(pos_loss + neg_loss)  # (B, n_classes)

        if self.class_weights is not None:
            # Normalise lists / NumPy arrays of any dtype to the loss dtype.
            weights = tf.cast(tf.convert_to_tensor(self.class_weights), loss.dtype)
            loss = loss * weights

        # Do not reduce over the batch here: the base class applies
        # sample weights and the configured reduction afterwards.
        return tf.reduce_sum(loss, axis=1)

### Build Model

In [16]:
from tensorflow.keras import layers 
from tensorflow.keras.regularizers import l2

# Keyword arguments shared by every convolution layer below.
param = dict(
    padding="same",
    activation="relu",
    kernel_initializer="he_normal",
    kernel_regularizer=l2(1e-5),
)

img_input = layers.Input(shape=(28, 28), dtype=tf.float32)

# Add a channel axis, then zero-pad by 2 on the spatial dimensions.
x = layers.Reshape((28, 28, 1))(img_input)
x = layers.ZeroPadding2D(padding=2)(x)

# Three convolution -> max-pool stages with 32, 64 and 128 filters.
for n_filters in (32, 64, 128):
    x = layers.Conv2D(n_filters, (3, 3), **param)(x)
    x = layers.MaxPool2D()(x)

# Classifier head: flatten, dropout, one hidden layer, 10-way softmax.
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(128, activation='relu', kernel_initializer="he_normal")(x)
x = layers.Dense(10, activation='softmax')(x)

model = tf.keras.models.Model(inputs=img_input, outputs=x)

model.compile(loss=MultiFocalLoss(), optimizer='adam', metrics=['acc'])

In [17]:
# Train for 50 epochs in batches of 1024, validating on the test split each epoch.
model.fit(
    x=train_x,
    y=train_y,
    batch_size=1024,
    epochs=50,
    validation_data=(test_x, test_y),
    verbose=2,
)

Epoch 1/50
59/59 - 2s - loss: 0.6931 - acc: 0.7301 - val_loss: 0.0782 - val_acc: 0.9632 - 2s/epoch - 27ms/step
Epoch 2/50
59/59 - 1s - loss: 0.0916 - acc: 0.9569 - val_loss: 0.0424 - val_acc: 0.9795 - 677ms/epoch - 11ms/step
Epoch 3/50
59/59 - 1s - loss: 0.0605 - acc: 0.9708 - val_loss: 0.0310 - val_acc: 0.9843 - 678ms/epoch - 11ms/step
Epoch 4/50
59/59 - 1s - loss: 0.0469 - acc: 0.9775 - val_loss: 0.0266 - val_acc: 0.9879 - 681ms/epoch - 12ms/step
Epoch 5/50
59/59 - 1s - loss: 0.0410 - acc: 0.9809 - val_loss: 0.0241 - val_acc: 0.9881 - 677ms/epoch - 11ms/step
Epoch 6/50
59/59 - 1s - loss: 0.0345 - acc: 0.9844 - val_loss: 0.0207 - val_acc: 0.9902 - 676ms/epoch - 11ms/step
Epoch 7/50
59/59 - 1s - loss: 0.0300 - acc: 0.9865 - val_loss: 0.0210 - val_acc: 0.9907 - 676ms/epoch - 11ms/step
Epoch 8/50
59/59 - 1s - loss: 0.0271 - acc: 0.9875 - val_loss: 0.0197 - val_acc: 0.9908 - 677ms/epoch - 11ms/step
Epoch 9/50
59/59 - 1s - loss: 0.0259 - acc: 0.9881 - val_loss: 0.0181 - val_acc: 0.9909 - 6

<keras.callbacks.History at 0x7f4b601c0d00>