In [None]:
from dataclasses import dataclass
import math
from pathlib import Path
import tensorflow as tf
from tensorflow.data import AUTOTUNE, Dataset
from tensorflow.keras.initializers import HeNormal
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Conv2D, Dense, BatchNormalization, Activation, Dropout
from tensorflow.keras.layers import RandomZoom, RandomFlip, RandomTranslation, RandomRotation
from tensorflow.keras.models import Model

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [None]:
# Directory that the train.csv / test.csv files are read from.
INPUT_DIR = Path('../input/digit-recognizer/')

# Mini-batch size shared by the train, validation and test pipelines.
BATCH_SIZE = 32

In [None]:
# Read both CSVs. NOTE: the names `train` / `test` are re-bound to tf.data
# pipelines further down the notebook, so these frames are only used here.
train = pd.read_csv(INPUT_DIR / 'train.csv')
test = pd.read_csv(INPUT_DIR / 'test.csv')

# Labels come from the `label` column; every column after the first in
# train.csv (and every column of test.csv) is treated as pixel data and
# scaled by 1/255.
Y_train = train['label'].values.astype('int32')
X_train = train.iloc[:, 1:].values.astype('float32') / 255.
X_test = test.values.astype('float32') / 255.

# Quick sanity output: pixel statistics and the first few labels.
print(f"{np.mean(X_train)} {np.std(X_train)}")
print(Y_train[:10])

# Dataset-wide constants used by the helpers and the model definition below.
NUM_CLASSES = np.max(Y_train) + 1
INPUT_SHAPE_2D = (28, 28)
INPUT_SHAPE = (*INPUT_SHAPE_2D, 1)
PIXEL_MEAN = np.mean(X_train)
PIXEL_STD = np.std(X_train)

def one_hot(data, classes=NUM_CLASSES):
    """Turn integer class ids into one-hot rows.

    Args:
        data: integer index (or array of indices) selecting rows of the
            identity matrix.
        classes: size of the identity matrix, i.e. the length of each row.

    Returns:
        The rows of ``np.eye(classes)`` picked out by ``data``.
    """
    return np.eye(classes)[data]

def normalize(pixels):
    """Standardise ``pixels`` with the global PIXEL_MEAN and PIXEL_STD."""
    centered = pixels - PIXEL_MEAN
    return centered / PIXEL_STD

def denormalize(pixels):
    """Invert `normalize`: rescale by PIXEL_STD, then add PIXEL_MEAN back."""
    rescaled = pixels * PIXEL_STD
    return rescaled + PIXEL_MEAN

# Standardise the pixels and fold each flat row into an INPUT_SHAPE image;
# the leading -1 lets NumPy infer the number of samples.
X_train = normalize(X_train).reshape((-1, *INPUT_SHAPE))
X_test = normalize(X_test).reshape((-1, *INPUT_SHAPE))

# Swap the integer labels for one-hot rows and show the first few.
Y_train = one_hot(Y_train)
print(Y_train[:10])

In [None]:
# Wrap the in-memory (image, one-hot label) arrays in a tf.data dataset,
# one element per sample.
full_set = tf.data.Dataset.from_tensor_slices((X_train, Y_train))

def make_train_valid(dataset, split=(0.9,)):
    """Shuffle ``dataset`` once and cut it into train / validation parts.

    Args:
        dataset: object supporting ``len()``, ``shuffle``, ``take`` and
            ``skip`` (a finite ``tf.data.Dataset`` in this notebook).
        split: tuple whose first entry is the fraction of elements that goes
            to the training part; the remainder becomes validation data.

    Returns:
        ``(train, valid)`` datasets. The shuffle uses
        ``reshuffle_each_iteration=False`` so the order is fixed after the
        first pass and the two parts stay disjoint across epochs.
    """
    total = len(dataset)
    n_train = math.floor(total * split[0])
    n_valid = total - n_train

    shuffled = dataset.shuffle(total, reshuffle_each_iteration=False)

    # TODO: Add test option as well.
    return shuffled.take(n_train), shuffled.skip(n_train).take(n_valid)

import logging
def plot_images(batch, labels=None, images_per_side=2):
    """Draw the first images of ``batch`` in a square grid.

    Args:
        batch: indexable collection of normalised images; each entry is
            passed through `denormalize` and reshaped to INPUT_SHAPE_2D.
        labels: optional indexable; ``labels[i]`` is used verbatim as the
            title of the i-th image.
        images_per_side: number of rows and columns in the grid. At most
            ``images_per_side ** 2`` images are drawn, fewer if ``batch`` is
            shorter than that.
    """
    # Silence the root logger while plotting (presumably to hide matplotlib's
    # value-range warnings -- confirm). The previous level is restored in
    # `finally`, so an exception while drawing cannot leave logging muted for
    # the rest of the notebook.
    logger = logging.getLogger()
    old_level = logger.level
    logger.setLevel(100)
    try:
        fig = plt.figure(figsize=(3 * images_per_side, 3 * images_per_side))
        # Never index past the end of a short (e.g. final) batch.
        num_images = min(images_per_side ** 2, len(batch))

        for i in range(num_images):
            ax = fig.add_subplot(images_per_side, images_per_side, i + 1)
            im = np.reshape(denormalize(batch[i]), INPUT_SHAPE_2D)

            if labels is not None:
                ax.set_title(labels[i], color='black')

            # Clearing the tick positions also removes the tick labels.
            ax.set_xticks([])
            ax.set_yticks([])
            # Draw on the axes just created rather than via pyplot's
            # implicit "current axes".
            ax.imshow(im)
    finally:
        logger.setLevel(old_level)

In [None]:
# Random geometric and contrast perturbations, applied to training batches only.
# NOTE(review): some Keras releases clip RandomContrast output to [0, 255];
# the inputs here are standardised and can be negative -- confirm the
# behaviour of the installed version.
data_augmentation = tf.keras.Sequential([
    RandomRotation(0.1, fill_mode='nearest'),
    RandomZoom(0.1, fill_mode='nearest'),
    RandomTranslation(0.1, 0.1, fill_mode='nearest'),
    tf.keras.layers.RandomContrast(0.1),
])


def _augment(images, labels):
    """Augment a batch of images in training mode; labels pass through."""
    return data_augmentation(images, training=True), labels


# Training pipeline: shuffle -> batch -> augment -> prefetch.
# Validation pipeline: batch -> prefetch (no augmentation).
train, valid = make_train_valid(full_set)
train = (
    train.shuffle(len(train))
    .batch(BATCH_SIZE)
    .map(_augment, num_parallel_calls=AUTOTUNE)
    .prefetch(buffer_size=AUTOTUNE)
)
valid = valid.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

# Peek at one augmented training batch, titled with the argmax of its labels.
batch = next(iter(train))
print(batch[0].shape)
plot_images(batch[0], np.argmax(batch[1], axis=1), images_per_side=4)

# Unlabelled test pipeline, plus a look at its first batch.
test = (
    Dataset.from_tensor_slices(X_test)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)
batch_test = next(iter(test))
plot_images(batch_test)


In [None]:
class ModelSaver(tf.keras.callbacks.Callback):
    """Callback that saves a model whenever validation accuracy improves.

    After each epoch the ``val_categorical_accuracy`` entry of ``logs`` is
    compared with the best value seen so far; when it is better by more than
    a small epsilon, the wrapped model is written to
    ``<model_dir>/<model_name>.h5`` (overwriting the previous file).

    Args:
        model: the model to save. It may differ from the model being fitted
            (the notebook passes the inner image model).
        model_dir: directory for the saved file. ``None`` disables saving.
        model_name: file stem; defaults to ``model.name``.
    """

    def __init__(self, model, model_dir=None, model_name=None):
        # Let the Keras base class set up its own attributes.
        super().__init__()
        self.__model = model
        self.__eps = 0.0002  # minimum improvement that triggers a save

        self.__model_dir = model_dir
        self.__save_name(model_name)

        self.__saved_models = {}
        self.__best_score = float('-inf')

    def __save_name(self, model_name):
        """Pick the file stem: the explicit name, else the wrapped model's name."""
        if model_name is None:
            # Use the model handed to __init__; the base class's `self.model`
            # is not populated at construction time.
            self.__base_name = self.__model.name
        else:
            self.__base_name = model_name

    def __model_path(self):
        """Return the path of the .h5 file inside the model directory."""
        return Path(self.__model_dir) / f'{self.__base_name}.h5'

    def save_model(self, score):
        """Save the wrapped model and remember ``score`` for its file.

        Does nothing when no model directory was configured.
        """
        if self.__model_dir is None:
            return

        model_file = self.__model_path()
        # Make sure the target directory exists before writing.
        model_file.parent.mkdir(parents=True, exist_ok=True)
        self.__model.save(model_file, save_format='h5')
        self.__saved_models[model_file] = score

    def saved_model_info(self):
        """Return ``(path, score)`` of the saved model, or ``None`` if nothing was saved."""
        return next(iter(self.__saved_models.items()), None)

    def load_best_model(self):
        """Load the model stored at the save path."""
        # TODO: if several files are ever kept, pick the best-scoring entry
        # of __saved_models instead of the single fixed path.
        return tf.keras.models.load_model(self.__model_path())

    def on_epoch_end(self, epoch, logs=None):
        """Save the model if this epoch's validation accuracy is a new best."""
        score = (logs or {}).get('val_categorical_accuracy')
        if score is None:
            # No validation metric for this epoch: nothing to compare against.
            print("ModelSaver: 'val_categorical_accuracy' not in logs; skipping save")
            return

        if score > (self.__best_score + self.__eps):
            self.__best_score = score
            self.save_model(score)
            print(f'Saved Model with score {score}')

@dataclass
class layer_config:
    """Settings for one convolutional block of the network."""
    layer_num: int    # index used to build the block's layer names
    filters: int      # number of Conv2D filters
    stride: int       # Conv2D stride
    dropout: float    # rate of the Dropout layer that ends the block
    skip_ahead: bool  # when True, the block output is also pooled into the classifier input
    
# One entry per convolutional block, in the order the blocks are stacked.
MODEL_CONFIG = [
    layer_config(layer_num=0, filters=25, stride=2, dropout=0.33, skip_ahead=False),
    layer_config(layer_num=1, filters=50, stride=1, dropout=0.5, skip_ahead=False),
    layer_config(layer_num=2, filters=200, stride=1, dropout=0.25, skip_ahead=True),
    layer_config(layer_num=3, filters=400, stride=2, dropout=0.5, skip_ahead=False),
    layer_config(layer_num=4, filters=250, stride=1, dropout=0.1, skip_ahead=False),
    layer_config(layer_num=5, filters=50, stride=1, dropout=0.6, skip_ahead=True),
    layer_config(layer_num=6, filters=325, stride=1, dropout=0.3, skip_ahead=False),
]

def conv_block(config, x):
    """Apply one Conv2D -> BatchNormalization -> Dropout block to ``x``.

    Args:
        config: a `layer_config` supplying filters, stride, dropout rate and
            the index used in the layer names.
        x: input tensor of the block.

    Returns:
        The output tensor of the block's Dropout layer.
    """
    idx = config.layer_num

    # 'same' padding sidesteps invalid stride/size combinations; it would
    # still be worth testing what happens when an invalid one is hit.
    conv = Conv2D(
        filters=config.filters,
        kernel_size=3,
        strides=config.stride,
        padding='same',
        activation='relu',
        kernel_initializer=HeNormal(),
        name=f'conv2d-{idx}',
    )
    features = conv(x)
    features = BatchNormalization(name=f'bn-{idx}')(features)
    return Dropout(rate=config.dropout, name=f'drop-{idx}')(features)

def create_image_model(input_shape=INPUT_SHAPE, n_classes=NUM_CLASSES, layer_configs=None):
    """Build the convolutional classifier.

    The blocks described by ``layer_configs`` are stacked in order. The output
    of the last block and of every block flagged ``skip_ahead`` is globally
    average-pooled; the pooled vectors are concatenated, passed through
    dropout and mapped to ``n_classes`` outputs by a Dense layer with no
    activation (i.e. logits).

    Args:
        input_shape: shape of one input image.
        n_classes: number of output units.
        layer_configs: iterable of `layer_config`; defaults to MODEL_CONFIG.

    Returns:
        A Keras ``Model`` from the image input to the logits.
    """
    if layer_configs is None:
        layer_configs = MODEL_CONFIG

    inp = Input(input_shape)
    x = inp

    skip_layers = []
    for config in layer_configs:
        x = conv_block(config, x)
        if config.skip_ahead:
            # Feed this block's output straight to the classifier head as
            # well -- similar to densenet blocks.
            skip_layers.append(x)

    # Final output first, then the skip branches in creation order.
    pools = [tf.keras.layers.GlobalAveragePooling2D()(branch)
             for branch in (x, *skip_layers)]

    # Use a Keras layer rather than a raw tf op on symbolic tensors so the
    # graph stays a plain functional model. A single branch needs no merge.
    if len(pools) > 1:
        x = tf.keras.layers.Concatenate(axis=-1)(pools)
    else:
        x = pools[0]
    x = Dropout(0.6)(x)

    x = Dense(n_classes)(x)

    return Model(inp, x)

def create_model(input_shape=INPUT_SHAPE, n_classes=NUM_CLASSES):
    """Build the image model and an outer model that wraps it.

    Args:
        input_shape: shape of one input image.
        n_classes: number of output units.

    Returns:
        ``(full_model, image_model)``: the outer model that feeds its input
        through ``image_model``, and the inner image model itself.
    """
    inp = Input(input_shape)

    image_model = create_image_model(input_shape, n_classes)
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print().
    image_model.summary()
    x = image_model(inp)

    return Model(inp, x), image_model

def compile_model(model):
    """Compile ``model`` in place for one-hot labels and logit outputs.

    Uses categorical cross-entropy with ``from_logits=True``, an Adam
    optimizer with default settings, and tracks categorical accuracy and
    top-2 categorical accuracy.
    """
    model.compile(
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=[
            tf.keras.metrics.CategoricalAccuracy(),
            tf.keras.metrics.TopKCategoricalAccuracy(k=2),
        ],
    )

# Build and compile the outer model; `model` is the inner image model.
full_model, model = create_model()
compile_model(full_model)

# Save the inner model to saved_models/baseline.h5 whenever validation
# accuracy improves.
model_saver = ModelSaver(model, 'saved_models', 'baseline')

# Stop after 10 epochs without improvement and roll the weights back to the
# best epoch, so the in-memory model used for the submission is not the one
# from up to `patience` epochs past the best validation score.
# NOTE(review): some TF releases only restore weights when training actually
# stops early -- confirm against the installed version.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_categorical_accuracy',
    patience=10,
    restore_best_weights=True,
)

print("Running Training")
full_model.fit(train,
               validation_data=valid,
               epochs=100,
               callbacks=[model_saver, early_stopping])

submission_model = model


In [None]:
def create_submission(model, dataset=X_test, output_path='submission.csv'):
    """Predict labels for ``dataset`` and write them to a CSV file.

    Args:
        model: object whose ``predict(dataset)`` returns one row of class
            scores per sample.
        dataset: inputs passed to ``model.predict``.
        output_path: destination of the CSV file.

    Returns:
        DataFrame with columns ``ImageId`` (1-based row number) and ``Label``
        (argmax of the scores); the same table is written to ``output_path``
        without the index column.
    """
    res = model.predict(dataset)
    labels = np.argmax(res, axis=1)
    # Ids run from 1 to the number of predictions.
    index = np.arange(1, len(labels) + 1)
    print(index.shape)
    print(labels.shape)
    result = pd.DataFrame({'ImageId': index, 'Label': labels})
    print(result.head(5))
    result.to_csv(output_path, index=False)
    return result

# Write the submission file from the test pipeline, then show the first test
# batch titled with the predicted labels.
sub = create_submission(submission_model, dataset=test)
t = next(iter(test))
plot_images(t, labels=sub['Label'], images_per_side=4)