In [None]:
# Move the kernel's working directory one level up before anything else runs.
# NOTE(review): presumably this makes the `evgena` package and the relative
# 'datasets/...' paths used by the training cells resolvable -- confirm.
# The magic is not idempotent: re-running this cell climbs one more directory.
%cd ..

In [None]:
from typing import Optional, Tuple

import numpy as np
import tensorflow as tf

from evgena.model import TrainableTfModel
from evgena.dataset import Dataset

## Fashion MNIST models ##
- "official" [benchmark](https://github.com/zalandoresearch/fashion-mnist#benchmark)
    - 3 convolutional layers with pooling, a dense layer and batch normalization (target accuracy 0.934)
    - [DenseNet](https://arxiv.org/pdf/1608.06993.pdf) (target accuracy 0.954)

### Simple CNN ###

In [None]:
def conv_bn(x, filters, kernel_size, stride, padding, is_training):
    """Chain a bias-free 2-D convolution, batch normalization and ReLU.

    Args:
        x: Input tensor handed to ``tf.layers.conv2d``.
        filters: Number of convolution filters.
        kernel_size: Side length of the square convolution kernel.
        stride: Step used along both convolved axes.
        padding: Padding mode forwarded to ``tf.layers.conv2d``.
        is_training: Forwarded to batch normalization as ``training``.

    Returns:
        The ReLU-activated, batch-normalized convolution output.
    """
    square_kernel = (kernel_size, kernel_size)
    square_stride = (stride, stride)

    # The convolution carries no bias term of its own.
    convolved = tf.layers.conv2d(
        x, filters, square_kernel,
        strides=square_stride, padding=padding, use_bias=False
    )
    normalized = tf.layers.batch_normalization(convolved, training=is_training)

    return tf.nn.relu(normalized)

In [None]:
def cnn(images, labels, is_training, global_step):
    """Build the simple three-convolution feature extractor.

    Layer order: two (conv_bn -> 2x2 max pool -> dropout) stages with 32 and 64
    filters, a third conv_bn with 128 filters, flatten, dropout, a 128-unit
    ReLU dense layer and a final dropout. No output layer is added here.

    Args:
        images: Input tensor fed to the first convolution.
        labels: Accepted but not used by this function.
        is_training: Forwarded to batch normalization and dropout.
        global_step: Accepted but not used by this function.

    Returns:
        The output of the last dropout layer.
    """
    def _drop(tensor):
        # Every dropout in this network shares the same rate.
        return tf.layers.dropout(tensor, rate=0.3, training=is_training)

    def _halve(tensor):
        # 2x2 max pooling with a 2x2 stride.
        return tf.layers.max_pooling2d(tensor, (2, 2), (2, 2))

    features = images
    for stage_filters in (32, 64):
        features = conv_bn(features, stage_filters, 3, 1, 'same', is_training)
        features = _drop(_halve(features))

    features = conv_bn(features, 128, 3, 1, 'same', is_training)
    features = _drop(tf.layers.flatten(features))
    features = tf.layers.dense(features, 128, activation=tf.nn.relu)

    return _drop(features)

In [None]:
# Train the simple CNN once per fold file (indices 0-9), each run with its own seed.
# NOTE(review): the seeds are drawn from NumPy's global RNG and no np.random.seed
# call is visible in this notebook, so they presumably differ between kernel runs.
fold_seeds = np.random.randint(65536, size=10, dtype=np.int32)

for i, seed in enumerate(fold_seeds):
    # NOTE(review): the positional 128 and 0.001 are presumably batch size and
    # initial learning rate -- confirm against TrainableTfModel.construct.
    model = TrainableTfModel.construct(
        cnn,
        'datasets/stratified_fashion_mnist_{}_fold.npz'.format(i),
        128,
        0.001,
        seed=int(seed),
        moment_axis=(0, 1, 2),
        weight_decay=0.0001,
        tag='simple_3cnn_10_fold_end_tuning',
        inference_batch_size=4096,
    )

    # Initial run, then two continuation runs from the last checkpoint with
    # progressively smaller learning rates.
    model.train(64)
    for learning_rate in (0.0002, 0.00004):
        model.train(32, from_checkpoint='last', learning_rate=learning_rate)

### DenseNet ###

In [None]:
class DenseNet:
    """Callable that builds a DenseNet feature extractor out of ``tf.layers`` ops.

    The graph built by ``__call__`` is: one 3x3 convolution, then ``block_count``
    dense blocks with a transition layer (1x1 convolution followed by 2x2
    average pooling) between consecutive blocks, then batch normalization, ReLU
    and a mean over axes 1 and 2. No classification layer is added here.

    Inputs are assumed to be 4-D channels-last tensors (features are
    concatenated on axis 3 and averaged over axes 1 and 2) -- TODO confirm
    against the caller.

    Args:
        growth_rate: Number of feature maps every dense layer appends to its input.
        depth: Nominal total depth; only used to derive ``block_depth``.
        block_count: Number of dense blocks.
        use_bc: When true, every dense layer is preceded by a 1x1 bottleneck
            convolution with ``4 * growth_rate`` filters and the number of
            dense layers per block is halved.
        compression: Factor applied to the channel count in every transition
            layer; also used to size the first convolution.
        dropout: Dropout rate applied after each composite convolution, or
            ``None`` to skip dropout entirely.

    Raises:
        ValueError: If ``block_count`` is below 1, ``compression`` is not
            positive, or ``depth`` is too small to give every dense block at
            least one layer.
    """

    def __init__(
        self, growth_rate: int, depth: int, block_count: int,
        use_bc: bool = False, compression: float = 1.0, dropout: Optional[float] = None
    ):
        # Fail early with a clear message instead of a ZeroDivisionError here
        # (block_count == 0) or later during graph construction (compression == 0).
        if block_count < 1:
            raise ValueError('block_count must be at least 1, got {!r}'.format(block_count))
        if compression <= 0:
            raise ValueError('compression must be positive, got {!r}'.format(compression))

        # A dense layer costs one convolution, or two with the bottleneck variant.
        convs_per_layer = 2 if use_bc else 1
        # block_count + 1 layers are budgeted outside the dense blocks
        # (presumably the first convolution, the block_count - 1 transition
        # convolutions and a final classifier layer added by the caller).
        block_depth = (depth - (block_count + 1)) // (block_count * convs_per_layer)
        if block_depth < 1:
            # Previously this silently produced dense blocks without any layers.
            raise ValueError(
                'depth={!r} is too small for block_count={!r} (use_bc={!r}): '
                'every dense block needs at least one layer'.format(depth, block_count, use_bc)
            )

        self.growth_rate = growth_rate
        self.depth = depth
        self.block_count = block_count
        self.block_depth = block_depth
        self.use_bc = use_bc
        self.compression = compression
        self.dropout = dropout

    def _activation(self, x, is_training):
        """Apply batch normalization followed by ReLU."""
        x = tf.layers.batch_normalization(x, training=is_training)
        return tf.nn.relu(x)

    def _bottleneck(self, x, is_training):
        """Apply BN-ReLU and a bias-free 1x1 convolution with 4 * growth_rate filters."""
        x = self._activation(x, is_training)
        return tf.layers.conv2d(x, 4 * self.growth_rate, 1, use_bias=False)

    def _composite_f(self, x, filters, kernel_size, is_training):
        """Apply BN-ReLU, a bias-free 'same'-padded convolution and optional dropout."""
        x = self._activation(x, is_training)
        x = tf.layers.conv2d(x, filters, kernel_size, padding='same', use_bias=False)
        if self.dropout is not None:
            x = tf.layers.dropout(x, rate=self.dropout, training=is_training)
        return x

    def _dense_block(self, x, is_training):
        """Stack ``block_depth`` dense layers, each appending growth_rate channels."""
        for _ in range(self.block_depth):
            block_input = x
            if self.use_bc:
                x = self._bottleneck(x, is_training)
            x = self._composite_f(x, self.growth_rate, 3, is_training)
            # Dense connectivity: the layer output is appended to its own input.
            x = tf.concat((block_input, x), axis=3)

        return x

    def _transition_layer(self, x, is_training):
        """Scale the channel count by ``compression`` (1x1 conv), then 2x2 average pool."""
        input_channels = int(x.get_shape()[-1])
        x = self._composite_f(x, int(self.compression * input_channels), 1, is_training)
        return tf.layers.average_pooling2d(x, 2, 2)

    def __call__(self, images, labels, is_training, global_step):
        """Build the network on ``images`` and return the pooled features.

        Args:
            images: Input tensor fed to the first convolution.
            labels: Accepted but not used by this method.
            is_training: Forwarded to batch normalization and dropout.
            global_step: Accepted but not used by this method.

        Returns:
            The mean over axes 1 and 2 of the final activated feature maps.
        """
        # First convolution: at least 16 filters, more when growth_rate / compression exceeds that.
        initial_filters = max(16, int(self.growth_rate / self.compression))
        x = tf.layers.conv2d(images, initial_filters, 3, padding='same', use_bias=False)

        # Every dense block except the last one is followed by a transition layer.
        for _ in range(self.block_count - 1):
            x = self._dense_block(x, is_training)
            x = self._transition_layer(x, is_training)

        x = self._dense_block(x, is_training)

        x = self._activation(x, is_training)
        return tf.reduce_mean(x, axis=(1, 2))

In [None]:
# Train DenseNet-BC (growth rate 8, depth 52, 3 blocks) once per fold file
# (indices 0-9), each run with its own seed and a Nesterov momentum optimizer.
# NOTE(review): the seeds are drawn from NumPy's global RNG and no np.random.seed
# call is visible in this notebook, so they presumably differ between kernel runs.
fold_seeds = np.random.randint(65536, size=10, dtype=np.int32)

for i, seed in enumerate(fold_seeds):
    # NOTE(review): the positional 128 and 0.1 are presumably batch size and
    # initial learning rate -- confirm against TrainableTfModel.construct.
    model = TrainableTfModel.construct(
        # A fresh DenseNet instance is created every time the builder is invoked.
        (lambda *args, **kwargs: DenseNet(8, 52, 3, use_bc=True, compression=0.5, dropout=0.2)(*args, **kwargs)),
        'datasets/stratified_fashion_mnist_{}_fold.npz'.format(i),
        128,
        0.1,
        seed=int(seed),
        optimizer=(lambda lr: tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)),
        moment_axis=(0, 1, 2),
        weight_decay=0.0001,
        tag='dense_net_BC_8_52_10_fold_nesterov',
        inference_batch_size=1024,
    )

    # Initial run, then two continuation runs from the last checkpoint, each
    # with a ten times smaller learning rate.
    model.train(64)
    for learning_rate in (0.01, 0.001):
        model.train(32, from_checkpoint='last', learning_rate=learning_rate)

In [None]:
# Total number of trainable parameters in the graph of `model`, i.e. of whichever
# model the most recently executed training cell left behind.
# np.prod replaces the deprecated np.product alias (removed in NumPy 2.0).
sum(
    np.prod(var.shape.as_list())
    for var in model._session.graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
)