In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function



import math
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend
from tensorflow.keras import models
from tensorflow.keras import utils
from tensorflow.keras.applications import imagenet_utils
import tensorflow
from copy import deepcopy

In [3]:
import cai.util
import cai.models
import cai.layers

In [4]:
def correct_pad(backend, inputs, kernel_size):
    """Returns a tuple for zero-padding for 2D convolution with downsampling.
    # Arguments
        input_size: An integer or tuple/list of 2 integers.
        kernel_size: An integer or tuple/list of 2 integers.
    # Returns
        A tuple.
    """
    img_dim = 1 # 2 if backend.image_data_format() == 'channels_first' else 1
    #извлечение чисел - первое равно h/w, второе - n_channels
    input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)]
    print(input_size, backend.image_data_format())

    if isinstance(kernel_size, int):
        print('is_inst', isinstance(kernel_size, int))
        kernel_size = (kernel_size, kernel_size)

    if input_size[0] is None:
        adjust = (1, 1)
    else:
        adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)

    correct = (kernel_size[0] // 2, kernel_size[1] // 2)

    return ((correct[0] - adjust[0], correct[0]),
            (correct[1] - adjust[1], correct[1]))

In [5]:
import numpy as np

In [6]:
inputs = np.random.rand(32, 32,32, 3)

kernel_size = 3

In [7]:
correct_pad(backend,  inputs, kernel_size)

(32, 32) channels_last
is_inst True


((0, 1), (0, 1))

In [8]:
def swish(x):
    """Swish activation function.
    # Arguments
        x: Input tensor.
    # Returns
        The Swish activation: `x * sigmoid(x)`.
    # References
        [Searching for Activation Functions](https://arxiv.org/abs/1710.05941)
    """
    if backend.backend() == 'tensorflow':
        try:
            # The native TF implementation has a more
            # memory-efficient gradient implementation
            return backend.tf.nn.swish(x)
        except AttributeError:
            pass

    return x * backend.sigmoid(x)

In [9]:
DEFAULT_BLOCKS_ARGS = [
    {'kernel_size': 3, 'repeats': 1, 'filters_in': 32, 'filters_out': 16,
     'expand_ratio': 1, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25},
    {'kernel_size': 3, 'repeats': 2, 'filters_in': 16, 'filters_out': 24,
     'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
    {'kernel_size': 5, 'repeats': 2, 'filters_in': 24, 'filters_out': 40,
     'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
    {'kernel_size': 3, 'repeats': 3, 'filters_in': 40, 'filters_out': 80,
     'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
    {'kernel_size': 5, 'repeats': 3, 'filters_in': 80, 'filters_out': 112,
     'expand_ratio': 6, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25},
    {'kernel_size': 5, 'repeats': 4, 'filters_in': 112, 'filters_out': 192,
     'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
    {'kernel_size': 3, 'repeats': 1, 'filters_in': 192, 'filters_out': 320,
     'expand_ratio': 6, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25}
] 

CONV_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 2.0,
        'mode': 'fan_out',
        # EfficientNet actually uses an untruncated normal distribution for
        # initializing conv layers, but keras.initializers.VarianceScaling use
        # a truncated distribution.
        # We decided against a custom initializer for better serializability.
        'distribution': 'normal'
    }
}

DENSE_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 1. / 3.,
        'mode': 'fan_out',
        'distribution': 'uniform'
    }
}

In [18]:

def kConv2D(last_tensor, filters=32, channel_axis=3, name=None, activation=None, has_batch_norm=True, has_batch_scale=True, use_bias=True, kernel_size=1, stride_size=1, padding='same', kType=2):
    print("last_tensor  ", last_tensor)
    prev_layer_channel_count = tensorflow.keras.backend.int_shape(last_tensor)[channel_axis]
    print("prev_layer_channel_count ", prev_layer_channel_count)

    
    if kType == 0:
        return kConv2DType0(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == 1:
    #    return kConv2DType1(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == D6_16ch():
    #    return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=16)
    #elif kType == kT3_16ch():
    #    return kConv2DType3(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == 4:
    #    return kConv2DType4(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == 5:
    #    return kConv2DType5(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == 6:
    #    return kConv2DType6(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == 7:
    #    return kConv2DType7(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, bin_conv_count=0)
    #elif kType == 8:
    #    return kConv2DType7(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, bin_conv_count=1)
    #elif kType == 9:
    #    return kConv2DType7(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, bin_conv_count=2)
    #elif kType == 10:
    #    return kConv2DType7(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, bin_conv_count=4)
    #elif kType == 11:
    #    return kConv2DType7(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, bin_conv_count=5)
    #elif kType == 12:
    #    return kConv2DType7(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, bin_conv_count=6)
    elif kType == cai.layers.D6_32ch():
        return cai.layers.kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=32)
    #elif kType == D6_8ch():
    #    return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=8)
    #elif kType == D6_4ch():
    #    return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=4)
    #elif kType == 16:
    #    if prev_layer_channel_count >= filters:
    #        return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=32)
    #    else:
    #        return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=16)
    #elif kType == 17:
    #    if prev_layer_channel_count < filters:
    #        return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=32)
    #    else:
    #        return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=16)
    #elif kType == 18:
    #    if prev_layer_channel_count >= filters:
    #        return kConv2DType7(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, bin_conv_count=5)
    #    else:
    #        return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=16)
    #elif kType == 19:
    #    return kConv2DType8(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=16)
    #elif kType == 20:
    #    return kConv2DType8(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=32)
    #elif kType == 21:
    #    return kConv2DType8(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=16, always_intergroup=True)
    #elif kType == 22:
    #    return kConv2DType8(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=32, always_intergroup=True)
    #elif kType == kT3_32ch():
    #    return kConv2DType3(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=32)
    #elif kType == D6_64ch():
    #    return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=64)
    #elif kType == kT3_64ch():
    #    return kConv2DType3(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=64)
    #elif kType == D6_128ch():
    #    return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=128)
    #elif kType == kT3_128ch():
    #    return kConv2DType3(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=128)
    #elif kType == 28:
    #    return kConv2DType9(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=16, always_intergroup=True)
    #elif kType == 29:
    #    return kConv2DType9(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=32, always_intergroup=True)
    #elif kType == 30:
    #    return kConv2DType9(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=64, always_intergroup=True)
    #elif kType == 31:
    #    return kConv2DType9(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=128, always_intergroup=True)
    #elif kType == D6v3_16ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=16, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == D6v3_32ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=32, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == D6v3_64ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=64, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == D6v3_128ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=128, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == kT3v3_16ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=16, kernel_size=kernel_size, stride_size=stride_size, padding=padding, never_intergroup=True)
    #elif kType == kT3v3_32ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=32, kernel_size=kernel_size, stride_size=stride_size, padding=padding, never_intergroup=True)
    #elif kType == kT3v3_64ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=64, kernel_size=kernel_size, stride_size=stride_size, padding=padding, never_intergroup=True)
    #elif kType == kT3v3_128ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=128, kernel_size=kernel_size, stride_size=stride_size, padding=padding, never_intergroup=True)
    #elif kType == D6_12ch():
    #    return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=12)
    #elif kType == D6_24ch():
    #    return kConv2DType2(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=kernel_size, stride_size=stride_size, padding=padding, min_channels_per_group=24)
    #elif kType == D6v3_12ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=12, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == D6v3_24ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=24, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == D6v3_8ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=8, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == D6v3_4ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=4, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == D6v3_2ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=2, kernel_size=kernel_size, stride_size=stride_size, padding=padding)
    #elif kType == kT3v3_4ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=4, kernel_size=kernel_size, stride_size=stride_size, padding=padding, never_intergroup=True)
    #elif kType == kT3v3_8ch():
    #    return kConv2DType10(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, min_channels_per_group=8, kernel_size=kernel_size, stride_size=stride_size, padding=padding, never_intergroup=True)

def kPointwiseConv2D(last_tensor, filters=32, channel_axis=3, name=None, activation=None, has_batch_norm=True, has_batch_scale=True, use_bias=True, kType=2):
    """
    Parameter efficient pointwise convolution as shown in these papers:
    https://www.researchgate.net/publication/360226228_Grouped_Pointwise_Convolutions_Reduce_Parameters_in_Convolutional_Neural_Networks
    https://www.researchgate.net/publication/363413038_An_Enhanced_Scheme_for_Reducing_the_Complexity_of_Pointwise_Convolutions_in_CNNs_for_Image_Classification_Based_on_Interleaved_Grouped_Filters_without_Divisibility_Constraints
    """
    return kConv2D(last_tensor, filters=filters, channel_axis=channel_axis, name=name, activation=activation, has_batch_norm=has_batch_norm, has_batch_scale=has_batch_scale, use_bias=use_bias, kernel_size=1, stride_size=1, padding='same', kType=kType)


In [11]:


def kblock(inputs, activation_fn=swish, drop_rate=0., name='',
          filters_in=32, filters_out=16, kernel_size=3, strides=1,
          expand_ratio=1, se_ratio=0., id_skip=True, kType=1,
          dropout_all_blocks=False):
    """A mobile inverted residual block.
    # Arguments
        inputs: input tensor.
        activation_fn: activation function.
        drop_rate: float between 0 and 1, fraction of the input units to drop.
        name: string, block label.
        filters_in: integer, the number of input filters.
        filters_out: integer, the number of output filters.
        kernel_size: integer, the dimension of the convolution window.
        strides: integer, the stride of the convolution.
        expand_ratio: integer, scaling coefficient for the input filters.
        se_ratio: float between 0 and 1, fraction to squeeze the input filters.
        id_skip: boolean.
    # Returns
        output tensor for the block.
    """
    bn_axis = 3

    # Expansion phase
    filters = filters_in * expand_ratio
    
    if expand_ratio != 1:
        #x = layers.Conv2D(filters, 1,
        #                 padding='same',
        #                  use_bias=False,
        #                  kernel_initializer=CONV_KERNEL_INITIALIZER,
        #                  name=name + 'expand_conv')(inputs)
        #x = layers.BatchNormalization(axis=bn_axis, name=name + 'expand_bn')(x)
        #x = layers.Activation(activation_fn, name=name + 'expand_activation')(x)
        x = cai.layers.kPointwiseConv2D(last_tensor=inputs, filters=filters, channel_axis=bn_axis, name=name+'expand', activation=activation_fn, has_batch_norm=True, use_bias=False, kType=kType)
    else:
        x = inputs

    # Depthwise Convolution
    if strides == 2:
        x = layers.ZeroPadding2D(padding=correct_pad(backend, x, kernel_size),
                                 name=name + 'dwconv_pad')(x)
        conv_pad = 'valid'
    else:
        conv_pad = 'same'
    x = layers.DepthwiseConv2D(kernel_size,
                               strides=strides,
                               padding=conv_pad,
                               use_bias=False,
                               depthwise_initializer=CONV_KERNEL_INITIALIZER,
                               name=name + 'dwconv')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=name + 'bn')(x)
    x = layers.Activation(activation_fn, name=name + 'activation')(x)

    # Squeeze and Excitation phase
    if 0 < se_ratio <= 1:
        filters_se = max(1, int(filters_in * se_ratio))
        se = layers.GlobalAveragePooling2D(name=name + 'se_squeeze')(x)
        if bn_axis == 1:
            se = layers.Reshape((filters, 1, 1), name=name + 'se_reshape')(se)
        else:
            se = layers.Reshape((1, 1, filters), name=name + 'se_reshape')(se)
        #se = layers.Conv2D(filters_se, 1,
        #                   padding='same',
        #                   activation=activation_fn,
        #                   kernel_initializer=CONV_KERNEL_INITIALIZER,
        #                   name=name + 'se_reduce')(se)
        print("kPointwiseConv2D 1v se = ", se)
        se = kPointwiseConv2D(last_tensor=se, filters=filters_se, channel_axis=bn_axis, name=name+'se_reduce', activation=activation_fn, has_batch_norm=False, use_bias=True, kType=kType)
        #se = layers.Conv2D(filters, 1,
        #                   padding='same',
        #                   activation='sigmoid',
        #                   kernel_initializer=CONV_KERNEL_INITIALIZER,
        #                   name=name + 'se_expand')(se)
        print("kPointwiseConv3D 2v se = ", se)
        se = kPointwiseConv2D(last_tensor=se, filters=filters, channel_axis=bn_axis, name=name+'se_expand', activation='sigmoid', has_batch_norm=False, use_bias=True, kType=kType)
        x = layers.multiply([x, se], name=name + 'se_excite')

    # Output phase
    #x = layers.Conv2D(filters_out, 1,
    #                  padding='same',
    #                  use_bias=False,
    #                  kernel_initializer=CONV_KERNEL_INITIALIZER,
    #                  name=name + 'project_conv')(x)
    # x = layers.BatchNormalization(axis=bn_axis, name=name + 'project_bn')(x)
    x = cai.layers.kPointwiseConv2D(last_tensor=x, filters=filters_out, channel_axis=bn_axis, name=name+'project_conv', activation=None, has_batch_norm=True, use_bias=False, kType=kType)

    if (drop_rate > 0)  and (dropout_all_blocks):
        x = layers.Dropout(drop_rate,
                noise_shape=(None, 1, 1, 1),
                name=name + 'drop')(x)

    if (id_skip is True and strides == 1 and filters_in == filters_out):
        if (drop_rate > 0)  and (not dropout_all_blocks):
            x = layers.Dropout(drop_rate,
                               noise_shape=(None, 1, 1, 1),
                               name=name + 'drop')(x)
        x = layers.add([x, inputs], name=name + 'add')
    return x

def kblockLastName(drop_rate=0., name='',
          filters_in=32, filters_out=16, strides=1,
          id_skip=True,
          dropout_all_blocks=False):
    last_name = name + 'project_conv'

    if (drop_rate > 0)  and (dropout_all_blocks):
        last_name = name + 'drop'

    if (id_skip is True and strides == 1 and filters_in == filters_out):
        last_name = name + 'add'
    return last_name


In [12]:
#https://github.com/joaopauloschuler/k-neural-api/blob/master/cai/efficientnet.py

def kEffNet2D(
        width_coefficient,
        depth_coefficient,
        skip_stride_cnt=-1,
        dropout_rate=0.2,
        drop_connect_rate=0.2,
        depth_divisor=8,
        activation_fn=swish,
        blocks_args=DEFAULT_BLOCKS_ARGS,
        model_name='efficientnet',
        include_top=True,
        input_tensor=None,
        input_shape=None,
        pooling=None,
        classes=1000,
        kType=2,
        concat_paths=True,
        dropout_all_blocks=False,
        name_prefix='k_',
        **kwargs):
    """Instantiates the EfficientNet architecture using given scaling coefficients.
    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.
    # Arguments
        width_coefficient: float, scaling coefficient for network width.
        depth_coefficient: float, scaling coefficient for network depth.
        skip_stride_cnt: number of layers to skip stride. This parameter is used with smalll images such as CIFAR-10.
        dropout_rate: float, dropout rate before final classifier layer.
        drop_connect_rate: float, dropout rate at skip connections.
        depth_divisor: integer, a unit of network width.
        activation_fn: activation function.
        blocks_args: list of dicts, parameters to construct block modules.
        model_name: string, model name.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        input_tensor: optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False.
            It should have exactly 3 inputs channels.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid input shape.
    """

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3

    def round_filters(filters, divisor=depth_divisor):
        """Round number of filters based on depth multiplier."""
        filters *= width_coefficient
        new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
        # Make sure that round down does not go down by more than 10%.
        if new_filters < 0.9 * filters:
            new_filters += divisor
        return int(new_filters)

    def round_repeats(repeats):
        """Round number of repeats based on depth multiplier."""
        return int(math.ceil(depth_coefficient * repeats))

    if isinstance(kType, (int)):
        kTypeList = [kType]
    else:
        kTypeList = kType
    
    # Build stem
    x = img_input
    x = layers.ZeroPadding2D(padding=correct_pad(backend, x, 3),
                             name=name_prefix+'stem_conv_pad')(x)
    first_stride = 1 if skip_stride_cnt >= 0 else 2
    x = layers.Conv2D(round_filters(32), 3,
                      strides=first_stride,
                      padding='valid',
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name=name_prefix+'stem_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=name_prefix+'stem_bn')(x)
    x = layers.Activation(activation_fn, name=name_prefix+'stem_activation')(x)

    root_layer = x
    output_layers = []
    path_cnt = 0
    for kType in kTypeList:
        x = root_layer
        blocks_args_cp = deepcopy(blocks_args)
        b = 0
        blocks = float(sum(args['repeats'] for args in blocks_args_cp))
        #only the first branch can backpropagate to the input.
        #if path_cnt>0:
        #    x = keras.layers.Lambda(lambda x: tensorflow.stop_gradient(x))(x)
        for (i, args) in enumerate(blocks_args_cp):
            assert args['repeats'] > 0
            # Update block input and output filters based on depth multiplier.
            args['filters_in'] = round_filters(args['filters_in'])
            args['filters_out'] = round_filters(args['filters_out'])

            for j in range(round_repeats(args.pop('repeats'))):
                #should skip the stride
                if (skip_stride_cnt > i) and (j == 0) and (args['strides'] > 1):
                    args['strides'] = 1
                # The first block needs to take care of stride and filter size increase.
                if (j > 0):
                    args['strides'] = 1
                    args['filters_in'] = args['filters_out']
                print("x = kblock before    ", x)
                x = kblock(x, activation_fn, drop_connect_rate * b / blocks,
                          name=name_prefix+'block{}{}_'.format(i + 1, chr(j + 97))+'_'+str(path_cnt), **args,
                          kType=kType, dropout_all_blocks=dropout_all_blocks)
                print("x = kblock after ", x)
                b += 1
        if (len(kTypeList)>1):
            x = layers.Activation('relu', name=name_prefix+'end_relu'+'_'+str(path_cnt))(x)
        output_layers.append(x)
        path_cnt = path_cnt +1
        
    if (len(output_layers)==1):
        x = output_layers[0]
    else:
        if concat_paths:
            x = keras.layers.Concatenate(axis=bn_axis, name=name_prefix+'global_concat')(output_layers)
        else:
            x = keras.layers.add(output_layers, name=name_prefix+'global_add')

    # Build top
    #x = layers.Conv2D(round_filters(1280), 1,
    #                  padding='same',
    #                  use_bias=False,
    #                  kernel_initializer=CONV_KERNEL_INITIALIZER,
    #                  name='top_conv')(x)
    #x = layers.BatchNormalization(axis=bn_axis, name='top_bn')(x)
    #x = layers.Activation(activation_fn, name='top_activation')(x)
    x = cai.layers.kPointwiseConv2D(last_tensor=x, filters=round_filters(1280), channel_axis=bn_axis, name=name_prefix+'top_conv', activation=None, has_batch_norm=True, use_bias=False, kType=kType)
    print("x = cai.layers.kPointwiseConv2D  ", x)
    if pooling == 'avg':
        x = layers.GlobalAveragePooling2D(name=name_prefix+'avg_pool')(x)
    elif pooling == 'max':
        x = layers.GlobalMaxPooling2D(name=name_prefix+'max_pool')(x)
    elif pooling == 'avgmax':
        x = cai.layers.GlobalAverageMaxPooling2D(x, name=name_prefix+'avgmax_pool')

    if include_top:
        if (dropout_rate > 0):
            x = layers.Dropout(dropout_rate, name=name_prefix+'top_dropout')(x)
        x = layers.Dense(classes,
            activation='softmax', # 'softmax'
            kernel_initializer=DENSE_KERNEL_INITIALIZER,
            name=name_prefix+'probs')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs, x, name=model_name)

    return model

In [13]:
def kEfficientNetB0(include_top=True,
                   input_tensor=None,
                   input_shape=None,
                   pooling='avg',
                   classes=1000,
                   kType=2,
                   dropout_rate=0.2,
                   drop_connect_rate=0.2,
                   skip_stride_cnt=-1,
                   activation_fn=swish,
                   dropout_all_blocks=False,
                   **kwargs):
    return kEffNet2D(1.0, 1.0, skip_stride_cnt=skip_stride_cnt, # 224,
                        model_name='kEffNet-b0',
                        include_top=include_top,
                        input_tensor=input_tensor, input_shape=input_shape,
                        pooling=pooling, classes=classes,
                        kType=kType,
                        dropout_rate=dropout_rate,
                        drop_connect_rate=drop_connect_rate,
                        activation_fn=activation_fn,
                        dropout_all_blocks=dropout_all_blocks,
                        **kwargs)

In [19]:
k = kEffNet2D(width_coefficient = 1.0, depth_coefficient = 1.0, input_shape=(32, 32, 1), skip_stride_cnt=3, kType = cai.layers.D6_32ch())

(32, 32) channels_last
is_inst True
x = kblock before     KerasTensor(type_spec=TensorSpec(shape=(None, 31, 31, 32), dtype=tf.float32, name=None), name='k_stem_activation/mul:0', description="created by layer 'k_stem_activation'")
kPointwiseConv2D 1v se =  KerasTensor(type_spec=TensorSpec(shape=(None, 1, 1, 32), dtype=tf.float32, name=None), name='k_block1a__0se_reshape/Reshape:0', description="created by layer 'k_block1a__0se_reshape'")
last_tensor   KerasTensor(type_spec=TensorSpec(shape=(None, 1, 1, 32), dtype=tf.float32, name=None), name='k_block1a__0se_reshape/Reshape:0', description="created by layer 'k_block1a__0se_reshape'")
prev_layer_channel_count  32
kPointwiseConv3D 2v se =  KerasTensor(type_spec=TensorSpec(shape=(None, 1, 1, 8), dtype=tf.float32, name=None), name='k_block1a__0se_reduce/mul:0', description="created by layer 'k_block1a__0se_reduce'")
last_tensor   KerasTensor(type_spec=TensorSpec(shape=(None, 1, 1, 8), dtype=tf.float32, name=None), name='k_block1a__0se_reduc

In [20]:
k.summary()

Model: "efficientnet"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 32, 32, 1)]  0           []                               
                                                                                                  
 k_stem_conv_pad (ZeroPadding2D  (None, 33, 33, 1)   0           ['input_3[0][0]']                
 )                                                                                                
                                                                                                  
 k_stem_conv (Conv2D)           (None, 31, 31, 32)   288         ['k_stem_conv_pad[0][0]']        
                                                                                                  
 k_stem_bn (BatchNormalization)  (None, 31, 31, 32)  128         ['k_stem_conv[0][0]'] 

In [None]:
def work_on_efficientnet(show_model=False, run_fit=False, test_results=False, calc_f1=False):
    monitor='val_accuracy'
    if (calc_f1): 
        test_results=True
    if (show_model):
        input_shape = (target_size_x, target_size_y, 3)
    else:
        input_shape = (None, None, 3)
    for kType in [cai.layers.D6v3_16ch(), cai.layers.D6v3_32ch()]: #
        basefilename = '/content/drive/MyDrive/output/JP30I02-EfficientNet-CIFAR10-'+str(kType)
        best_result_file_name = basefilename+'-best_result.hdf5'
        print('Running: '+basefilename)
        model = cai.efficientnet.kEfficientNetB0(
            include_top=True,
            skip_stride_cnt=3,
            input_shape=input_shape,
            classes=num_classes,
            kType=kType)
        
        optimizer = keras.optimizers.RMSprop()
        optimizer = mixed_precision.LossScaleOptimizer(optimizer)
        model.compile(
            loss='categorical_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy'])

        if (show_model): 
            model.summary(line_length=180)
            print('model flops:',get_flops(model))

        save_best = keras.callbacks.ModelCheckpoint(
                filepath=best_result_file_name,
                monitor=monitor,
                verbose=1,
                save_best_only=True,
                save_weights_only=False,
                mode='max',
                save_freq='epoch')

        if (run_fit): 
                train_flow = train_datagen.flow(
                    x_train, y_train,
                    batch_size=batch_size,
                    shuffle=True,
                    seed=seed,
                    subset='training'
                )
                validation_flow = train_datagen.flow(
                    x_train, y_train,
                    batch_size=batch_size,
                    shuffle=True,
                    seed=seed,
                    subset='validation'
                )
                history = model.fit(
                x = train_flow,
                epochs=epochs,
                batch_size=batch_size,
                validation_data=validation_flow,
                callbacks=[save_best, tf.keras.callbacks.LearningRateScheduler(cyclical_adv_lrscheduler25)],
                workers=cpus_num,
                max_queue_size=128
                )
                plt.figure()
                plt.ylabel("Accuracy (training and validation)")
                plt.xlabel("Epochs")
                plt.ylim([0,1])
                plt.plot(history.history["accuracy"])
                plt.plot(history.history["val_accuracy"])
        if (test_results):
            test_flow = test_datagen.flow(
                x_test, y_test,
                batch_size=batch_size,
                shuffle=True,
                seed=seed
            )
            print('Best Model Results: '+best_result_file_name)
            model = cai.models.load_kereas_model(best_result_file_name)
            evaluated = model.evaluate(
                x=test_flow,
                batch_size=batch_size,
                use_multiprocessing=False,
                workers=cpus_num
            )
            for metric, name in zip(evaluated,["loss","acc"]):
                print(name,metric)
        if (calc_f1):
            model = cai.models.load_kereas_model(best_result_file_name)
            pred_y = model.predict(x_test)
            print("Predicted Shape:", pred_y.shape)
            pred_classes_y = np.array(list(np.argmax(pred_y, axis=1)))
            test_classes_y = np.array(list(np.argmax(y_test, axis=1)))
            print("Pred classes shape:",pred_classes_y.shape)
            print("Test classes shape:",test_classes_y.shape)
            report = classification_report(test_classes_y, pred_classes_y, digits=4)
            print(report)
        print('Finished: '+basefilename)

In [None]:
if channels_per_group==2:
   kTypes = [cai.layers.D6v3_2ch()]
elif channels_per_group==4:
   kTypes = [cai.layers.D6v3_4ch()]
elif channels_per_group==8:
   kTypes = [cai.layers.D6v3_8ch()]
elif channels_per_group==12:
   kTypes = [cai.layers.D6v3_12ch()]
elif channels_per_group==16:
   kTypes = [cai.layers.D6v3_16ch()]
else:
   kTypes = [cai.layers.D6v3_32ch()]