In [136]:
import tensorflow as tf

import maxvit.models.hparams as hparams
import maxvit.models.maxvit as layers

import logging

from zoobot.tensorflow.estimators import efficientnet_custom, efficientnet_standard, custom_layers

import numpy as np

In [107]:
# Pick the MaxViT variant by name and fetch its hyper-parameter config
# (the resulting config is displayed in full by a later cell).
model_name = 'MaxViTTiny'
config = hparams.lookup(model_name)

In [108]:
# Override the training image size in place; eval.image_size is not touched
# (it still reads 224 in the dump below).
config.train.image_size = 300
config

{'__maxvit__': 1, 'dataset': 'imagenet', 'double_transpose': True, 'input': {'augname': 'randaug', 'ra_num_layers': 2, 'ra_magnitude': 15, 'mixup_alpha': 0.8, 'cutmix_alpha': 0.0, 'legacy_preprocess': True}, 'loss': {'xent_type': 'softmax', 'label_smoothing': 0.1}, 'train': {'split': None, 'image_size': 300, 'epochs': 300, 'batch_size': 4096, 'optimizer': 'adamw', 'lr_schedule': {'type': 'cosine', 'warmup_steps': 10000, 'warmup_epochs': None, 'lr_max': 0.003, 'lr_min': 1e-05}, 'weight_decay': 0.05, 'weight_decay_exclude': '.*(bias|scale|gain|gamma|beta).*', 'ema_decay': 0.9999, 'grad_clip': 1.0, 'steps': None}, 'eval': {'split': None, 'image_size': 224, 'batch_size': 16, 'steps': None}, 'path': {'ckpt_dir': None}, 'tpu': {'iterations_per_loop': 5000, 'save_checkpoints_steps': 5000, 'keep_checkpoint_max': 0, 'keep_checkpoint_every_n_hours': 4, 'use_bfloat16': True}, 'init': {'warm_start_mode': 'restore_train', 'warm_start_from': None}, 'model': {'block_type': ['maxvit', 'maxvit', 'maxvi

In [109]:
# Check the evaluation image size, which was left unchanged by the override above.
config.eval.image_size

224

In [110]:
# Instantiate MaxViT from the model section of the config.
# NOTE(review): `model` is rebound to a fresh tf.keras.Sequential further down,
# so this instance is discarded once that cell runs.
model = layers.MaxViT(config.model)

Try building the MaxViT backbone plus Keras blocks during the hack, and check what the resulting model looks like.

Add an `if` statement that controls whether the head from the actual MaxViT model is used. Remove the head in the MaxViT code!

### Building the Model

In [158]:
# Per-image input shape, without the batch dimension
# (presumably height, width, channels — channels-last).
input_shape = (300, 300, 3)

In [159]:
# Start from an empty Sequential model; layers are appended cell by cell below.
model = tf.keras.Sequential()

In [160]:
# Declare the expected per-image input shape up front.
model.add(
    tf.keras.layers.InputLayer(input_shape=input_shape)
)

In [161]:
# Augmentation settings used by the cells below.
always_augment = False        # passed through to add_augmentation_layers
resize_size = 224             # final side length after augmentation
crop_size = int(0.75 * 300)   # random-crop side: 75% of the 300 px input side -> 225

In [162]:
def add_augmentation_layers(
    model,
    crop_size=crop_size,
    resize_size=resize_size,
    always_augment=always_augment
):
    """
    Append the image augmentation stack to the end of ``model`` (in place).

    Layers are appended in this order:
        1. Random rotation (aliased)
        2. Random flip (horizontal and/or vertical)
        3. Random, non-centered crop down to ``(crop_size, crop_size)``
        4. Bilinear resize down to ``(resize_size, resize_size)``

    Step 4 is dropped when ``crop_size`` and ``resize_size`` differ by less than 10.
    In that case the image is cropped straight to ``resize_size`` and ``crop_size`` is
    ignored. This is faster and avoids information loss from aliasing, so it is the
    recommended configuration where possible.

    TODO: ideally the augmentations would live outside the model (as with pytorch);
    not a high-priority change.

    Args:
        model (tf.keras.Model): Model to extend, typically an (almost) empty
            tf.keras.Sequential, since layers are added at the *end*.
        crop_size (int): Side length after the random crop (square crop assumed).
        resize_size (int): Side length after resizing (square output assumed).
        always_augment (bool, optional): If True, use the ``custom_layers.Perma*``
            variants so augmentations also happen at test time. Otherwise the stock
            Keras preprocessing layers are used.

    Returns:
        None. ``model`` is modified in place.
    """
    if crop_size < resize_size:
        logging.warning('Crop size {} < final size {}, losing resolution'.format(
            crop_size, resize_size))

    # Near-identical sizes: crop straight to the final size and skip the resize layer.
    skip_resize = np.abs(crop_size - resize_size) < 10
    if skip_resize:
        logging.warning(
            'Crop size and final size are similar: skipping resizing and cropping directly to resize_size (ignoring crop_size)')
        crop_side = resize_size
    else:
        crop_side = crop_size

    # Choose between the "always on" custom layers and the stock Keras layers.
    if always_augment:
        rotate_cls = custom_layers.PermaRandomRotation
        flip_cls = custom_layers.PermaRandomFlip
        crop_cls = custom_layers.PermaRandomCrop
    else:
        preprocessing = tf.keras.layers.experimental.preprocessing
        rotate_cls = preprocessing.RandomRotation
        flip_cls = preprocessing.RandomFlip
        crop_cls = preprocessing.RandomCrop

    # The rotation factor is given as a plain float (np.pi fails with tf 2.5).
    # NOTE(review): the original comment states the range as +/- 0.25 * 2pi (+/- 90 deg).
    # Keras documents a factor f as [-f * 2pi, f * 2pi], which for 0.5 would be
    # +/- 180 deg — confirm which is intended.
    model.add(rotate_cls(0.5, fill_mode='reflect'))
    model.add(flip_cls())
    model.add(crop_cls(crop_side, crop_side))

    if skip_resize:
        return

    logging.info('Using resizing, to {}'.format(resize_size))
    model.add(tf.keras.layers.experimental.preprocessing.Resizing(
        resize_size, resize_size, interpolation='bilinear'
    ))

In [163]:
# Append rotation / flip / crop (and, if needed, resize) layers to the model in place.
add_augmentation_layers(
    model,
    crop_size=crop_size,
    resize_size=resize_size,
    always_augment=always_augment,
)



In [170]:
# NOTE(review): repeated full dump of the config; consider displaying only the
# fields of interest to keep the notebook output short.
config

{'__maxvit__': 1, 'dataset': 'imagenet', 'double_transpose': True, 'input': {'augname': 'randaug', 'ra_num_layers': 2, 'ra_magnitude': 15, 'mixup_alpha': 0.8, 'cutmix_alpha': 0.0, 'legacy_preprocess': True}, 'loss': {'xent_type': 'softmax', 'label_smoothing': 0.1}, 'train': {'split': None, 'image_size': 300, 'epochs': 300, 'batch_size': 4096, 'optimizer': 'adamw', 'lr_schedule': {'type': 'cosine', 'warmup_steps': 10000, 'warmup_epochs': None, 'lr_max': 0.003, 'lr_min': 1e-05}, 'weight_decay': 0.05, 'weight_decay_exclude': '.*(bias|scale|gain|gamma|beta).*', 'ema_decay': 0.9999, 'grad_clip': 1.0, 'steps': None}, 'eval': {'split': None, 'image_size': 224, 'batch_size': 16, 'steps': None}, 'path': {'ckpt_dir': None}, 'tpu': {'iterations_per_loop': 5000, 'save_checkpoints_steps': 5000, 'keep_checkpoint_max': 0, 'keep_checkpoint_every_n_hours': 4, 'use_bfloat16': True}, 'init': {'warm_start_mode': 'restore_train', 'warm_start_from': None}, 'model': {'block_type': ['maxvit', 'maxvit', 'maxvi

In [78]:
# Separate MaxViT instance kept under its own name, so it does not clash with `model`.
maxvit_model = layers.MaxViT(config.model)

In [99]:
# Inspect the hidden_size entry of the model config (a list of four values, per the output).
config.model.hidden_size

[64, 128, 256, 512]

In [181]:
# NOTE(review): another full dump of the same config; consider removing the duplicate.
config

{'__maxvit__': 1, 'dataset': 'imagenet', 'double_transpose': True, 'input': {'augname': 'randaug', 'ra_num_layers': 2, 'ra_magnitude': 15, 'mixup_alpha': 0.8, 'cutmix_alpha': 0.0, 'legacy_preprocess': True}, 'loss': {'xent_type': 'softmax', 'label_smoothing': 0.1}, 'train': {'split': None, 'image_size': 300, 'epochs': 300, 'batch_size': 4096, 'optimizer': 'adamw', 'lr_schedule': {'type': 'cosine', 'warmup_steps': 10000, 'warmup_epochs': None, 'lr_max': 0.003, 'lr_min': 1e-05}, 'weight_decay': 0.05, 'weight_decay_exclude': '.*(bias|scale|gain|gamma|beta).*', 'ema_decay': 0.9999, 'grad_clip': 1.0, 'steps': None}, 'eval': {'split': None, 'image_size': 224, 'batch_size': 16, 'steps': None}, 'path': {'ckpt_dir': None}, 'tpu': {'iterations_per_loop': 5000, 'save_checkpoints_steps': 5000, 'keep_checkpoint_max': 0, 'keep_checkpoint_every_n_hours': 4, 'use_bfloat16': True}, 'init': {'warm_start_mode': 'restore_train', 'warm_start_from': None}, 'model': {'block_type': ['maxvit', 'maxvit', 'maxvi

In [175]:
# NOTE(review): the saved output for this cell is a plain list ([2, 2, 5, 2]), which does
# not look like the model config dumped elsewhere — the output is probably stale from an
# earlier version of this cell; re-run to confirm.
config.model

[2, 2, 5, 2]

In [164]:
# Reference to the EfficientNetB0 constructor (not called here); it is handed to
# define_headless_efficientnet as `get_effnet` in the next cell.
effnet_model = efficientnet_standard.EfficientNetB0

In [165]:
# Headless EfficientNet backbone (defined in efficientnet_custom.py), built without
# ImageNet weights.
# TODO: point this at a MaxViT equivalent (maxvit_standard.py) instead.
effnet = efficientnet_custom.define_headless_efficientnet(
    # Must match the output of the augmentation layers, which always ends at
    # (resize_size, resize_size); derive it instead of repeating the literal 224.
    input_shape=(resize_size, resize_size, 3),
    get_effnet=effnet_model,  # backbone constructor
    # further kwargs will be passed to get_effnet
    use_imagenet_weights=False,
)
model.add(effnet)  # TODO: modify — swap in the MaxViT backbone

In [166]:
# Classification head: pool the backbone feature map, apply dropout, then let
# custom_top_dirichlet append the output layer(s) in place.
pooling = tf.keras.layers.GlobalAveragePooling2D()
model.add(pooling)

# NOTE(review): 0.8 is an unusually high dropout rate — confirm it is intended
# as the drop probability rather than the keep probability.
top_dropout = custom_layers.PermaDropout(0.8, name='top_dropout')
model.add(top_dropout)

# NOTE(review): 34 is presumably the output dimension of the head — name it as a constant.
efficientnet_custom.custom_top_dirichlet(model, 34)  # inplace

In [167]:
# NOTE(review): the head above comes from custom_top_dirichlet; confirm that
# categorical cross-entropy / accuracy are the intended loss and metric for it.
model.compile(
    optimizer='adadelta',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [168]:
# Keras build() expects the full input shape *including* the batch dimension.
# None leaves the batch size unspecified; the per-image shape is reused from
# input_shape instead of repeating the literal.
model.build((None,) + tuple(input_shape))

In [169]:
# Print the layer-by-layer summary to check output shapes and parameter counts.
model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 random_rotation_7 (RandomRo  (None, 300, 300, 3)      0         
 tation)                                                         
                                                                 
 random_flip_7 (RandomFlip)  (None, 300, 300, 3)       0         
                                                                 
 random_crop_7 (RandomCrop)  (None, 224, 224, 3)       0         
                                                                 
 sequential_12 (Sequential)  (None, 7, 7, 1280)        4049564   
                                                                 
 global_average_pooling2d_3   (None, 1280)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 top_dropout (PermaDropout)  (None, 1280)            