In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from timm.models import create_model

from sunyata.pytorch.arch.convnext2 import ConvNext, ConvNextCfg, convnext_tiny, attnconvnext_tiny

In [2]:
cfg = ConvNextCfg(
    drop_path = 0.1,
    model_ema = True,
    model_ema_eval = True,

    num_workers = 8,
)
cfg

ConvNextCfg(batch_size=64, epochs=300, update_freq=1, drop_path=0.1, input_size=224, layer_scale_init_value=1e-06, model_ema=True, model_ema_decay=0.9999, model_ema_force_cpu=False, model_ema_eval=True, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.05, weight_decay_end=None, lr=0.004, layer_decay=1.0, min_lr=1e-06, warmup_epochs=20, warmup_steps=-1, color_jitter=0.4, aa='rand-m9-mstd0.5-incl', smoothing=0.1, train_interpolation='bicubic', crop_pct=None, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', head_init_scale=1.0, data_path=None, eval_data_path=None, nb_classes=1000, imagenet_default_mean_and_std=True, data_set='IMNET', output_dir='', log_dir=None, device='cuda', seed=0, resume='', auto_resume=True, save_ckpt=True, save_ckpt_freq=1, save_ckpt_num=3, start_epoch=0, eval=False, dist_eval=True, disable_eval=False, num_w

In [10]:
model = create_model(
    'convnext_tiny',
    pretrained=False, 
    pretrained_cfg=None,
    num_classes = cfg.nb_classes,
    drop_path_rate = cfg.drop_path,
    layer_scale_init_value = cfg.layer_scale_init_value,
    head_init_scale = cfg.head_init_scale,
)

In [11]:
n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
n_parameters

28589128

In [12]:
input = torch.randn(2,3,256,256)

In [13]:
model(input)

tensor([[ 0.4113,  0.0209, -0.2038,  ..., -0.1086, -0.3844, -0.0825],
        [ 0.5285, -0.3509, -0.1884,  ..., -0.3615,  1.0245, -0.3569]],
       grad_fn=<AddmmBackward0>)

In [14]:
attn_model = create_model(
    'attnconvnext_tiny',
    pretrained=False, 
    pretrained_cfg=None,
    num_classes = cfg.nb_classes,
    drop_path_rate = cfg.drop_path,
    layer_scale_init_value = cfg.layer_scale_init_value,
    head_init_scale = cfg.head_init_scale,
)

In [15]:
n_parameters = sum(p.numel() for p in attn_model.parameters() if p.requires_grad)
n_parameters

28597192

In [16]:
attn_model(input)

tensor([[ 0.3824,  0.4885, -0.1053,  ...,  0.0507,  0.3630, -0.0288],
        [ 0.1484,  0.1502, -1.1006,  ...,  0.6671, -0.3909,  0.3619]],
       grad_fn=<AddmmBackward0>)