In [1]:
%%capture
!pip install ipywidgets

In [2]:
# ! wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar
# ! wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar

In [2]:
import torch

from torchvision import datasets

In [3]:
# Open the ImageNet training split (the default split) without any transform,
# only to inspect the dataset.
dataset_train = datasets.ImageNet(root='/notebooks/imagenet/')

In [4]:
# Show the dataset repr (number of datapoints, root location, split).
dataset_train

Dataset ImageNet
    Number of datapoints: 1281167
    Root location: /notebooks/imagenet/
    Split: train

In [5]:
%%capture
! pip install git+https://github.com/keepsimpler/sunyata
! pip install pytorch-lightning
! pip install pytorch-lightning-bolts
! pip install einops

In [None]:
! pip install timm

In [6]:
import torch
import torchvision
# Display the installed torch / torchvision versions as a tuple.
torch.__version__, torchvision.__version__

('1.12.0+cu116', '0.13.0+cu116')

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms

import pytorch_lightning as pl
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning.callbacks import LearningRateMonitor

from sunyata.pytorch.data.tiny_imagenet import TinyImageNet, TinyImageNetDataModule

from sunyata.pytorch.arch.base import BaseModule, Residual




In [4]:
from sunyata.pytorch.arch.convnext2 import ConvNext, ConvNextCfg, convnext_tiny

In [5]:
# Training configuration: override only the stochastic-depth rate, the EMA
# switches and the loader worker count; every other field keeps its default.
cfg = ConvNextCfg(drop_path=0.1, model_ema=True, model_ema_eval=True, num_workers=8)
cfg

ConvNextCfg(batch_size=64, epochs=300, update_freq=1, drop_path=0.1, input_size=224, layer_scale_init_value=1e-06, model_ema=True, model_ema_decay=0.9999, model_ema_force_cpu=False, model_ema_eval=True, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.05, weight_decay_end=None, lr=0.004, layer_decay=1.0, min_lr=1e-06, warmup_epochs=20, warmup_steps=-1, color_jitter=0.4, aa='rand-m9-mstd0.5-incl', smoothing=0.1, train_interpolation='bicubic', crop_pct=None, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', head_init_scale=1.0, data_path=None, eval_data_path=None, nb_classes=1000, imagenet_default_mean_and_std=True, data_set='IMNET', output_dir='', log_dir=None, device='cuda', seed=0, resume='', auto_resume=True, save_ckpt=True, save_ckpt_freq=1, save_ckpt_num=3, start_epoch=0, eval=False, dist_eval=True, disable_eval=False, num_w

In [6]:
from timm.data.constants import \
    IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
from timm.data import create_transform
from timm.data.mixup import Mixup
from timm.models import create_model
from timm.models.registry import register_model
from timm.utils import ModelEma

In [7]:
# Pick the normalisation statistics: the ImageNet defaults when the config
# asks for them, otherwise the Inception constants.
if cfg.imagenet_default_mean_and_std:
    mean, std = IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
else:
    mean, std = IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD

In [20]:
# Training-time augmentation pipeline built by timm from the config values.
transform = create_transform(
    input_size=cfg.input_size,
    is_training=True,
    # colour / auto-augment policy
    color_jitter=cfg.color_jitter,
    auto_augment=cfg.aa,
    interpolation=cfg.train_interpolation,
    # random erasing
    re_prob=cfg.reprob,
    re_mode=cfg.remode,
    re_count=cfg.recount,
    # normalisation statistics selected earlier
    mean=mean,
    std=std,
)

In [21]:
# For small inputs (<= 32 px) swap the first transform of the training
# pipeline for a padded random crop at the target size.
# NOTE(review): assumes the first transform is timm's resize/crop step — confirm.
resize_im = cfg.input_size > 32
if not resize_im:
    small_input_crop = transforms.RandomCrop(cfg.input_size, padding=4)
    transform.transforms[0] = small_input_crop

In [25]:
# Re-create the training split (the default split), now applying the
# training augmentation pipeline to every sample.
dataset_train = datasets.ImageNet(
    root='/notebooks/imagenet/',
    transform=transform,
)

In [26]:
# Evaluation preprocessing.
# Without a resize/crop step the images keep their native sizes, so the
# default DataLoader collation cannot stack them into a batch and the model
# would not see inputs of `cfg.input_size`. Resize the shorter side so that a
# centre crop of `cfg.input_size` covers `crop_pct` of it (224/256 when the
# config leaves `crop_pct` unset), then crop, convert and normalise.
eval_crop_pct = cfg.crop_pct if cfg.crop_pct is not None else 224 / 256
eval_resize = int(cfg.input_size / eval_crop_pct)

val_transform = transforms.Compose([
    transforms.Resize(eval_resize, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.CenterCrop(cfg.input_size),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

In [27]:
# Validation split, preprocessed with the evaluation transform.
dataset_val = datasets.ImageNet(
    root='/notebooks/imagenet/',
    split='val',
    transform=val_transform,
)

In [29]:
# Training loader.
# shuffle=True: without it the loader walks the dataset in its stored order
# every epoch instead of drawing randomly mixed mini-batches.
# drop_last=True: discard the final partial batch so every step sees a full
# batch of `cfg.batch_size` samples.
data_loader_train = torch.utils.data.DataLoader(
    dataset_train,
    batch_size = cfg.batch_size,
    shuffle = True,
    num_workers = cfg.num_workers,
    pin_memory = cfg.pin_mem,
    drop_last = True,
)

In [30]:
# Validation loader: 1.5x the training batch size, and the last partial
# batch is kept so every validation sample is evaluated.
val_batch_size = int(1.5 * cfg.batch_size)
data_loader_val = torch.utils.data.DataLoader(
    dataset_val,
    batch_size=val_batch_size,
    num_workers=cfg.num_workers,
    pin_memory=cfg.pin_mem,
    drop_last=False,
)

In [8]:
# Mixup/CutMix is considered active as soon as any one of its three knobs is set.
mixup_active = any((
    cfg.mixup > 0,
    cfg.cutmix > 0.,
    cfg.cutmix_minmax is not None,
))
mixup_active

True

In [9]:
# Build the Mixup/CutMix batch augmenter only when it is enabled by the
# config; otherwise leave `mixup_fn` as None so downstream code can skip it.
mixup_fn = None
if mixup_active:
    mixup_fn = Mixup(
        mixup_alpha = cfg.mixup, cutmix_alpha = cfg.cutmix,
        cutmix_minmax = cfg.cutmix_minmax,
        prob = cfg.mixup_prob,
        switch_prob = cfg.mixup_switch_prob,
        mode = cfg.mixup_mode,
        label_smoothing = cfg.smoothing,
        num_classes = cfg.nb_classes
    )

In [10]:
# Instantiate a randomly initialised ConvNeXt-Tiny through timm's model
# factory, sized and regularised according to the config.
model = create_model(
    'convnext_tiny',
    pretrained=False,
    pretrained_cfg=None,
    num_classes=cfg.nb_classes,
    drop_path_rate=cfg.drop_path,
    layer_scale_init_value=cfg.layer_scale_init_value,
    head_init_scale=cfg.head_init_scale,
)

In [14]:
# Resolve the device named in the config and display it.
# NOTE(review): no visible cell moves `model` to this device — confirm whether
# that is intended.
device = torch.device(cfg.device)
device

device(type='cuda')

In [15]:
# Optional exponential-moving-average copy of the model weights.
# The EMA copy is pinned to the CPU only when the config forces it; an empty
# device string is passed otherwise.
if cfg.model_ema:
    ema_device = 'cpu' if cfg.model_ema_force_cpu else ''
    model_ema = ModelEma(
        model,
        decay=cfg.model_ema_decay,
        device=ema_device,
        resume='',
    )
else:
    model_ema = None

In [17]:
# Count the trainable parameters (those with requires_grad set) of the model.
trainable_params = (p for p in model.parameters() if p.requires_grad)
n_parameters = sum(p.numel() for p in trainable_params)
n_parameters

28589128

In [None]:
# Effective batch size per optimiser update: per-step batch size times the
# configured update frequency.
total_batch_size = cfg.batch_size * cfg.update_freq
# Whole optimiser steps per epoch (floor division drops the remainder).
num_training_steps_per_epoch = len(dataset_train) // total_batch_size

In [8]:
# NOTE(review): `ResNext50` is not defined or imported in any visible cell, and
# this assignment replaces the ConvNeXt `model` created earlier — confirm
# which model is meant to be trained.
model = ResNext50(cfg)

# Print a one-level-deep parameter summary of the model.
pl.utilities.model_summary.summarize(model,max_depth=1)





  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 23.4 M
---------------------------------
23.4 M    Trainable params
0         Non-trainable params
23.4 M    Total params
93.559    Total estimated model params size (MB)

In [9]:
# The ConvNextCfg printed above exposes the epoch count as `epochs`; fall back
# to `num_epochs` for configs that use that field name instead.
trainer_max_epochs = getattr(cfg, "epochs", None)
if trainer_max_epochs is None:
    trainer_max_epochs = cfg.num_epochs

# Single-GPU, 16-bit mixed-precision trainer that logs metrics to CSV and
# writes no checkpoints.
trainer = pl.Trainer(
    # progress_bar_refresh_rate=10,
    precision=16,
    max_epochs=trainer_max_epochs,
    accelerator='gpu',
    devices=1,
    enable_checkpointing=False,
    logger=pl_loggers.CSVLogger("lightning_logs/", name="convmixer"),
#     callbacks=[LearningRateMonitor(logging_interval="step")],
)


Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:

# Start training.
# NOTE(review): `tiny_image_net_datamodule` is not defined in any visible
# cell; the notebook builds `data_loader_train` / `data_loader_val` instead —
# confirm which data source is intended.
trainer.fit(model, tiny_image_net_datamodule)
