In [1]:
import torch
import torch.nn as nn
import torchvision.transforms.functional as F

import torch.optim as optim
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

from torch.cuda.amp import autocast, GradScaler
from torch.nn.utils import clip_grad_norm_
from tqdm import tqdm
import time

from timm.data import Mixup
from timm.utils import ModelEmaV3
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
import transformers

from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from torchsummary import summary

from model.fcmae import convnextv2_fcmae_tiny
import math
import warnings
from torch.optim.lr_scheduler import _LRScheduler

class CosineWarmupScheduler(_LRScheduler):
    """Linear warmup followed by cosine decay, floored at ``min_lr``.

    For each parameter group with base learning rate ``base_lr``:

    * while ``last_epoch < num_warmup_steps`` the rate rises linearly from
      ``min_lr`` (at step 0) towards ``base_lr``;
    * afterwards it follows
      ``min_lr + (base_lr - min_lr) * 0.5 * (1 + cos(2 * pi * num_cycles * progress))``
      where ``progress`` runs from 0 to 1 over the remaining
      ``num_training_steps - num_warmup_steps`` steps;
    * once ``num_training_steps`` has been reached, ``progress`` is held at 1
      so the rate stays at its end-of-schedule value instead of climbing
      back up if ``step()`` keeps being called.

    Args:
        optimizer: Wrapped optimizer.
        num_warmup_steps: Number of scheduler steps spent in linear warmup.
        num_training_steps: Total number of scheduler steps in the schedule
            (warmup included).
        num_cycles: Number of cosine periods covered by the decay phase;
            the default ``0.5`` is a single half-wave from ``base_lr`` down
            to ``min_lr``.
        min_lr: Learning rate at step 0 and lower end of the cosine curve.
        last_epoch: Index of the last step, ``-1`` to start from scratch.
        verbose: Forwarded to the parent scheduler only when truthy.
    """

    def __init__(self, optimizer, num_warmup_steps, num_training_steps, num_cycles=0.5, min_lr=1e-6, last_epoch=-1, verbose=False):
        # These must exist before the parent constructor runs: it performs an
        # initial step, which calls get_lr().
        self.num_warmup_steps = num_warmup_steps
        self.num_training_steps = num_training_steps
        self.num_cycles = num_cycles
        self.min_lr = min_lr
        # ``self.base_lrs`` is populated by the parent constructor, so it is
        # not assigned here.
        if verbose:
            super().__init__(optimizer, last_epoch, verbose)
        else:
            # Do not forward the default: recent PyTorch releases deprecate
            # the ``verbose`` argument and warn whenever it is passed.
            super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of every parameter group at ``last_epoch``."""
        if not getattr(self, "_get_lr_called_within_step", True):
            warnings.warn("To get the last learning rate computed by the scheduler, "
                          "please use `get_last_lr()`.", UserWarning)

        step = self.last_epoch
        if step < self.num_warmup_steps:
            # Linear warmup: fraction of the way from min_lr to base_lr.
            scale = step / max(1, self.num_warmup_steps)
        else:
            # Cosine annealing; clamp progress so stepping past the planned
            # horizon cannot push the learning rate back up.
            decay_steps = max(1, self.num_training_steps - self.num_warmup_steps)
            progress = min(1.0, (step - self.num_warmup_steps) / decay_steps)
            scale = 0.5 * (1 + math.cos(math.pi * self.num_cycles * 2.0 * progress))

        # The scale factor is shared by all groups; only base_lr differs.
        return [self.min_lr + (base_lr - self.min_lr) * scale for base_lr in self.base_lrs]

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the model with the convnextv2_fcmae_tiny factory imported from
# model.fcmae, using the factory's default arguments.
model = convnextv2_fcmae_tiny()
# Bare trailing expression: the notebook renders the module's repr as the
# cell output (the layer listing that follows).
model

FCMAE(
  (encoder): SparseConvNeXtV2(
    (stem): Sequential(
      (stem_conv): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (stem_ln): LayerNorm()
    )
    (downsample_layers): ModuleList(
      (0): Sequential(
        (stem_conv): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
        (stem_ln): LayerNorm()
      )
      (1): Sequential(
        (ds_ln0): MinkowskiLayerNorm(
          (ln): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
        )
        (ds_conv1): MinkowskiConvolution(in=96, out=192, kernel_size=[2, 2], stride=[1, 1], dilation=[1, 1])
      )
      (2): Sequential(
        (ds_ln1): MinkowskiLayerNorm(
          (ln): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
        )
        (ds_conv2): MinkowskiConvolution(in=192, out=384, kernel_size=[2, 2], stride=[1, 1], dilation=[1, 1])
      )
      (3): Sequential(
        (ds_ln2): MinkowskiLayerNorm(
          (ln): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        )
        (