In [None]:
!pip install pytorch-lightning
!pip install torchmetrics==0.11.4

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.2.4-py3-none-any.whl (802 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m802.2/802.2 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>=0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.3.2-py3-none-any.whl (841 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m841.5/841.5 kB[0m [31m57.4 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.0->pytorch-lightning)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13.0->pytorch-lightning)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13.0->pytor

In [None]:
!pip install timm albumentations



In [None]:
# Colab only: mount Google Drive so the dataset paths under /content/drive resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# cd "/content/drive/My Drive/cvopen/train"

In [None]:
# !unzip -qq "/content/drive/MyDrive/cvopen/train.zip"

In [None]:
import os
import gc
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import pytorch_lightning as L
import cv2
import albumentations as A
import timm

from albumentations.pytorch import ToTensorV2
from glob import glob
from PIL import Image
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from torchvision.io import read_image
from torchvision.transforms import v2 as  transforms
from torch.utils.data import Dataset, DataLoader
from transformers import Swinv2Config, Swinv2Model, AutoImageProcessor, AutoModelForImageClassification
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

In [None]:
# Hyper-parameters shared by the plain-PyTorch training cells.
CFG = dict(
    SEED=41,        # seed for seed_everything and the StratifiedKFold split
    IMG_SIZE=224,   # side length every image is resized to
    BATCH_SIZE=12,  # mini-batch size of the train/val loaders
    EPOCHS=100,     # upper bound on epochs per fold
)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def seed_everything(seed):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and current CUDA
    device), exports ``PYTHONHASHSEED`` and switches cuDNN to its
    deterministic, non-benchmarking configuration.

    Args:
        seed: integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # Trade cuDNN autotuning speed for repeatable results.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED'])

In [None]:
# Load the training metadata (image paths plus species label per row) and preview it.
dfs = pd.read_csv('/content/drive/MyDrive/cvopen/train.csv')
dfs.head()

Unnamed: 0,img_path,upscale_img_path,label
0,./train/TRAIN_00000.jpg,./upscale_train/TRAIN_00000.png,Ruddy Shelduck
1,./train/TRAIN_00001.jpg,./upscale_train/TRAIN_00001.png,Gray Wagtail
2,./train/TRAIN_00002.jpg,./upscale_train/TRAIN_00002.png,Indian Peacock
3,./train/TRAIN_00003.jpg,./upscale_train/TRAIN_00003.png,Common Kingfisher
4,./train/TRAIN_00004.jpg,./upscale_train/TRAIN_00004.png,Common Kingfisher


In [None]:
dfs['label'].value_counts()

label
Ruddy Shelduck               671
White-Breasted Kingfisher    666
Red-Wattled Lapwing          663
Northern Lapwing             663
Gray Wagtail                 661
White Wagtail                659
Jungle Babbler               658
Sarus Crane                  657
Common Kingfisher            657
Common Rosefinch             653
White-Breasted Waterhen      649
Indian Grey Hornbill         645
Common Tailorbird            643
Brown-Headed Barbet          642
Cattle Egret                 641
Indian Roller                639
Indian Peacock               637
Hoopoe                       631
House Crow                   630
Common Myna                  630
Forest Wagtail               629
Rufous Treepie               616
Asian Green Bee-Eater        613
Coppersmith Barbet           550
Indian Pitta                 431
Name: count, dtype: int64

In [None]:
class CustomDataset(Dataset):
    """Image dataset driven by a DataFrame of relative image paths.

    Args:
        df: one row per sample; must contain ``img_col`` and, for the
            'train'/'val' modes, a ``label`` column.
        root_dir: directory the relative paths from ``img_col`` are joined onto.
        img_col: name of the column holding the image path.
        mode: 'train' or 'val' (samples carry a label) or 'test' (image only).
        transforms: optional callable invoked as ``transforms(image=array)``
            that returns a mapping with an ``'image'`` entry.
    """

    LABELLED_MODES = ('train', 'val')
    VALID_MODES = LABELLED_MODES + ('test',)

    def __init__(self, df, root_dir, img_col, mode='train', transforms=None):
        self.df = df
        self.root_dir = root_dir
        self.img_col = img_col
        self.mode = mode
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``{'image': ...}`` plus ``'label'`` in the labelled modes.

        Raises:
            ValueError: if ``self.mode`` is not one of ``VALID_MODES``.
        """
        # Fail before touching the disk instead of silently returning None.
        if self.mode not in self.VALID_MODES:
            raise ValueError('Invalid mode.')

        row = self.df.iloc[idx]
        # Drop the first two characters of the stored path (the "./" prefix
        # seen in train.csv) so it joins cleanly onto root_dir.
        img_path = os.path.join(self.root_dir, row[self.img_col][2:])

        # Force 3-channel RGB so grayscale / RGBA / palette files still match
        # the 3-channel normalisation, and close the file handle promptly.
        with Image.open(img_path) as handle:
            image = np.array(handle.convert('RGB'))

        if self.transforms:
            image = self.transforms(image=image)['image']

        sample = {'image': image}
        if self.mode in self.LABELLED_MODES:
            sample['label'] = row['label']
        return sample

In [None]:
class CustomCollateFn:
    """Collate per-sample dicts into one batch dict.

    Args:
        mode: 'train' / 'val' produce ``{'images', 'labels'}``; 'test'
            produces ``{'images'}`` only.
    """

    def __init__(self, mode):
        self.mode = mode

    def __call__(self, batch):
        """Stack the samples' image tensors (and labels) along a new batch axis.

        Raises:
            ValueError: if the configured mode is not train/val/test.
        """
        image_list = [sample['image'] for sample in batch]

        if self.mode == 'test':
            return {'images': torch.stack(image_list)}

        if self.mode not in ('train', 'val'):
            raise ValueError('Invalid mode.')

        label_list = [sample['label'] for sample in batch]
        return {
            'images': torch.stack(image_list),
            'labels': torch.LongTensor(label_list),
        }

In [None]:
# Per-channel normalisation statistics (the usual ImageNet values).
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

# Training pipeline: resize, light random augmentation, normalise, to tensor.
train_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE'], interpolation=cv2.INTER_CUBIC),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.Normalize(mean=mean, std=std),
    ToTensorV2()
])

# Validation pipeline: deterministic on purpose. Random flips / brightness
# changes here would make val_loss vary between runs of the same weights,
# which corrupts checkpoint selection, LR scheduling and early stopping.
val_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE'], interpolation=cv2.INTER_CUBIC),
    A.Normalize(mean=mean, std=std),
    ToTensorV2()
])

# Test pipeline: identical preprocessing to validation.
test_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE'], interpolation=cv2.INTER_CUBIC),
    A.Normalize(mean=mean, std=std),
    ToTensorV2()
])

train_collate_fn = CustomCollateFn('train')
val_collate_fn = CustomCollateFn('val')

In [None]:
def accuracy(pred, true):
    """Fraction of rows in ``pred`` whose highest-scoring column equals ``true``.

    Args:
        pred: score tensor reduced along dim 1 (one row per sample).
        true: tensor of target class indices, compared element-wise.

    Returns:
        A 0-dim tensor holding ``correct / number_of_rows``.
    """
    predicted_classes = pred.max(dim=1).indices
    n_correct = (predicted_classes == true).sum().item()
    return torch.tensor(n_correct / len(predicted_classes))

In [None]:
class EarlyStopping:
    """Patience counter for a higher-is-better score.

    A call counts as an improvement only when ``score >= best_score + delta``;
    anything lower increments the patience counter.

    NOTE: this class has the same name as the ``EarlyStopping`` callback
    imported from pytorch_lightning, so defining it rebinds that name.

    Args:
        patience: number of consecutive non-improving calls before stopping.
        verbose: stored on the instance; not consulted by ``__call__``.
        delta: margin a score must exceed the best score by to count.
    """

    def __init__(self, patience=10, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, score):
        """Record ``score`` and return True once patience is exhausted.

        The first call only stores the score. ``early_stop`` is never reset
        to False once it has been set.
        """
        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Improvement: remember it and restart the patience window.
            self.best_score = score
            self.counter = 0

        return self.early_stop

In [None]:
from tqdm.auto import notebook_tqdm

In [None]:
def train(model, train_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: network called as ``model(images)``.
        train_loader: iterable of dicts with ``'images'`` and ``'labels'`` tensors.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(outputs, labels)``.
        device: device the batch tensors are moved to.

    Returns:
        ``(mean_loss, mean_accuracy)``, each the per-batch sum divided by
        ``len(train_loader)``.
    """
    model.train()

    running_loss, running_acc = 0.0, 0.0

    progress = notebook_tqdm(train_loader, desc='Training')
    for step, batch in enumerate(progress, start=1):
        images = batch['images'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        acc = accuracy(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_acc += acc.item()

        # Show the running means over the batches seen so far.
        progress.set_postfix({
            'Train Loss': f'[{running_loss / step:.5f}]',
            'Train Acc': f'[{running_acc / step:.5f}]',
        })

    n_batches = len(train_loader)
    return running_loss / n_batches, running_acc / n_batches

def validate(model, val_loader, criterion, device):
  """Evaluate ``model`` on ``val_loader`` without tracking gradients.

  Args:
    model: network called as ``model(images)``.
    val_loader: iterable of dicts with ``'images'`` and ``'labels'`` tensors.
    criterion: loss called as ``criterion(outputs, labels)``.
    device: device the batch tensors are moved to.

  Returns:
    ``(mean_loss, mean_accuracy)``, each the per-batch sum divided by
    ``len(val_loader)``.
  """
  model.eval()

  val_loss = 0.0
  val_acc = 0.0

  with torch.no_grad():
    tqdm_bar = notebook_tqdm(val_loader, desc='Validing')
    for batch_idx, batch_data in enumerate(tqdm_bar):

      images, labels = batch_data['images'].to(device), batch_data['labels'].to(device)

      outputs = model(images)

      loss = criterion(outputs, labels)
      acc = accuracy(outputs, labels)

      val_loss += loss.item()
      # Accumulate (the original overwrote val_acc each batch, so the value
      # returned was the last batch's accuracy divided by the batch count).
      val_acc += acc.item()

      avg_val_loss = val_loss / (batch_idx + 1)
      avg_val_acc = val_acc / (batch_idx + 1)

      tqdm_bar.set_postfix(
        {
           'Val Loss': f'[{avg_val_loss:.5f}]',
           'Val Acc': f'[{avg_val_acc:.5f}]'
        }
      )

  return val_loss / len(val_loader), val_acc / len(val_loader)

In [None]:
# class CustomModel(nn.Module):
#     def __init__(self, model):
#         super(CustomModel, self).__init__()
#         self.model = model
#         self.clf = nn.Sequential(
#             nn.Tanh(),
#             nn.LazyLinear(25),
#         )

#     # @torch.compile
#     def forward(self, x, label=None):
#         x = self.model(x).pooler_output
#         x = self.clf(x)
#         loss = None
#         if label is not None:
#             loss = nn.CrossEntropyLoss()(x, label)
#         probs = nn.LogSoftmax(dim=-1)(x)
#         return probs, loss

In [None]:
# Pretrained MaxViT-Large with a 25-way head, moved to the target device and
# wrapped in DataParallel.
model = torch.nn.DataParallel(
    timm.create_model(
        "timm/maxvit_large_tf_224.in1k",
        pretrained=True,
        num_classes=25,
    ).to(device)
)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = torch.nn.CrossEntropyLoss()
# Cut the LR by 10x after the monitored (minimised) value stalls for more
# than 10 scheduler steps, never going below 1e-6.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=10,
    verbose=True,
    min_lr=1e-6,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# 5-fold stratified cross-validation over the integer-encoded species labels.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state = CFG['SEED'])
le = LabelEncoder()
dfs['label'] = le.fit_transform(dfs['label'])
cl = dfs['label']

for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(cl)), cl)):
  # Build a fresh model / optimizer / scheduler for every fold. These objects
  # are deleted at the end of each fold, so reusing the ones created in the
  # setup cell would raise NameError from the second fold onwards, and sharing
  # weights across folds would leak validation data between them.
  model = timm.create_model("timm/maxvit_large_tf_224.in1k", pretrained=True, num_classes=25)
  model.to(device)
  model = torch.nn.DataParallel(model)

  optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
  criterion = torch.nn.CrossEntropyLoss()
  scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True, min_lr=1e-6)

  df_train = dfs.loc[train_idx, :]
  df_val = dfs.loc[val_idx, :]

  train_dataset = CustomDataset(df = df_train, root_dir = '/content/drive/MyDrive/cvopen/train', img_col = 'img_path', mode = 'train', transforms = train_transform)
  val_dataset = CustomDataset(df = df_val, root_dir = '/content/drive/MyDrive/cvopen/train', img_col = 'img_path', mode = 'val', transforms = val_transform)

  train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], collate_fn = train_collate_fn, shuffle = True, num_workers=12)
  val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], collate_fn = val_collate_fn, shuffle = False, num_workers=12)

  best_checkpoint_path = f'swinv2-large-resize-fold_idx={fold}'

  early_stopping_counter = 0
  # early_stopping = EarlyStopping(patience=5, verbose=True)
  best_val_loss = float('inf')
  # Defined up front so the fold summary below cannot hit an unbound name
  # (e.g. if every val_loss is NaN and no epoch ever "improves").
  best_val_acc = 0.0
  best_checkpoint = None

  for epoch in range(1, CFG['EPOCHS'] + 1):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    if scheduler is not None:
      scheduler.step(val_loss)

    # Report the training metrics under the "Train" labels (the original
    # printed the validation values twice).
    print(f'Epoch [{epoch}], Train Loss : [{train_loss:.5f}] Train Acc : [{train_acc:.5f}] Val Loss : [{val_loss:.5f}] Val Acc : [{val_acc:.5f}]')

    if val_loss < best_val_loss:
      best_val_loss = val_loss
      best_val_acc = val_acc
      best_checkpoint = {
          'epoch': epoch,
          'model_state_dict': model.state_dict(),
          'optimizer_state_dict': optimizer.state_dict(),
          'best_val_loss': best_val_loss,
          'best_val_acc': best_val_acc,
      }

      torch.save(best_checkpoint, best_checkpoint_path)
      early_stopping_counter = 0

      print('Best checkpoint saved..')

    else:
      early_stopping_counter += 1

      # Stop the fold after 10 consecutive epochs without a better val_loss.
      if early_stopping_counter >= 10:
      #if early_stopping(val_loss):
        break

  print(f'Fold {fold} Best Loss: {best_val_loss:.5f} Best Acc: {best_val_acc:.5f}')

  # Drop every reference that pins GPU tensors before emptying the cache:
  # the scheduler holds the optimizer, and best_checkpoint holds the state
  # dict tensors, so deleting only model/optimizer would free nothing.
  best_checkpoint = None
  del model, optimizer, criterion, scheduler, train_dataset, val_dataset, train_loader, val_loader
  gc.collect()
  torch.cuda.empty_cache()



Training:   0%|          | 0/1056 [00:00<?, ?it/s]

  self.pid = os.fork()
  self.pid = os.fork()


Validing:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.23035] Train Acc : [0.00344] Val Loss : [0.23035] Val Acc : [0.00344]
Best checkpoint saved..


Training:   0%|          | 0/1056 [00:00<?, ?it/s]

Validing:   0%|          | 0/264 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.24410] Train Acc : [0.00344] Val Loss : [0.24410] Val Acc : [0.00344]


Training:   0%|          | 0/1056 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 1.06 MiB is free. Process 43544 has 14.74 GiB memory in use. Of the allocated memory 13.80 GiB is allocated by PyTorch, and 810.87 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Test-set loader: images only, original order, one image per batch.
df_test = pd.read_csv('/content/drive/MyDrive/cvopen/test.csv')
test_dataset = CustomDataset(df = df_test, root_dir = '/content/drive/MyDrive/cvopen/', img_col = 'img_path', mode = 'test', transforms = test_transform)
test_loader = DataLoader(test_dataset, batch_size = 1, shuffle = False, num_workers=4)

# The architecture must match the one the checkpoint was trained with
# (MaxViT-Large in the training cells); building a Swin model here makes
# load_state_dict fail on mismatched keys. pretrained=False because the
# weights come from the checkpoint.
model = timm.create_model("timm/maxvit_large_tf_224.in1k", pretrained=False, num_classes=25)
model.to(device)
model = torch.nn.DataParallel(model)

In [None]:
# Restore the weights written by the training loop. best_checkpoint_path holds
# whatever the loop assigned last, i.e. the file of the final fold that ran.
# The state dict was saved from a DataParallel-wrapped model, so it is loaded
# into a DataParallel-wrapped model here as well.
checkpoint = torch.load(best_checkpoint_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

In [None]:
def inference(model, test_loader, device):
  """Predict a class name for every sample yielded by ``test_loader``.

  Args:
    model: network called as ``model(images)``; scores are reduced along dim 1.
    test_loader: iterable of dict batches holding the image tensor under
      ``'images'`` (CustomCollateFn('test')) or ``'image'`` (default collate
      of the dataset's sample dicts).
    device: device the image tensors are moved to.

  Returns:
    The result of ``le.inverse_transform`` on the predicted class indices,
    in loader order. ``le`` is the notebook-level LabelEncoder.
  """
  model.eval()
  predictions = []

  with torch.no_grad():
    for batch in tqdm(test_loader):
      # Batches are dicts, not (images, labels) tuples.
      images = batch['images'] if 'images' in batch else batch['image']
      images = images.float().to(device)

      outputs = model(images)
      predictions.extend(outputs.argmax(dim=1).cpu().tolist())

  # Return the decoded labels for ALL batches (the original discarded this
  # result and returned only the last batch's index tensor).
  return le.inverse_transform(predictions)

In [None]:
# Fill the sample submission with the predicted labels and write it out.
submission_df = pd.read_csv('/content/drive/MyDrive/cvopen/sample_submission.csv')
# inference() requires the device as its third argument.
submission_df['label'] = inference(model, test_loader, device)
submission_df.to_csv('./answer.csv', index=False)

Memory issue -> switch to PyTorch Lightning

In [None]:
!pip install --quiet timm pytorch_lightning==1.7.7 torchmetrics==0.11.1

In [None]:
# !pip install wandb

In [None]:
import os
import gc
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import cv2
import pytorch_lightning as pl
import albumentations as A
import timm

from albumentations.pytorch import ToTensorV2
from glob import glob
from PIL import Image
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from torchvision.io import read_image
from torchvision.transforms import v2 as  transforms
from torch.utils.data import Dataset, DataLoader
from torchmetrics import Accuracy
from transformers import Swinv2Config, Swinv2Model, AutoImageProcessor, AutoModelForImageClassification
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import WandbLogger

In [None]:
# import wandb

# try:
#     from kaggle_secrets import UserSecretsClient
#     user_secrets = UserSecretsClient()
#     secret_value_0 = user_secrets.get_secret("wandb_api")
#     wandb.login(key = secret_value_0)
#     anony = None
# except:
#     anony = "must"
#     print('If you want to use your W&B account, \
#           go to Add-ons -> Secrets and provide your W&B access token. \
#           Use the Label name as wandb_api. \nGet your W&B access token from here: https://wandb.ai/authorize')

In [None]:
# Colab only: (re)mount Google Drive; force_remount=True is passed so an existing mount is replaced.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
class CustomDataset(Dataset):
    """Dataset that decodes images straight into float tensors.

    Args:
        df: one row per sample; must contain ``path_col`` and, for the
            'train'/'val' modes, a ``class`` column with the integer label.
        path_col: name of the column holding the full image path.
        mode: 'train' or 'val' (samples carry a label) or 'inference'
            (image only).
    """

    _LABELLED_MODES = ('train', 'val')
    _VALID_MODES = _LABELLED_MODES + ('inference',)

    def __init__(self, df, path_col,  mode='train'):
        self.df = df
        self.path_col = path_col
        self.mode = mode

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``{'image': tensor}`` plus ``'label'`` in the labelled modes.

        Raises:
            ValueError: if ``self.mode`` is not train/val/inference.
        """
        # Fail before touching the disk instead of silently returning None.
        if self.mode not in self._VALID_MODES:
            raise ValueError('Invalid mode.')

        row = self.df.iloc[idx]
        # Decode as 3-channel RGB so grayscale / RGBA files still match the
        # 3-channel Normalize, and scale uint8 values into [0, 1] by dividing
        # by 255 (the largest uint8 value) rather than 256.
        image = read_image(row[self.path_col], mode=torchvision.io.ImageReadMode.RGB) / 255.

        data = {'image': image}
        if self.mode in self._LABELLED_MODES:
            data['label'] = row['class']
        return data

    def train_transform(self, image):
        # Placeholder kept for interface compatibility; does nothing.
        pass

In [None]:
# class CustomDataset(Dataset):
#     def __init__(self, df, root_dir, img_col,  mode = 'train', transforms = None):
#       self.df = df
#       self.root_dir = root_dir
#       self.img_col = img_col
#       self.mode = mode
#       self.transforms = transforms

#     def __len__(self):
#       return len(self.df)

#     def __getitem__(self, idx):
#       row = self.df.iloc[idx]
#       img_path = os.path.join(self.root_dir, row[self.img_col][2:])
#       image = Image.open(img_path)
#       image = np.array(image)
#       # image = read_image(img_path)/224.

#       if self.transforms:
#           image = self.transforms(image = image)['image']

#       if self.mode in ['train', 'val']:
#         label = row['label']

#         return {
#             'image': image,
#             'label': label
#         }

#       elif self.mode == 'test':

#         return {
#             'image': image
#         }

In [None]:
# class CustomCollateFn:
#     def __init__(self, mode):
#       self.mode = mode

#     def __call__(self, batch):

#       images = [item['image'] for item in batch]

#       if self.mode in ['train', 'val']:
#         labels = [item['label'] for item in batch]

#         return {
#           'images': torch.stack(images),
#           'labels': torch.LongTensor(labels)
#         }

#       elif self.mode == 'test':

#         return {
#           'images': torch.stack(images)
#         }

#       else:
#         raise ValueError('Invalid mode.')

In [None]:
class CustomCollateFn:
    """Apply a per-image transform and collate samples into one batch dict.

    Args:
        transform: callable applied to each sample's ``'image'`` before stacking.
        mode: 'train' / 'val' produce ``{'pixel_values', 'label'}``;
            'inference' produces ``{'pixel_values'}`` only.
    """

    _LABELLED_MODES = ('train', 'val')
    _VALID_MODES = _LABELLED_MODES + ('inference',)

    def __init__(self, transform, mode):
        self.mode = mode
        self.transform = transform

    def __call__(self, batch):
        """Transform and stack the images; add a LongTensor of labels if labelled.

        Raises:
            ValueError: if the configured mode is not train/val/inference
                (previously this silently returned None).
        """
        if self.mode not in self._VALID_MODES:
            raise ValueError('Invalid mode.')

        collated = {
            'pixel_values': torch.stack([self.transform(data['image']) for data in batch]),
        }
        if self.mode in self._LABELLED_MODES:
            collated['label'] = torch.LongTensor([data['label'] for data in batch])
        return collated

In [None]:
# class CustomModel(nn.Module):
#     def __init__(self, model):
#         super(CustomModel, self).__init__()
#         self.model = model
#         self.clf = nn.Sequential(
#             nn.Tanh(),
#             nn.LazyLinear(25),
#         )

#         # self.clf = nn.Sequential(
#         #     nn.Linear(512),
#         #     nn.ReLU(),
#         #     nn.Dropout(0.3),
#         #     nn.Linear(25)
#         # )

#     def forward(self, x):
#         x = self.model(x).pooler_output
#         x = self.clf(x)
#         probs = nn.LogSoftmax(dim=-1)(x)
#         return probs

In [None]:
class CustomModel(nn.Module):
    """Backbone plus a Tanh -> 25-way linear classification head.

    Args:
        model: backbone whose output exposes a ``pooler_output`` attribute.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model
        # LazyLinear infers its input width from the first forward pass.
        self.clf = nn.Sequential(nn.Tanh(), nn.LazyLinear(25))

    # @torch.compile
    def forward(self, x, label=None):
        """Return ``(log_probs, loss)``.

        ``log_probs`` is the log-softmax of the head's output over the last
        dim; ``loss`` is the cross-entropy against ``label`` computed on the
        raw head output, or None when no label is given.
        """
        logits = self.clf(self.model(x).pooler_output)
        loss = F.cross_entropy(logits, label) if label is not None else None
        return F.log_softmax(logits, dim=-1), loss

class LitCustomModel(pl.LightningModule):
    """Lightning wrapper that trains ``CustomModel`` and logs macro-F1 on validation.

    Args:
        model: backbone handed to ``CustomModel``.
    """

    def __init__(self, model):
        super().__init__()
        self.model = CustomModel(model)
        # Collects [log_probs, labels] per validation batch until the epoch ends.
        self.validation_step_output = []

    def configure_optimizers(self):
        """AdamW over all parameters with a fixed learning rate of 1e-5."""
        return torch.optim.AdamW(self.parameters(), lr=1e-5)

    def training_step(self, batch, batch_idx=None):
        """Compute the batch loss and log it per step as ``train_loss``."""
        _, loss = self.model(batch['pixel_values'], batch['label'])
        self.log("train_loss", loss, on_step=True, on_epoch=False)
        return loss

    def validation_step(self, batch, batch_idx=None):
        """Store the batch's outputs and labels for the epoch-end score; return the loss."""
        targets = batch['label']
        log_probs, loss = self.model(batch['pixel_values'], targets)
        self.validation_step_output.append([log_probs, targets])
        return loss

    def predict_step(self, batch, batch_idx=None):
        """Return the model's first output for the batch (no labels needed)."""
        log_probs, _ = self.model(batch['pixel_values'])
        return log_probs

    def validation_epoch_end(self, step_output):
        """Log macro-F1 over everything collected since the last call as ``val_score``."""
        all_log_probs = torch.cat([out for out, _ in self.validation_step_output])
        all_targets = torch.cat([tgt for _, tgt in self.validation_step_output])

        predicted = all_log_probs.cpu().detach().numpy().argmax(1)
        expected = all_targets.cpu().detach().numpy()

        score = f1_score(expected, predicted, average='macro')
        self.log("val_score", score)
        # Reset the buffer so the next validation run starts empty.
        self.validation_step_output.clear()
        return score

In [None]:
# class LitCustomModel(pl.LightningModule):
#     def __init__(self,
#                  learning_rate,
#                  weight_decay,
#                  patience,
#                  classes = 25
#                  ):
#         super(LitCustomModel, self).__init__()

#         self.learning_rate = learning_rate
#         self.weight_decay = weight_decay
#         self.patience = patience
#         self.classes = classes
#         self.loss = nn.CrossEntropyLoss()
#         # self.train_acc = Accuracy(task='multiclass', num_classes = classes)
#         # self.val_acc = Accuracy(task='multiclass', num_classes = classes)
#         # self.model = timm.create_model('timm/maxvit_large_tf_224.in1k', pretrained = True, num_classes = classes)
#         model = Swinv2Model.from_pretrained("microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft")
#         self.model = CustomModel(model)

#         self.validation_step_outputs = []

#     def configure_optimizers(self):
#         optimizer = torch.optim.AdamW(
#             self.model.parameters(),
#             lr = self.learning_rate,
#             weight_decay = self.weight_decay
#         )

#         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
#             optimizer,
#             mode = 'max',
#             factor = 0.2,
#             patience = self.patience,
#             verbose = True
#         )

#         scheduler = torch.optim.lr_scheduler.StepLR(
#             optimizer,
#             step_size=3,
#             gamma=0.99
#         )

#         return {
#             'optimizer': optimizer,
#             'lr_scheduler': scheduler,
#             'monitor': 'val_acc'
#         }

#     def forward(self, images):

#         output = self.model(images)
#         return output

#     def training_step(self, batch, batch_idx):

#         images = batch['images']
#         labels = batch['labels']

#         outputs = self.model(images)
#         train_loss = self.loss(outputs, labels)

#         self.log(
#           name = 'train_loss',
#           value = train_loss,
#           on_step=False,
#           on_epoch=True,
#           prog_bar = True
#         )

#         # train_f1 = f1_score(outputs, labels, average='macro')

#         # self.log(
#         #   name = 'train_f1',
#         #   value = train_f1,
#         #   on_step = False,
#         #   on_epoch = True,
#         #   prog_bar = True
#         # )

#         return train_loss

#     def validation_step(self, batch, batch_idx):
#         images = batch['images']
#         labels = batch['labels']
#         outputs = self.model(images)
#         val_loss = self.loss(outputs, labels)
#         # val_f1 = f1_score(outputs, labels, average='macro')

#         # self.log(
#         #   'val_f1',
#         #   self.val_f1,
#         #   on_step=False,
#         #   on_epoch=True,
#         #   prog_bar=True
#         # )

#         self.validation_step_outputs.append([outputs, labels])
#         return val_loss

#     def validation_epoch_end(self, outputs):
#         # loss = torch.stack([x['val_loss'] for x in self.validation_step_outputs], 0).mean()
#         # acc = torch.stack([x['val_acc'] for x in self.validation_step_outputs], 0).mean()

#         preds = torch.cat([x for x, _ in self.validation_step_outputs]).cpu().detach().numpy().argmax(1)
#         labels = torch.cat([label for _, label in self.validation_step_outputs]).cpu().detach().numpy()
#         f1 = f1_score(preds, labels, average='macro')

#         self.log(
#             name = 'val_f1',
#             value = f1,
#             on_epoch = True,
#             prog_bar = True)

#         return {'val_f1': f1}

In [None]:
# Settings for the PyTorch Lightning run.
SEED = 42  # passed to pl.seed_everything and to StratifiedKFold
N_SPLIT = 5  # number of stratified folds
BATCH_SIZE = 12  # training batch size; the validation loader uses twice this

In [None]:
pl.seed_everything(SEED)

INFO:pytorch_lightning.utilities.seed:Global seed set to 42


42

In [None]:
# dfs = pd.read_csv('/content/drive/MyDrive/cvopen/train.csv')
# cl = dfs['label']
# skf = StratifiedKFold(n_splits=N_SPLIT, shuffle=True, random_state = SEED)
# le = LabelEncoder()
# dfs['label'] = le.fit_transform(dfs['label'])
# cl = dfs['label']

In [None]:
skf = StratifiedKFold(n_splits=N_SPLIT, shuffle=True, random_state = SEED)

In [None]:
# Load the training metadata and turn the relative image paths into paths
# under the Drive dataset folder.
train_df = pd.read_csv('/content/drive/MyDrive/cvopen/train.csv')
train_df['img_path'] = train_df['img_path'].apply(lambda x: os.path.join('/content/drive/MyDrive/cvopen', x))
# train_df['upscale_img_path'] = train_df['upscale_img_path'].apply(lambda x: os.path.join('./data', x))
# Encode the species names as integer ids in a new 'class' column; `le` is
# kept so predictions can be mapped back to names.
le = LabelEncoder()
train_df['class'] = le.fit_transform(train_df['label'])

In [None]:
# Training pipeline: random 256x256 crop, one randomly chosen geometric
# augmentation, then per-channel normalisation.
train_transform = transforms.Compose([
    # transforms.Resize(size=(256,256), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.RandomResizedCrop((256, 256)),
    transforms.RandomChoice([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(180),
        transforms.RandomVerticalFlip(),
        ]),
    transforms.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
])
# Validation pipeline: deterministic on purpose. Random crops / rotations
# here would make val_score vary between evaluations of the same weights,
# which corrupts checkpoint selection and early stopping.
val_transform = transforms.Compose([
    transforms.Resize(size=(256,256), interpolation=transforms.InterpolationMode.BICUBIC, antialias=True),
    transforms.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
])

train_collate_fn = CustomCollateFn(train_transform, 'train')
val_collate_fn = CustomCollateFn(val_transform, 'val')

In [None]:
# mean = (0.485, 0.456, 0.406)
# std = (0.229, 0.224, 0.225)

# train_transform = A.Compose([
#     A.Resize(224, 224, interpolation=cv2.INTER_CUBIC),
#     A.HorizontalFlip(p=0.5),
#     A.RandomBrightnessContrast(p=0.2),
#     A.Normalize(mean=mean, std=std),
#     ToTensorV2()
# ])

# val_transform = A.Compose([
#     A.Resize(224, 224, interpolation=cv2.INTER_CUBIC),
#     A.HorizontalFlip(p=0.5),
#     A.RandomBrightnessContrast(p=0.2),
#     A.Normalize(mean=mean, std=std),
#     ToTensorV2()
# ])

# test_transform = A.Compose([
#     A.Resize(224, 224, interpolation=cv2.INTER_CUBIC),
#     A.Normalize(mean=mean, std=std),
#     ToTensorV2()
# ])

# train_collate_fn = CustomCollateFn('train')
# val_collate_fn = CustomCollateFn('val')

In [None]:
# Train one Swin-V2 model per stratified fold, keeping the checkpoint with
# the best validation macro-F1 ('val_score') for each fold.
for fold_idx, (train_index, val_index) in enumerate(skf.split(train_df, train_df['class'])):
    train_fold_df = train_df.loc[train_index,:]
    val_fold_df = train_df.loc[val_index,:]

    train_dataset = CustomDataset(train_fold_df, 'img_path', mode='train')
    val_dataset = CustomDataset(val_fold_df, 'img_path', mode='val')

    # shuffle=True: without it every epoch visits the samples in the same
    # fixed DataFrame order, which hurts mini-batch SGD.
    train_dataloader = DataLoader(train_dataset, collate_fn=train_collate_fn, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, collate_fn=val_collate_fn, batch_size=BATCH_SIZE*2)

    # Fresh pretrained backbone for every fold so no weights carry over.
    model = Swinv2Model.from_pretrained("microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft")
    lit_model = LitCustomModel(model)

    checkpoint_callback = ModelCheckpoint(
        monitor='val_score',
        mode='max',
        dirpath='./checkpoints/',
        filename=f'swinv2-large-resize-fold_idx={fold_idx}'+'-{epoch:02d}-{train_loss:.4f}-{val_score:.4f}',
        save_top_k=1,
        save_weights_only=True,
        verbose=True
    )
    # Validation runs twice per epoch (val_check_interval=0.5), so patience=3
    # means three consecutive half-epoch checks without a better val_score.
    earlystopping_callback = EarlyStopping(monitor="val_score", mode="max", patience=3)
    trainer = pl.Trainer(max_epochs=100, accelerator='auto', precision=32, callbacks=[checkpoint_callback, earlystopping_callback], val_check_interval=0.5)
    trainer.fit(lit_model, train_dataloader, val_dataloader)

    # Move the weights off the GPU and drop every reference before the next
    # fold allocates its own model.
    model.cpu()
    lit_model.cpu()
    del model, lit_model, checkpoint_callback, earlystopping_callback, trainer
    gc.collect()
    torch.cuda.empty_cache()


  self.pid = os.fork()
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 195 M 
--------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.812   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 528: 'val_score' reached 0.81917 (best 0.81917), saving model to '/content/checkpoints/swinv2-large-resize-fold_idx=0-epoch=00-train_loss=0.9637-val_score=0.8192.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 1056: 'val_score' reached 0.84393 (best 0.84393), saving model to '/content/checkpoints/swinv2-large-resize-fold_idx=0-epoch=00-train_loss=0.0792-val_score=0.8439.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 1584: 'val_score' reached 0.84987 (best 0.84987), saving model to '/content/checkpoints/swinv2-large-resize-fold_idx=0-epoch=01-train_loss=0.9385-val_score=0.8499.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 2112: 'val_score' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 2640: 'val_score' reached 0.86447 (best 0.86447), saving model to '/content/checkpoints/swinv2-large-resize-fold_idx=0-epoch=02-train_loss=0.4552-val_score=0.8645.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 3168: 'val_score' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 3696: 'val_score' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 4224: 'val_score' was not in top 1
  self.pid = os.fork()
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 195 M 
--------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.812   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 528: 'val_score' reached 0.80086 (best 0.80086), saving model to '/content/checkpoints/swinv2-large-resize-fold_idx=1-epoch=00-train_loss=1.5733-val_score=0.8009.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 1056: 'val_score' reached 0.83786 (best 0.83786), saving model to '/content/checkpoints/swinv2-large-resize-fold_idx=1-epoch=00-train_loss=0.6230-val_score=0.8379.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

In [None]:
# NOTE(review): this entire cell is commented out and does not execute.
# It is a disabled variant of the per-fold training loop: its callbacks monitor
# 'val_acc' (the run logged in the output above monitors 'val_score') and its
# datasets read from the mounted Drive folder. Consider deleting the cell once
# it is no longer needed for reference.
# for fold_idx, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(cl)), cl)):
#     df_train = dfs.loc[train_idx, :]
#     df_val = dfs.loc[val_idx, :]

#     train_dataset = CustomDataset(df = df_train, root_dir = '/content/drive/MyDrive/cvopen', img_col = 'img_path', mode = 'train', transforms = train_transform)
#     val_dataset = CustomDataset(df = df_val, root_dir = '/content/drive/MyDrive/cvopen', img_col = 'img_path', mode = 'val', transforms = val_transform)

#     train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, collate_fn = train_collate_fn, shuffle = True)
#     val_dataloader = DataLoader(val_dataset, batch_size = BATCH_SIZE*2, collate_fn = val_collate_fn, shuffle = False)

#     lit_model = LitCustomModel(
#         learning_rate = 1e-4,
#         weight_decay = 1e-4,
#         patience = 3,
#         classes = 25
#     )

#     checkpoint_callback = ModelCheckpoint(
#         monitor = 'val_acc',
#         save_top_k = 1,
#         save_weights_only = True,
#         dirpath = './checkpoints/',
#         filename = f'swinv2-large-resize-fold_idx={fold_idx}'+'-{epoch:02d}-{train_loss:.4f}-{val_score:.4f}',
#         verbose = True,
#         mode = 'max'
#     )

#     earlystop_callback = EarlyStopping(
#         monitor = 'val_acc',
#         min_delta = 0.00,
#         patience = 10,
#         verbose = False,
#         mode = 'max'
#     )

#     # logger = WandbLogger(project = "swin-vit-model")

#     trainer = pl.Trainer(
#         max_epochs=100,
#         accelerator='auto',
#         # logger = logger,
#         callbacks=[checkpoint_callback, earlystop_callback],
#     )

#     trainer.fit(lit_model, train_dataloader, val_dataloader)

#     lit_model.cpu()
#     del lit_model, checkpoint_callback, earlystop_callback, trainer
#     gc.collect()
#     torch.cuda.empty_cache()


In [None]:
# Load the test manifest, then prefix every entry of its 'img_path' column with the
# dataset folder on the mounted Drive (entries are presumably relative paths — confirm
# against test.csv).
test_df = pd.read_csv('/content/drive/MyDrive/cvopen/test.csv')
test_df['img_path'] = [
    os.path.join('/content/drive/MyDrive/cvopen', image_path)
    for image_path in test_df['img_path']
]

In [None]:
# Sanity check before inference: the number of rows in the test manifest must equal
# the number of entries in the test image directory. os.listdir counts every entry
# (files and sub-directories alike), so a mismatch means the CSV and the folder are
# out of sync.
num_manifest_rows = len(test_df)
num_dir_entries = len(os.listdir('/content/drive/MyDrive/cvopen/test'))
if num_manifest_rows != num_dir_entries:
    # Same exception type as before, now carrying both counts for diagnosis.
    raise ValueError(
        f"test.csv lists {num_manifest_rows} rows but "
        f"'/content/drive/MyDrive/cvopen/test' contains {num_dir_entries} entries"
    )

In [None]:
# Inference-time preprocessing with no augmentation: bicubic resize to 256x256,
# then per-channel normalisation with the standard ImageNet mean/std values.
test_transform = transforms.Compose(
    [
        transforms.Resize(
            size=(256, 256),
            interpolation=transforms.InterpolationMode.BICUBIC,
        ),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

# Dataset over the test manifest and the collate function that applies the
# transform; both are constructed in 'inference' mode.
test_dataset = CustomDataset(test_df, 'img_path', mode='inference')
test_collate_fn = CustomCollateFn(test_transform, 'inference')

# No shuffle argument is passed, so the loader uses DataLoader's default
# sequential sampling; the batch size is double BATCH_SIZE.
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=2 * BATCH_SIZE,
    collate_fn=test_collate_fn,
)

In [None]:
# Ensemble inference: run every saved checkpoint over the test set and combine the
# per-checkpoint class predictions with a hard majority vote.
#
# NOTE(review): every file matching the pattern gets a vote, so stale checkpoints
# left in ./checkpoints from earlier runs would be included — verify the directory.
checkpoint_paths = sorted(glob('./checkpoints/swinv2-large-resize*.ckpt'))  # glob order is unspecified; sort for reproducibility
if not checkpoint_paths:
    # Fail early with a clear message instead of the opaque
    # "need at least one array to stack" error np.stack would raise below.
    raise ValueError(
        "No checkpoints matching './checkpoints/swinv2-large-resize*.ckpt' were found; "
        "run the training cell before inference."
    )

fold_preds = []
for checkpoint_path in checkpoint_paths:
    # The `model` instance is forwarded to LitCustomModel through load_from_checkpoint.
    model = Swinv2Model.from_pretrained("microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft")
    lit_model = LitCustomModel.load_from_checkpoint(checkpoint_path, model=model)
    trainer = L.Trainer(accelerator='auto', precision=32)

    # trainer.predict returns one output per batch; concatenate them and keep the
    # arg-max over axis 1 (assumes each output is (batch, num_classes) scores — TODO confirm).
    preds = trainer.predict(lit_model, test_dataloader)
    preds = torch.cat(preds, dim=0).detach().cpu().numpy().argmax(1)
    fold_preds.append(preds)

    # Release this checkpoint's model before loading the next one, mirroring the
    # cleanup done at the end of each fold in the training loop.
    model.cpu()
    lit_model.cpu()
    del model, lit_model, trainer
    gc.collect()
    torch.cuda.empty_cache()

# Stack to (num_samples, num_checkpoints) and take the most frequent class per row.
# np.bincount(...).argmax() resolves ties in favour of the smallest class index.
votes_per_sample = np.stack(fold_preds, axis=1)
pred_ensemble = [np.bincount(sample_votes).argmax() for sample_votes in votes_per_sample]

In [None]:
# Load the sample submission CSV from the mounted Drive dataset folder; a later cell
# fills in its 'label' column.
submission = pd.read_csv('/content/drive/MyDrive/cvopen/sample_submission.csv')

In [None]:
# Set the 'label' column to the ensembled predictions decoded by `le`
# (presumably the LabelEncoder fitted on the training labels — confirm where `le` is defined).
# NOTE(review): assumes `pred_ensemble` is in the same row order as `submission` — confirm.
submission['label'] = le.inverse_transform(pred_ensemble)

In [None]:
# Write the submission to 'swinv2_large.csv' (relative to the current working directory)
# without the DataFrame index column.
submission.to_csv('swinv2_large.csv',index=False)