In [1]:
# Mount Google Drive into the Colab VM so the project files are reachable.
from google.colab import drive
drive.mount('/content/drive')
# Make the project folder the working directory; later cells use paths
# relative to it (e.g. './ViT/...' and 'data/test').
%cd /content/drive/MyDrive/AI/neurowood
# List the folder contents as a quick sanity check.
!ls

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/AI/neurowood
ConvNeXt  data	Ensemble.ipynb	Training.ipynb	ViT


In [2]:
!pip install datasets transformers wandb timm==0.4.12 six tensorboardX -qq

In [3]:
import argparse
import datetime
import numpy as np
import time
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import json
import os
from PIL import Image
from tqdm import tqdm
import csv
from pathlib import Path

from timm.data.constants import \
    IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from timm.models import create_model
import torchvision.transforms as T

from transformers import ViTForImageClassification, AutoFeatureExtractor

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [8]:
def create_submit(data_path, model, tta_transforms=None, output_path='submit.csv'):
    """Predict a class for every image in ``data_path`` and write a submission CSV.

    For each image the model is evaluated on the original picture and on one
    augmented view per entry of ``tta_transforms`` (test-time augmentation).
    The per-view outputs are averaged and the arg-max of the average is the
    predicted class index.

    Args:
        data_path: Directory containing the test images. Each file name must
            start with an integer id followed by a dot (e.g. ``"17.png"``).
        model: Callable taking a PIL image and returning a tensor of class
            scores, either 1-D ``(num_classes,)`` or 2-D ``(1, num_classes)``.
        tta_transforms: Optional iterable of callables mapping a PIL image to
            an augmented PIL image. ``None`` or empty disables TTA.
        output_path: Destination of the CSV file (header ``id,class``, rows
            sorted by id). Defaults to ``'submit.csv'`` in the working directory.

    Raises:
        ValueError: If a file name in ``data_path`` does not start with an
            integer id.
    """
    tta_transforms = list(tta_transforms) if tta_transforms else []

    labels = {}
    for test_image in tqdm(os.listdir(data_path)):
        # Close the file handle deterministically; ``convert`` forces the pixel
        # data to be read and yields a 3-channel image (a no-op for RGB input).
        with Image.open(os.path.join(data_path, test_image)) as raw_image:
            orig_image = raw_image.convert('RGB')

        views = [transform(orig_image) for transform in tta_transforms]
        views.append(orig_image)

        # The prediction list is rebuilt for every image; sharing one list
        # across images would blend earlier images into the current average.
        with torch.no_grad():
            # Normalise every output to (1, num_classes) so that 1-D and 2-D
            # model outputs are averaged per class rather than flattened.
            preds = [model(view).reshape(1, -1) for view in views]

        mean_scores = torch.mean(torch.cat(preds, dim=0), dim=0)
        labels[int(test_image.split('.')[0])] = int(torch.argmax(mean_scores))

    # newline='' is what the csv module expects; it avoids doubled line
    # endings on platforms that translate '\n'.
    with open(output_path, 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'class'])
        for idx in sorted(labels):
            label = labels[idx]
            # Model index 2 is reported as class 3 in the submission
            # (presumably the dataset's class ids are 0, 1, 3 - TODO confirm).
            if label == 2:
                label = 3
            writer.writerow([idx, label])

## ViT Fold Inference

In [5]:
class ViTFold:
    """Ensemble of three fine-tuned ViT classifiers, one per training fold.

    Checkpoints are loaded from ``dev_path + '_fold1'``, ``'_fold2'`` and
    ``'_fold3'``. The feature extractor (image preprocessing) is taken from
    the first fold's checkpoint and shared by all folds.

    Calling the instance with an image returns the class probabilities
    averaged over the three folds.
    """

    def __init__(self, dev_path, labels):
        """Load the feature extractor and the three fold models.

        Args:
            dev_path: Common checkpoint prefix; ``_fold<k>`` is appended.
            labels: Sequence of class labels; its length sets ``num_labels``.
        """
        self.dev_path = dev_path
        self.labels = labels
        self.feature_extractor = AutoFeatureExtractor.from_pretrained(self.dev_path + '_fold1')
        self.models = [self._load_fold(fold) for fold in (1, 2, 3)]
        # Keep the per-fold attributes for code that accesses them directly.
        self.fold1, self.fold2, self.fold3 = self.models

    def _load_fold(self, fold):
        """Load the classifier for one fold and switch it to inference mode."""
        model = ViTForImageClassification.from_pretrained(
            self.dev_path + f'_fold{fold}',
            num_labels=len(self.labels),
            id2label={str(i): c for i, c in enumerate(self.labels)},
            label2id={c: str(i) for i, c in enumerate(self.labels)}
        )
        # Make inference mode explicit (disables dropout).
        model.eval()
        return model

    def __call__(self, image):
        """Return fold-averaged class probabilities for one image.

        Args:
            image: Input accepted by the feature extractor (e.g. a PIL image).

        Returns:
            CPU tensor of probabilities: the softmax of each fold's first
            output, concatenated along dim 0 and averaged over that dim
            (shape ``(num_labels,)`` for a single input image).
        """
        pixel_values = self.feature_extractor(image, return_tensors='pt')['pixel_values']
        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            fold_probs = [
                torch.softmax(model(pixel_values)[0], dim=1).cpu()
                for model in self.models
            ]
        return torch.mean(torch.cat(fold_probs, dim=0), dim=0)

# Build the three-fold ViT ensemble; the checkpoint prefix is resolved
# relative to the current working directory.
vit_model = ViTFold(
    dev_path='./ViT/vit-base-patch32-384',
    labels=[0, 1, 2],
)

In [6]:
from torchvision import transforms
# Test-time augmentation views: two deterministic flips (p=1) and three
# fixed rotations.
# RandomRotation(d) with a scalar samples an angle uniformly from [-d, +d],
# which would yield a different random rotation on every call. Passing the
# degenerate range (d, d) pins the rotation to exactly d degrees.
# NOTE(review): with the default expand=False, 90/270 degree rotations crop
# non-square images - confirm the test images are square.
tta_transforms = [
    transforms.RandomHorizontalFlip(p=1),
    transforms.RandomVerticalFlip(p=1),
    transforms.RandomRotation((90, 90)),
    transforms.RandomRotation((180, 180)),
    transforms.RandomRotation((270, 270)),
]

In [None]:
# Run test-time-augmented inference with the ViT ensemble and write the
# submission CSV into the current working directory.
create_submit(
    data_path='data/test',
    model=vit_model,
    tta_transforms=tta_transforms,
)

## ConvNeXt Fold Inference

In [10]:
# Switch into the ConvNeXt folder: the following cells import `models.*` and
# use './checkpoints/...' and '../data/test' relative to this directory.
# Any file written with a relative path from here on lands in this folder.
%cd ConvNeXt

/content/drive/MyDrive/AI/neurowood/ConvNeXt


In [11]:
import models.convnext
import models.convnext_isotropic


class ConvFold:
    """Ensemble of three ConvNeXt classifiers, one per training fold.

    Weights are read from ``dev_path + '_fold1.pth'`` ... ``'_fold3.pth'``;
    each checkpoint must hold the state dict under the ``'model'`` key.
    Calling the instance with an image returns the class probabilities
    averaged over the three folds.
    """

    def __init__(self, dev_path, model_name, transforms):
        """Create the three fold models and load their weights.

        Args:
            dev_path: Common checkpoint prefix; ``_fold<k>.pth`` is appended.
            model_name: Architecture name passed to ``create_model``.
            transforms: Callable turning an input image into a tensor without
                batch dimension (a batch dim is added in ``__call__``).
        """
        self.dev_path = dev_path
        self.transforms = transforms
        self.models = [self._load_fold(model_name, fold) for fold in (1, 2, 3)]
        # Keep the per-fold attributes for code that accesses them directly.
        self.fold1, self.fold2, self.fold3 = self.models

    def _load_fold(self, model_name, fold):
        """Build one fold's network, load its weights and prepare it for inference."""
        model = create_model(
            model_name,
            pretrained=False,
            num_classes=3,
        )
        # map_location='cpu' lets checkpoints saved on a GPU load on any machine.
        # NOTE: torch.load unpickles the file - only load trusted checkpoints.
        checkpoint = torch.load(self.dev_path + f'_fold{fold}.pth', map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        # Freshly created modules are in training mode; switch to eval so that
        # stochastic layers (dropout / drop-path) are disabled at inference.
        model.eval()
        # Place the model on the target device once instead of moving it to
        # the device and back to the CPU on every single call.
        return model.to(device)

    def __call__(self, image):
        """Return fold-averaged class probabilities for one image.

        Args:
            image: Input accepted by ``self.transforms``.

        Returns:
            CPU tensor with the softmax probabilities averaged over the folds;
            shape ``(1, num_classes)`` when each model emits ``(1, num_classes)``
            logits for the single-image batch.
        """
        batch = self.transforms(image).unsqueeze(0).to(device)
        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            fold_probs = [
                torch.softmax(model(batch).cpu(), dim=1)
                for model in self.models
            ]
        return torch.mean(torch.stack(fold_probs, dim=0), dim=0)

# Preprocessing configuration: square resize plus ImageNet mean/std normalisation.
SIZE = 384
NORMALIZE_MEAN = IMAGENET_DEFAULT_MEAN
NORMALIZE_STD = IMAGENET_DEFAULT_STD

# Pipeline applied to every image before it is fed to the ConvNeXt folds.
orig_transforms = T.Compose([
    T.Resize((SIZE, SIZE)),
    T.ToTensor(),
    T.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
])

# Three-fold ConvNeXt ensemble; the checkpoint prefix is relative to the
# current working directory.
conv_model = ConvFold(
    './checkpoints/convnext_base_1k_384',
    "convnext_base",
    transforms=orig_transforms,
)

In [None]:
# Run test-time-augmented inference with the ConvNeXt ensemble; the test
# images sit one level above the current working directory.
create_submit(
    data_path='../data/test',
    model=conv_model,
    tta_transforms=tta_transforms,
)