In [1]:
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch.nn.functional as F
import numpy as np
import PIL
import torch
from pathlib import Path
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Compose, Normalize, Resize, CenterCrop, ToTensor

# Integer class index -> human-readable cassava leaf condition.
# The list position of each name is its class index.
id2label = dict(
    enumerate(
        [
            "Cassava Bacterial Blight (CBB)",
            "Cassava Brown Streak Disease (CBSD)",
            "Cassava Green Mottle (CGM)",
            "Cassava Mosaic Disease (CMD)",
            "Healthy",
        ]
    )
)
# Inverse lookup, derived from id2label so the two mappings cannot drift apart.
label2id = {name: index for index, name in id2label.items()}

# Directory containing the images to score.
folder = Path("/kaggle/input/cassava-leaf-disease-classification/test_images")

# ViT full

In [2]:
# Directory that holds the saved ViT checkpoint and its preprocessing config.
model_path = "/kaggle/input/sc4000-vit-large/models"

# The processor supplies the normalization statistics and input size that the
# dataset class reads (image_mean, image_std, size).
image_processor = AutoImageProcessor.from_pretrained(model_path)

# NOTE(review): ignore_mismatched_sizes=True lets loading continue when a
# checkpoint weight has a different shape than the configured model instead of
# raising. Confirm the saved checkpoint already has a 5-class head, otherwise
# the classifier would not be using the saved weights.
model = AutoModelForImageClassification.from_pretrained(
    model_path,
    ignore_mismatched_sizes=True,
    label2id=label2id,
    id2label=id2label,
)

In [3]:
class CassavaDatasetViT(Dataset):
    """Test-time dataset that serves preprocessed images for the ViT model.

    Every regular file directly inside ``folder`` is treated as an image. Files
    are listed in sorted order so that indices are stable between runs.

    Each item is a ``(tensor, file_name)`` pair. The tensor is the RGB image
    after ``Resize`` -> ``CenterCrop`` -> ``ToTensor`` -> ``Normalize`` using
    the processor's size, mean and std.

    Args:
        folder: ``pathlib.Path`` of the directory that holds the images.
        image_processor: Object exposing ``image_mean``, ``image_std`` and a
            ``size`` mapping (presumably the Hugging Face image processor
            saved with the model).

    Raises:
        KeyError: If ``image_processor.size`` has neither a ``"height"`` nor a
            ``"shortest_edge"`` entry.
    """

    def __init__(self, folder, image_processor):
        self.folder = folder
        self.image_processor = image_processor
        self.image_paths = self._list_image_paths(folder)
        self.image_mean = image_processor.image_mean
        self.image_std = image_processor.image_std

        size = self._resolve_size(image_processor.size)
        self.test_transforms = Compose(
            [
                # A single int asks torchvision to scale the shorter edge to
                # `size`; the crop then yields a size x size square.
                Resize(size),
                CenterCrop(size),
                ToTensor(),
                Normalize(mean=self.image_mean, std=self.image_std),
            ]
        )

    @staticmethod
    def _list_image_paths(folder):
        """Return the regular files directly inside ``folder``, sorted by path."""
        # glob order is filesystem-dependent; sorting makes it deterministic.
        # Directories are skipped because PIL cannot open them.
        return sorted(path for path in folder.glob("*") if path.is_file())

    @staticmethod
    def _resolve_size(size):
        """Pick the target edge length from a processor ``size`` mapping.

        ``"height"`` is preferred; ``"shortest_edge"`` is accepted as a
        fallback so processors that describe their size that way also work.
        """
        for key in ("height", "shortest_edge"):
            if key in size:
                return size[key]
        raise KeyError(
            f"image_processor.size has neither 'height' nor 'shortest_edge': {size!r}"
        )

    def __len__(self):
        """Number of image files found in the folder."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(transformed_tensor, file_name)``."""
        image_path = self.image_paths[idx]
        # The context manager closes the file handle; convert("RGB") forces a
        # 3-channel image for the 3-channel Normalize statistics.
        with PIL.Image.open(image_path) as image:
            inputs = self.test_transforms(image.convert("RGB"))
        return inputs, image_path.name

In [4]:
# Score every test image with the ViT model and keep its class probabilities.
dataset = CassavaDatasetViT(folder, image_processor)
dataloader = DataLoader(dataset, batch_size=16)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # evaluation behaviour for the model's layers

# One {"image_id": file name, "output": probability vector} record per image.
vit_outputs = []
with torch.no_grad():  # gradients are not needed for prediction
    for pixel_values, file_names in dataloader:
        logits = model(pixel_values.to(device)).logits
        class_probs = F.softmax(logits, dim=-1).cpu().numpy()
        for file_name, probs in zip(file_names, class_probs):
            vit_outputs.append({"image_id": file_name, "output": probs})

# ConvNeXt-V2 Base

In [5]:
# Directory that holds the saved ConvNeXt-V2 checkpoint and its preprocessing config.
model_path = "/kaggle/input/sc4000-convnext-v2-base/models"

# The processor supplies the normalization statistics and input size that the
# dataset class reads (image_mean, image_std, size).
image_processor = AutoImageProcessor.from_pretrained(model_path)

# NOTE(review): ignore_mismatched_sizes=True lets loading continue when a
# checkpoint weight has a different shape than the configured model instead of
# raising. Confirm the saved checkpoint already has a 5-class head, otherwise
# the classifier would not be using the saved weights.
model = AutoModelForImageClassification.from_pretrained(
    model_path,
    ignore_mismatched_sizes=True,
    label2id=label2id,
    id2label=id2label,
)

In [6]:
class CassavaDatasetConvNeXtV2(Dataset):
    """Test-time dataset that serves preprocessed images for the ConvNeXt-V2 model.

    Every regular file directly inside ``folder`` is treated as an image. Files
    are listed in sorted order so that indices are stable between runs.

    Each item is a ``(tensor, file_name)`` pair. The tensor is the RGB image
    after ``Resize`` -> ``CenterCrop`` -> ``ToTensor`` -> ``Normalize`` using
    the processor's size, mean and std.

    Args:
        folder: ``pathlib.Path`` of the directory that holds the images.
        image_processor: Object exposing ``image_mean``, ``image_std`` and a
            ``size`` mapping (presumably the Hugging Face image processor
            saved with the model).

    Raises:
        KeyError: If ``image_processor.size`` has neither a ``"shortest_edge"``
            nor a ``"height"`` entry.
    """

    def __init__(self, folder, image_processor):
        self.folder = folder
        self.image_processor = image_processor
        self.image_paths = self._list_image_paths(folder)
        self.image_mean = image_processor.image_mean
        self.image_std = image_processor.image_std

        size = self._resolve_size(image_processor.size)
        self.test_transforms = Compose(
            [
                # A single int asks torchvision to scale the shorter edge to
                # `size`; the crop then yields a size x size square.
                Resize(size),
                CenterCrop(size),
                ToTensor(),
                Normalize(mean=self.image_mean, std=self.image_std),
            ]
        )

    @staticmethod
    def _list_image_paths(folder):
        """Return the regular files directly inside ``folder``, sorted by path."""
        # glob order is filesystem-dependent; sorting makes it deterministic.
        # Directories are skipped because PIL cannot open them.
        return sorted(path for path in folder.glob("*") if path.is_file())

    @staticmethod
    def _resolve_size(size):
        """Pick the target edge length from a processor ``size`` mapping.

        ``"shortest_edge"`` is preferred; ``"height"`` is accepted as a
        fallback so processors that describe their size that way also work.
        """
        for key in ("shortest_edge", "height"):
            if key in size:
                return size[key]
        raise KeyError(
            f"image_processor.size has neither 'shortest_edge' nor 'height': {size!r}"
        )

    def __len__(self):
        """Number of image files found in the folder."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(transformed_tensor, file_name)``."""
        image_path = self.image_paths[idx]
        # The context manager closes the file handle; convert("RGB") forces a
        # 3-channel image for the 3-channel Normalize statistics.
        with PIL.Image.open(image_path) as image:
            inputs = self.test_transforms(image.convert("RGB"))
        return inputs, image_path.name

In [7]:
# Score every test image with the ConvNeXt-V2 model and keep its class probabilities.
dataset = CassavaDatasetConvNeXtV2(folder, image_processor)
dataloader = DataLoader(dataset, batch_size=16)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # evaluation behaviour for the model's layers

# One {"image_id": file name, "output": probability vector} record per image.
convnext_outputs = []
with torch.no_grad():  # gradients are not needed for prediction
    for pixel_values, file_names in dataloader:
        logits = model(pixel_values.to(device)).logits
        class_probs = F.softmax(logits, dim=-1).cpu().numpy()
        for file_name, probs in zip(file_names, class_probs):
            convnext_outputs.append({"image_id": file_name, "output": probs})

# Merging: average the ViT and ConvNeXt-V2 class probabilities

In [8]:
# Re-key each model's predictions as {image file name: probability vector} so
# the two models can be joined per image.
# The names are rebound from list to dict, so each conversion is guarded: running
# this cell a second time would otherwise iterate over the dict's string keys and
# fail on x["image_id"].
if not isinstance(vit_outputs, dict):
    vit_outputs = {x["image_id"]: x["output"] for x in vit_outputs}
if not isinstance(convnext_outputs, dict):
    convnext_outputs = {x["image_id"]: x["output"] for x in convnext_outputs}

In [9]:
# Ensemble the two models: average their probability vectors for each image and
# use the index of the largest averaged probability as the predicted label.
final_answers = [
    {
        "image_id": image_id,
        "label": np.argmax((vit_probs + convnext_outputs[image_id]) / 2),
    }
    for image_id, vit_probs in vit_outputs.items()
]

In [10]:
# Submission table: one row per image with its predicted class index.
# The columns are named explicitly so that an empty prediction list still yields
# a frame (and later a CSV header) with the expected schema; for non-empty input
# this only selects the same two keys every record already has.
df = pd.DataFrame(final_answers, columns=["image_id", "label"])

In [11]:
# Display the first rows to sanity-check the submission columns.
df.head()

Unnamed: 0,image_id,label
0,2216849948.jpg,4


In [12]:
# Write the predictions to disk; index=False keeps the DataFrame index out of the file.
df.to_csv("submission.csv", index=False)