In [None]:
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch.nn.functional as F
import numpy as np
import PIL
import torch
from pathlib import Path
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Compose, Normalize, Resize, CenterCrop, ToTensor

# Class names in label-id order; the position in this list is the class id.
_CLASS_NAMES = [
    "Cassava Bacterial Blight (CBB)",
    "Cassava Brown Streak Disease (CBSD)",
    "Cassava Green Mottle (CGM)",
    "Cassava Mosaic Disease (CMD)",
    "Healthy",
]

# Forward (id -> name) and reverse (name -> id) lookups handed to the
# Hugging Face model config below.
id2label = dict(enumerate(_CLASS_NAMES))
label2id = {name: class_id for class_id, name in id2label.items()}

# Directory holding the competition test images.
folder = Path("/kaggle/input/cassava-leaf-disease-classification/test_images")

# ViT full

In [None]:
# Local Kaggle dataset directory containing the fine-tuned checkpoint and its
# preprocessor config.
model_path = "/kaggle/input/sc4000-vit-large/models"

# The processor supplies the normalisation statistics and target size used by
# the dataset class below.
image_processor = AutoImageProcessor.from_pretrained(model_path)

# NOTE(review): ignore_mismatched_sizes=True lets loading continue when a
# checkpoint tensor does not match the configured shape; for inference on a
# fine-tuned head that would presumably leave re-initialised weights, so
# confirm the flag is actually needed here.
model = AutoModelForImageClassification.from_pretrained(
    model_path,
    ignore_mismatched_sizes=True,
    id2label=id2label,
    label2id=label2id,
)

In [None]:
class CassavaDatasetViT(Dataset):
    """Test-time dataset over every image file directly inside ``folder``.

    Each item is ``(tensor, file_name)`` where ``tensor`` is the image after
    Resize -> CenterCrop -> ToTensor -> Normalize, using the size and the
    mean/std published by ``image_processor``.

    Args:
        folder: ``pathlib.Path`` of the directory to scan (non-recursive).
        image_processor: object exposing ``image_mean``, ``image_std`` and a
            ``size`` mapping with either ``"shortest_edge"`` or
            ``"height"``/``"width"`` keys.
    """

    def __init__(self, folder, image_processor):
        self.folder = folder
        self.image_processor = image_processor
        # Sorted for a reproducible iteration order; sub-directories are
        # skipped because they cannot be opened as images.
        self.image_paths = sorted(
            path for path in folder.glob("*") if path.is_file()
        )
        self.image_mean = image_processor.image_mean
        self.image_std = image_processor.image_std

        size = self._resolve_size(image_processor.size)
        self.test_transforms = Compose(
            [
                Resize(size),
                CenterCrop(size),
                ToTensor(),
                Normalize(mean=self.image_mean, std=self.image_std),
            ]
        )

    @staticmethod
    def _resolve_size(size):
        """Return the Resize/CenterCrop argument for a processor ``size`` mapping.

        Processors describe their target size either as
        ``{"shortest_edge": n}`` or as ``{"height": h, "width": w}``; the
        former yields ``n`` and the latter ``(h, w)``.

        Raises:
            KeyError: if neither key layout is present.
        """
        if "shortest_edge" in size:
            return size["shortest_edge"]
        return (size["height"], size["width"])

    def __len__(self):
        """Number of image files found in the folder."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``(transformed_tensor, file_name)``."""
        image_path = self.image_paths[idx]
        with PIL.Image.open(image_path) as image:
            # Convert inside the ``with`` so pixel data is read before the
            # file handle is closed.
            inputs = self.test_transforms(image.convert("RGB"))
        return inputs, image_path.name

In [None]:
# Per-image records: {"image_id": file name, "output": class-probability vector}.
vit_outputs = []

dataset = CassavaDatasetViT(folder, image_processor)
dataloader = DataLoader(dataset, batch_size=16)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

with torch.no_grad():
    for pixel_values, file_names in dataloader:
        logits = model(pixel_values.to(device)).logits
        # Softmax over the class axis, then move to host memory as NumPy.
        batch_probs = F.softmax(logits, dim=-1).cpu().numpy()

        for file_name, probs in zip(file_names, batch_probs):
            vit_outputs.append({"image_id": file_name, "output": probs})

In [None]:
# Re-key the per-image records as {image_id: probability vector}.
# Guarded so the cell can be re-run: once vit_outputs is already a dict,
# iterating it would yield string keys and the record lookup would fail.
if isinstance(vit_outputs, list):
    vit_outputs = {
        record["image_id"]: record["output"] for record in vit_outputs
    }

# ConvNeXt-V2 Base

In [None]:
class CassavaDatasetConvNeXtV2(Dataset):
    """Test-time dataset over every image file directly inside ``folder``.

    Each item is ``(tensor, file_name)`` where ``tensor`` is the image after
    Resize -> CenterCrop -> ToTensor -> Normalize, using the size and the
    mean/std published by ``image_processor``.

    Args:
        folder: ``pathlib.Path`` of the directory to scan (non-recursive).
        image_processor: object exposing ``image_mean``, ``image_std`` and a
            ``size`` mapping with either ``"shortest_edge"`` or
            ``"height"``/``"width"`` keys.
    """

    def __init__(self, folder, image_processor):
        self.folder = folder
        self.image_processor = image_processor
        # Sorted for a reproducible iteration order; sub-directories are
        # skipped because they cannot be opened as images.
        self.image_paths = sorted(
            path for path in folder.glob("*") if path.is_file()
        )
        self.image_mean = image_processor.image_mean
        self.image_std = image_processor.image_std

        size = self._resolve_size(image_processor.size)
        self.test_transforms = Compose(
            [
                Resize(size),
                CenterCrop(size),
                ToTensor(),
                Normalize(mean=self.image_mean, std=self.image_std),
            ]
        )

    @staticmethod
    def _resolve_size(size):
        """Return the Resize/CenterCrop argument for a processor ``size`` mapping.

        Processors describe their target size either as
        ``{"shortest_edge": n}`` or as ``{"height": h, "width": w}``; the
        former yields ``n`` and the latter ``(h, w)``.

        Raises:
            KeyError: if neither key layout is present.
        """
        if "shortest_edge" in size:
            return size["shortest_edge"]
        return (size["height"], size["width"])

    def __len__(self):
        """Number of image files found in the folder."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``(transformed_tensor, file_name)``."""
        image_path = self.image_paths[idx]
        with PIL.Image.open(image_path) as image:
            # Convert inside the ``with`` so pixel data is read before the
            # file handle is closed.
            inputs = self.test_transforms(image.convert("RGB"))
        return inputs, image_path.name

In [None]:
# Per-image records: {"image_id": file name, "output": class-probability vector}.
convnext_outputs = []

# NOTE(review): in the cells visible here, `model` and `image_processor` are
# still the objects loaded from the ViT checkpoint directory; no ConvNeXt-V2
# checkpoint is loaded before this point. If that is really the case, these
# "ConvNeXt" probabilities duplicate the ViT ones and the ensemble below
# double-counts ViT. TODO: load the ConvNeXt-V2 model/processor first.
dataset = CassavaDatasetConvNeXtV2(folder, image_processor)
dataloader = DataLoader(dataset, batch_size=16)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Inference only: gradient tracking is disabled for the whole loop.
with torch.no_grad():
    for batch, image_names in dataloader:
        outputs = model(batch.to(device))
        # predictions = outputs.logits.argmax(dim=-1).cpu().numpy()
        # Softmax over the last (class) axis, then move to host as NumPy.
        probabilities = F.softmax(outputs.logits, dim=-1).cpu().numpy()

        # One record per image in the batch, paired by position.
        convnext_outputs.extend(
            {"image_id": image_name, "output": output}
            for image_name, output in zip(image_names, probabilities)
        )

In [None]:
# Re-key the per-image records as {image_id: probability vector}.
# Guarded so the cell can be re-run: once convnext_outputs is already a dict,
# iterating it would yield string keys and the record lookup would fail.
if isinstance(convnext_outputs, list):
    convnext_outputs = {
        record["image_id"]: record["output"] for record in convnext_outputs
    }

# CropNet (MobileNetV3)

In [None]:
from huggingface_hub import from_pretrained_keras
import tf_keras as keras
from pathlib import Path
import tensorflow as tf
from PIL import Image

# Preprocessing / batching constants for the CropNet pipeline.
batch_size = 32
image_size = 224  # side length of the square crop fed to the network
resize_scale = 1.5  # images are first resized to resize_scale * image_size
image_resize_shape = int(image_size * resize_scale)

# Note: this rebinds `model`, replacing the PyTorch model used in the
# earlier sections.
model = from_pretrained_keras("/kaggle/input/cropnet-mobilenetv3/models")

In [None]:
def _resize_to_working_size(img):
    """Resize to a square of side ``image_resize_shape``."""
    return tf.image.resize(img, (image_resize_shape, image_resize_shape))


def _crop_or_pad_to_input_size(img):
    """Crop or pad to ``image_size`` x ``image_size``."""
    return tf.image.resize_with_crop_or_pad(
        img, target_height=image_size, target_width=image_size
    )


def _scale_to_unit_range(img):
    """Divide pixel values by 255."""
    return img / 255.0


# Ordered preprocessing steps applied to every validation/test image.
val_transforms = [
    _resize_to_working_size,
    _crop_or_pad_to_input_size,
    _scale_to_unit_range,
]

def val_image_transforms(image):
    """Run ``image`` through every step of ``val_transforms`` in order.

    Returns the output of the last step (or ``image`` unchanged when the
    step list is empty).
    """
    transformed = image
    for step in val_transforms:
        transformed = step(transformed)
    return transformed

def open_image(path):
    """Read the image at ``path`` and return it preprocessed for CropNet.

    The file is decoded as 3-channel RGB, converted to an array with
    ``keras.utils.img_to_array`` and passed through ``val_image_transforms``.

    Args:
        path: location of the image file, in any form ``PIL.Image.open`` accepts.

    Returns:
        Whatever ``val_image_transforms`` returns for the decoded array.
    """
    with Image.open(path) as image:
        # Force RGB so grayscale / RGBA / palette files do not produce a
        # different channel count; this matches the `.convert("RGB")` used
        # by the PyTorch dataset classes in this notebook.
        pixels = keras.utils.img_to_array(image.convert("RGB"))
    return val_image_transforms(pixels)

In [None]:
folder = Path("/kaggle/input/cassava-leaf-disease-classification/test_images")

# Eagerly load and preprocess every file, keeping (file name, tensor) pairs
# so names and inputs stay aligned by position.
images = []
for path in folder.glob("*"):
    images.append((path.name, open_image(path)))

# Transpose the pairs into two parallel lists.
ids, inputs = (list(column) for column in zip(*images))

# Batched dataset over the in-memory tensors.
input_data = (
    tf.data.experimental.from_list(inputs)
    .batch(batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [None]:
# Keras `Model.predict` already returns a NumPy array, which has no `.numpy()`
# method; `np.asarray` is a no-op for an ndarray and keeps the slice valid.
# The last output column is dropped so that five class columns remain
# (presumably the model emits an extra non-cassava/unknown class; confirm).
outputs = np.asarray(model.predict(input_data))[:, :-1]

In [None]:
# Map image file name -> CropNet output vector. A dict (not a list of records)
# is required because the merging step looks entries up by image_id, matching
# the dict form of vit_outputs / convnext_outputs.
cropnet_outputs = {
    image_id: output for image_id, output in zip(ids, outputs)
}

# Merging

In [None]:
def _ensemble_average(image_id):
    """Unweighted mean of the three models' output vectors for one image."""
    return (
        vit_outputs[image_id]
        + convnext_outputs[image_id]
        + cropnet_outputs[image_id]
    ) / 3


# One submission record per image: the class with the highest averaged score.
final_answers = [
    {"image_id": image_id, "label": np.argmax(_ensemble_average(image_id))}
    for image_id in vit_outputs
]

In [None]:
# Build the submission table from the ensemble records. The merging cell
# stores them in `final_answers`; `submissions` is never defined. Columns are
# pinned so the frame has the expected header even when there are no records.
df = pd.DataFrame(final_answers, columns=["image_id", "label"])

In [None]:
# Preview the first five submission rows.
df.head(n=5)

In [None]:
# Write the submission file without the DataFrame index column.
df.to_csv(path_or_buf="submission.csv", index=False)