In [1]:
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch.nn.functional as F
import numpy as np
import PIL
import torch
from pathlib import Path
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Compose, Normalize, Resize, CenterCrop, ToTensor
import gc
from numba import cuda

id2label = {
    0: "Cassava Bacterial Blight (CBB)",
    1: "Cassava Brown Streak Disease (CBSD)",
    2: "Cassava Green Mottle (CGM)",
    3: "Cassava Mosaic Disease (CMD)",
    4: "Healthy",
}
label2id = {
    "Cassava Bacterial Blight (CBB)": 0,
    "Cassava Brown Streak Disease (CBSD)": 1,
    "Cassava Green Mottle (CGM)": 2,
    "Cassava Mosaic Disease (CMD)": 3,
    "Healthy": 4,
}

folder = Path("./image")

  from .autonotebook import tqdm as notebook_tqdm
2024-10-30 02:35:06.884508: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-30 02:35:06.904364: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-30 02:35:06.910283: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-30 02:35:06.925509: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler fl

# ViT full

In [2]:
model_path = "pufanyi/SC4000_vit_base_full_13500"

model = AutoModelForImageClassification.from_pretrained(
    model_path,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)
image_processor = AutoImageProcessor.from_pretrained(model_path)

In [3]:
class CassavaDatasetViT(Dataset):
    def __init__(self, folder, image_processor):
        self.folder = folder
        self.image_processor = image_processor
        self.image_paths = list(folder.glob("*"))
        self.image_mean, self.image_std = (
            self.image_processor.image_mean,
            self.image_processor.image_std,
        )
        size = self.image_processor.size["height"]
        normalize = Normalize(mean=self.image_mean, std=self.image_std)
        self.test_transforms = Compose(
            [
                Resize(size),
                CenterCrop(size),
                ToTensor(),
                normalize,
            ]
        )

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        with PIL.Image.open(image_path) as image:
            inputs = self.test_transforms(image.convert("RGB"))
        return inputs, image_path.name

In [4]:
vit_outputs = []

dataset = CassavaDatasetViT(folder, image_processor)
dataloader = DataLoader(dataset, batch_size=16)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

with torch.no_grad():
    for batch, image_names in dataloader:
        outputs = model(batch.to(device))
        probabilities = F.softmax(outputs.logits, dim=-1).cpu().numpy()

        vit_outputs.extend(
            {"image_id": image_name, "output": output}
            for image_name, output in zip(image_names, probabilities)
        )

In [5]:
vit_outputs = {x["image_id"]: x["output"] for x in vit_outputs}

In [6]:
model.cpu()
del model
gc.collect()
torch.cuda.empty_cache()

# ConvNeXt-V2 Base

In [7]:
model_path = "pufanyi/SC4000_ConvNeXtV2_base_full_9000"
model = AutoModelForImageClassification.from_pretrained(
    model_path,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)
image_processor = AutoImageProcessor.from_pretrained(model_path)

In [8]:
class CassavaDatasetConvNeXtV2(Dataset):
    def __init__(self, folder, image_processor):
        self.folder = folder
        self.image_processor = image_processor
        self.image_paths = list(folder.glob("*"))
        self.image_mean, self.image_std = (
            self.image_processor.image_mean,
            self.image_processor.image_std,
        )
        size = self.image_processor.size["shortest_edge"]
        normalize = Normalize(mean=self.image_mean, std=self.image_std)
        self.test_transforms = Compose(
            [
                Resize(size),
                CenterCrop(size),
                ToTensor(),
                normalize,
            ]
        )

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        with PIL.Image.open(image_path) as image:
            inputs = self.test_transforms(image.convert("RGB"))
        return inputs, image_path.name

In [9]:
convnext_outputs = []

dataset = CassavaDatasetConvNeXtV2(folder, image_processor)
dataloader = DataLoader(dataset, batch_size=16)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

with torch.no_grad():
    for batch, image_names in dataloader:
        outputs = model(batch.to(device))
        # predictions = outputs.logits.argmax(dim=-1).cpu().numpy()
        probabilities = F.softmax(outputs.logits, dim=-1).cpu().numpy()

        convnext_outputs.extend(
            {"image_id": image_name, "output": output}
            for image_name, output in zip(image_names, probabilities)
        )

In [10]:
convnext_outputs = {x["image_id"]: x["output"] for x in convnext_outputs}

In [11]:
model.cpu()
del model
gc.collect()
torch.cuda.empty_cache()

# CropNet (MobileNetV3)

In [12]:
from huggingface_hub import from_pretrained_keras
import tf_keras as keras
from pathlib import Path
import tensorflow as tf
from PIL import Image

model = from_pretrained_keras("pufanyi/SC4000-MobileNetV3")

image_size = 224
resize_scale = 1.5
image_resize_shape = int(resize_scale * image_size)
batch_size = 32

Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 74898.29it/s]
I0000 00:00:1730255749.219469   27938 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-10-30 02:35:49.224484: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [13]:
val_transforms = [
    lambda img: tf.image.resize(img, (image_resize_shape, image_resize_shape)),
    lambda img: tf.image.resize_with_crop_or_pad(
        img, target_height=image_size, target_width=image_size
    ),
    lambda img: img / 255.0,
]


def val_image_transforms(image):
    for fn in val_transforms:
        image = fn(image)
    return image


def open_image(path):
    with Image.open(path) as image:
        image = keras.utils.img_to_array(image)
    return val_image_transforms(image)

In [14]:
folder = Path("./image")
images = [(path.name, open_image(path)) for path in folder.glob("*")]
ids, inputs = map(list, zip(*images))

input_data = (
    tf.data.experimental.from_list(inputs)
    .batch(batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [15]:
outputs = model.predict(input_data)[:, :-1]



In [16]:
cropnet_outputs = {id: output for id, output in zip(ids, outputs)}

In [17]:
device = cuda.get_current_device()
device.reset()

# Merging

In [18]:
import json
vit_outputs_json = {}
convnext_outputs_json = {}
cropnet_outputs_json = {}
for image_id in vit_outputs.keys():
    vit_outputs_json[image_id] = vit_outputs[image_id].tolist()
    convnext_outputs_json[image_id] = convnext_outputs[image_id].tolist()
    cropnet_outputs_json[image_id] = cropnet_outputs[image_id].tolist()

with open("result/vit_outputs.json", "w") as f:
    json.dump(vit_outputs_json, f)

with open("result/convnext_outputs.json", "w") as f:
    json.dump(convnext_outputs_json, f)

with open("result/cropnet_outputs.json", "w") as f:
    json.dump(cropnet_outputs_json, f)

In [19]:
final_answers = []
for image_id in vit_outputs.keys():
    vit_output = vit_outputs[image_id]
    convnext_output = convnext_outputs[image_id]
    cropnet_output = cropnet_outputs[image_id]
    final_output = (vit_output + convnext_output + cropnet_output) / 3
    final_answers.append({"image_id": image_id, "label": np.argmax(final_output)})

In [20]:
df = pd.DataFrame(final_answers)

In [21]:
df.head()

Unnamed: 0,image_id,label
0,1335111806.jpg,3
1,3551690103.jpg,1
2,3080364100.jpg,3
3,674941646.jpg,2
4,2716766472.jpg,3


In [22]:
df.to_csv("submission.csv", index=False)