# [Plant Seedlings Classification](https://www.kaggle.com/competitions/plant-seedlings-classification/code)


In [18]:
from datetime import datetime
from os import cpu_count, listdir, path
from pathlib import Path
from random import seed
from typing import List, Tuple

import numpy as np
import pandas as pd
import torch
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from torch import Tensor
from torch.utils.data import DataLoader, Dataset

## Global Variables


In [19]:
# Metal Performance Shaders (MPS) - Apple Metal GPU acceleration
def get_mps_device() -> torch.device:
    if torch.backends.mps.is_available():
        device = torch.device("mps")
        x = torch.ones(1, device=device)
    elif torch.cuda.is_available():
        device = torch.device("cuda")
        x = torch.ones(1, device=device)
    else:
        device = torch.device("cpu")
    return device


DEVICE = get_mps_device()

DATA_ROOT = Path("./data")
TRAIN_DIR = DATA_ROOT / "train"
TEST_DIR = DATA_ROOT / "test"

RANDOM_SEED = 42
BATCH_SIZE = 64

print(f"Torch version: {torch.__version__}")
print(f"CPU cores: {cpu_count()}")

print(f"DEVICE: {DEVICE}")
print(f"DATA_ROOT: {DATA_ROOT}")
print("Random seed:", RANDOM_SEED)
print("Batch size:", BATCH_SIZE)

print("MPS available:", torch.backends.mps.is_available())
print("MPS built:", torch.backends.mps.is_built())

Torch version: 2.5.1
CPU cores: 8
DEVICE: mps
DATA_ROOT: data
Random seed: 42
Batch size: 64
MPS available: True
MPS built: True


## Reproducibility


In [20]:
seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

## Data Loading


In [21]:
transform_resize = (224, 224)

transform = transforms.Compose(
    [
        transforms.Resize(transform_resize),
        transforms.ToTensor(),
    ]
)

dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=transform)
loader = DataLoader(
    dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True
)

total_sum = torch.zeros(3)
total_squared_sum = torch.zeros(3)
num_pixels = 0

for images, _ in loader:
    images: Tensor

    total_sum += images.sum(dim=[0, 2, 3])
    total_squared_sum += (images**2).sum(dim=[0, 2, 3])
    num_pixels += images.size(0) * images.size(2) * images.size(3)

transform_mean = total_sum / num_pixels
transform_std = torch.sqrt((total_squared_sum / num_pixels) - (transform_mean**2))

print("Mean:", transform_mean)
print("Std:", transform_std)

Mean: tensor([0.3288, 0.2894, 0.2073])
Std: tensor([0.1039, 0.1093, 0.1266])


## Models


In [22]:
KERNEL_SIZE = 3
STRIDE = 1
PADDING = 1


class CustomCNN(torch.nn.Module):
    def __init__(self, num_classes: int = len(dataset.classes)):
        super(CustomCNN, self).__init__()

        self.conv1 = torch.nn.Conv2d(
            in_channels=3,
            out_channels=16,
            kernel_size=KERNEL_SIZE,
            stride=STRIDE,
            padding=PADDING,
        )
        self.conv2 = torch.nn.Conv2d(
            in_channels=self.conv1.out_channels,
            out_channels=32,
            kernel_size=KERNEL_SIZE,
            stride=STRIDE,
            padding=PADDING,
        )
        self.conv3 = torch.nn.Conv2d(
            in_channels=self.conv2.out_channels,
            out_channels=64,
            kernel_size=KERNEL_SIZE,
            stride=STRIDE,
            padding=PADDING,
        )
        self.conv4 = torch.nn.Conv2d(
            in_channels=self.conv3.out_channels,
            out_channels=128,
            kernel_size=KERNEL_SIZE,
            stride=STRIDE,
            padding=PADDING,
        )
        self.conv5 = torch.nn.Conv2d(
            in_channels=self.conv4.out_channels,
            out_channels=256,
            kernel_size=KERNEL_SIZE,
            stride=STRIDE,
            padding=PADDING,
        )

        self.pool = torch.nn.MaxPool2d(
            kernel_size=2,
            stride=2,
            padding=0,
        )

        self.relu = torch.nn.ReLU()

        dummy_input = torch.randn(1, 3, transform_resize[0], transform_resize[1])
        with torch.no_grad():
            in_features_fc1 = self.pool(
                self.conv5(
                    self.pool(
                        self.conv4(
                            self.pool(
                                self.conv3(
                                    self.pool(
                                        self.conv2(self.pool(self.conv1(dummy_input)))
                                    )
                                )
                            )
                        )
                    )
                )
            ).numel()

        self.fc1 = torch.nn.Linear(
            in_features=in_features_fc1,
            out_features=128,
        )
        self.fc2 = torch.nn.Linear(
            in_features=self.fc1.out_features,
            out_features=num_classes,
        )

        self.droupout = torch.nn.Dropout(p=0.5)
        self.bn1 = torch.nn.BatchNorm2d(self.conv1.out_channels)
        self.bn2 = torch.nn.BatchNorm2d(self.conv2.out_channels)
        self.bn3 = torch.nn.BatchNorm2d(self.conv3.out_channels)

    def forward(self, x: Tensor) -> Tensor:
        x = self.conv1(x)
        # x = self.bn1(x)
        x = self.relu(x)
        x = self.pool(x)

        x = self.conv2(x)
        # x = self.bn2(x)
        x = self.relu(x)
        x = self.pool(x)

        x = self.conv3(x)
        # x = self.bn3(x)
        x = self.relu(x)
        x = self.pool(x)

        x = self.conv4(x)
        x = self.relu(x)
        x = self.pool(x)

        x = self.conv5(x)
        x = self.relu(x)
        x = self.pool(x)

        # Flatten from (batch_size, C, H, W) to (batch_size, C*H*W)
        x = x.view(x.size(0), -1)

        x = self.fc1(x)
        x = self.relu(x)
        x = self.droupout(x)
        x = self.fc2(x)

        return x


model = CustomCNN(num_classes=len(dataset.classes)).to(DEVICE)
print(model)

CustomCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
  (fc1): Linear(in_features=12544, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=12, bias=True)
  (droupout): Dropout(p=0.5, inplace=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [23]:
# Load models
model_custom_cnn: CustomCNN = torch.load(
    DATA_ROOT / "model_0.92695.pth", map_location=DEVICE, weights_only=False
)
model_custom_cnn = model_custom_cnn.to(DEVICE)

model_resnet: models.ResNet = torch.load(
    DATA_ROOT / "model_0.96095.pth", map_location=DEVICE, weights_only=False
)
model_resnet = model_resnet.to(DEVICE)

model_vit = torch.load(
    DATA_ROOT / "model_0.96725.pth", map_location=DEVICE, weights_only=False
)

## Submission


In [24]:
class TestDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.image_files = [f for f in listdir(root_dir) if f.endswith(".png")]
        self.transform = transform

    def __len__(self) -> int:
        return len(self.image_files)

    def __getitem__(self, idx) -> Tuple[Tensor, str]:
        img_path = path.join(self.root_dir, self.image_files[idx])
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image, self.image_files[idx]


transform = transforms.Compose(
    [
        transforms.Resize(transform_resize),
        transforms.ToTensor(),
    ]
)

test_dataset = TestDataset(root_dir=TEST_DIR, transform=transform)
test_loader = DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=True
)

print(f"Test dataset length: {len(test_dataset)}")

Test dataset length: 794


In [25]:
model_custom_cnn.eval()
model_resnet.eval()
model_vit.eval()

w_custom_cnn = 0.25
w_resnet = 0.25
w_vit = 1 - w_custom_cnn - w_resnet

print(f"Custom CNN weight: {w_custom_cnn}")
print(f"ResNet weight: {w_resnet}")
print(f"ViT weight: {w_vit}")

predictions = []
with torch.no_grad():
    for images, image_names in test_loader:
        images: Tensor
        image_names: List[str]

        images = images.to(DEVICE)
        images_resnet = (
            images.clone()
            - torch.tensor([0.485, 0.456, 0.406]).to(DEVICE).view(1, 3, 1, 1)
        ) / torch.tensor([0.229, 0.224, 0.225]).to(DEVICE).view(1, 3, 1, 1)
        images_custom_cnn = (
            images - transform_mean.to(DEVICE).view(1, 3, 1, 1)
        ) / transform_std.to(DEVICE).view(1, 3, 1, 1)

        probs_custom = torch.nn.functional.softmax(
            model_custom_cnn(images_custom_cnn), dim=1
        )
        probs_resnet = torch.nn.functional.softmax(model_resnet(images_resnet), dim=1)
        probs_vit = torch.nn.functional.softmax(model_vit(images_custom_cnn), dim=1)

        ensemble_probs = (
            w_custom_cnn * probs_custom + w_resnet * probs_resnet + w_vit * probs_vit
        )

        _, preds = torch.max(ensemble_probs, dim=1)

        for image_name, pred in zip(image_names, preds):
            predictions.append(
                {"file": image_name, "species": dataset.classes[pred.item()]}
            )

submission_file = (
    DATA_ROOT / f"submission-{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
)
predictions_df = pd.DataFrame(predictions)
predictions_df.to_csv(submission_file, index=False)

print(f"Predictions saved to '{submission_file}':")
print(predictions_df.head())

Custom CNN weight: 0.3
ResNet weight: 0.3
ViT weight: 0.4
Predictions saved to 'data/submission-2024-12-30_13-24-33.csv':
            file           species
0  1b490196c.png   Shepherds Purse
1  85431c075.png  Loose Silky-bent
2  506347cfe.png          Cleavers
3  7f46a71db.png        Sugar beet
4  668c1007c.png          Charlock
