In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/Sample_submission.csv
/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/predicate-matrix-continuous.txt
/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/predicate-matrix-binary.txt
/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/predicates.txt
/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/classes.txt
/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/test/02371.jpg
/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/test/00767.jpg
/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/test/02360.jpg
/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/test/00266.jpg
/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/test/02450.jpg
/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/test/01496.jpg
/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-datas

In [2]:
import os
import torch
from torchvision import models
from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from PIL import Image
from torch.optim.lr_scheduler import CyclicLR

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# Hyperparameters
num_epochs = 10
learning_rate = 0.001
batch_size = 16
image_size = 380

In [5]:
# importing datasets
train_data_path = '/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/train'
test_data_path = '/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/vlg-dataset/test'

In [6]:
# Data preprocessing
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
    transforms.RandomVerticalFlip(p=0.3),
    transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)),  # Random crops
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


In [7]:
# load training dataset
train_dataset = ImageFolder(root=train_data_path, transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

In [8]:
# get classses names of the train dataset
class_names = train_dataset.classes

In [9]:
from torchvision import models
from torchvision.models import EfficientNet_B4_Weights
import torch
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()

        # Custom CNN layers
        self.conv_layers = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.SiLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.SiLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 2
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.SiLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.SiLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 3
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.SiLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.SiLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.SiLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 4
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.SiLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.SiLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.SiLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 5
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.SiLU(),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.SiLU(),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.SiLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Global Average Pooling to reduce feature map dimensions
            nn.AdaptiveAvgPool2d((1, 1))
        )

        # Pretrained EfficientNet-B4 for feature extraction
        self.pretrained_model = models.efficientnet_b4(weights=EfficientNet_B4_Weights.IMAGENET1K_V1)
        self.pretrained_features = self.pretrained_model.features
        self.pretrained_feature_dim = self.pretrained_model.classifier[-1].in_features


        # Dropout layer
        self.dropout = nn.Dropout(0.5)

        # Fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(512 + self.pretrained_feature_dim, 1024),  # Combine custom CNN and EfficientNet features
            nn.SiLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes)
        )

    def forward(self, x):
        # Custom CNN features
        custom_features = self.conv_layers(x)
        custom_features = custom_features.view(custom_features.size(0), -1)  # Flatten after GAP

        # Pretrained EfficientNet-B4 features
        pretrained_features = self.pretrained_features(x)  # Output shape: (batch_size, channels, height, width)
        pretrained_features = nn.AdaptiveAvgPool2d((1, 1))(pretrained_features)  # Reduce to (batch_size, channels, 1, 1)
        pretrained_features = pretrained_features.view(pretrained_features.size(0), -1)  # Flatten to (batch_size, channels)


        # Concatenate features
        combined_features = torch.cat((custom_features, pretrained_features), dim=1)

        # Classification
        output = self.fc(combined_features)
        return output


In [10]:
class FocalLoss(nn.Module):
    def __init__(self, gamma=2, alpha=None):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, inputs, targets):
        probs = F.softmax(inputs, dim=1)  # Compute softmax probabilities
        targets_one_hot = F.one_hot(targets, num_classes=inputs.size(1)).float()
        true_probs = (probs * targets_one_hot).sum(dim=1)
        
        log_probs = torch.log(true_probs + 1e-8)  # Avoid log(0)
        focal_weights = (1 - true_probs) ** self.gamma
        loss = -focal_weights * log_probs

        if self.alpha is not None:
            alpha_weights = torch.tensor(self.alpha).to(inputs.device)
            class_weights = (alpha_weights * targets_one_hot).sum(dim=1)
            loss *= class_weights

        return loss.mean()

In [11]:
class LabelSmoothingLoss(nn.Module):
    def __init__(self, num_classes, smoothing=0.1):
        super(LabelSmoothingLoss, self).__init__()
        self.num_classes = num_classes
        self.smoothing = smoothing

    def forward(self, inputs, targets):
        one_hot = F.one_hot(targets, self.num_classes).float()
        smooth_labels = one_hot * (1 - self.smoothing) + self.smoothing / self.num_classes
        log_probs = F.log_softmax(inputs, dim=1)
        return -(smooth_labels * log_probs).sum(dim=1).mean()

In [12]:
# initializing model
num_classes = len(class_names)
model = CNN(num_classes)  # Initialize your model
model = model.to(device)  # Move the model to GPU

Downloading: "https://download.pytorch.org/models/efficientnet_b4_rwightman-23ab8bcd.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b4_rwightman-23ab8bcd.pth
100%|██████████| 74.5M/74.5M [00:00<00:00, 188MB/s]


In [13]:
# optimizer and criterion
criterion = LabelSmoothingLoss(num_classes=num_classes, smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-5)
scheduler = CyclicLR(optimizer, base_lr=1e-5, max_lr=learning_rate, step_size_up=5, mode='triangular2')

In [14]:
# Training Loop
print('TRAINING STARTED....')

for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, targets) in enumerate(train_loader):
        data, targets = data.to(device), targets.to(device)

        # Forward
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        scheduler.step()

    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {loss.item():.4f}")

print('TRAINING COMPLETED!!!')

TRAINING STARTED....
Epoch [1/10] Loss: 2.0944
Epoch [2/10] Loss: 2.2690
Epoch [3/10] Loss: 1.8468
Epoch [4/10] Loss: 1.4672
Epoch [5/10] Loss: 1.2625
Epoch [6/10] Loss: 1.3583
Epoch [7/10] Loss: 1.0879
Epoch [8/10] Loss: 1.3684
Epoch [9/10] Loss: 1.3770
Epoch [10/10] Loss: 1.2700
TRAINING COMPLETED!!!


In [15]:
def predict_image(image_path, model):
    model.eval()
    with torch.no_grad():
        image = Image.open(image_path).convert('RGB')
        image = transform(image).unsqueeze(0).to(device)
        outputs = model(image)
        _, predicted = torch.max(outputs, 1)
        return class_names[predicted.item()]

# Testing dataset
print('STARTING PREDICTIONS.....')
results = []
test_images = [os.path.join(test_data_path, img) for img in os.listdir(test_data_path)]
for image_path in test_images:
    if os.path.isfile(image_path):
        prediction = predict_image(image_path, model)
        image_id = os.path.basename(image_path)
        results.append({"image_id": image_id, "class": prediction})
        print(f'Image: {image_id}, Predicted Class: {prediction}')



STARTING PREDICTIONS.....
Image: 02371.jpg, Predicted Class: siamese+cat
Image: 00767.jpg, Predicted Class: beaver
Image: 02360.jpg, Predicted Class: chihuahua
Image: 00266.jpg, Predicted Class: wolf
Image: 02450.jpg, Predicted Class: hippopotamus
Image: 01496.jpg, Predicted Class: german+shepherd
Image: 01600.jpg, Predicted Class: hippopotamus
Image: 00847.jpg, Predicted Class: hamster
Image: 00822.jpg, Predicted Class: beaver
Image: 00614.jpg, Predicted Class: mouse
Image: 02856.jpg, Predicted Class: spider+monkey
Image: 02621.jpg, Predicted Class: blue+whale
Image: 01850.jpg, Predicted Class: persian+cat
Image: 00878.jpg, Predicted Class: spider+monkey
Image: 01473.jpg, Predicted Class: pig
Image: 00091.jpg, Predicted Class: dolphin
Image: 01713.jpg, Predicted Class: spider+monkey
Image: 01306.jpg, Predicted Class: seal
Image: 00838.jpg, Predicted Class: siamese+cat
Image: 01760.jpg, Predicted Class: lion
Image: 02600.jpg, Predicted Class: hamster
Image: 00296.jpg, Predicted Class: 

In [16]:
# saving
submission = pd.DataFrame(results, columns=['image_id', 'class'])
submission.to_csv("/kaggle/working/submission.csv", index=False)
print("Predictions saved to /kaggle/working/submission.csv")

Predictions saved to /kaggle/working/submission.csv
