In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
import pandas as pd
import os

# Directories
train_dir = '/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/train'
test_dir = '/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/test'
output_dir = '/kaggle/working/'

# Parameters
img_size = 128
batch_size = 32
num_classes = 40
epochs = 20  # Reduced for faster convergence

# Data Augmentation and Normalization
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(img_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

val_transforms = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Datasets and Loaders
train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
val_dataset = datasets.ImageFolder(train_dir, transform=val_transforms)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Load Pretrained ResNet
model = models.resnet50(weights='IMAGENET1K_V2')

# Modify Fully Connected Layer
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Freeze Early Layers (Optional - Uncomment to Fine-Tune Only Last Layers)
for param in model.parameters():
    param.requires_grad = False

for param in model.fc.parameters():
    param.requires_grad = True

# Use GPU if Available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Training Loop
def train_model(model, train_loader, val_loader, epochs):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = correct / total
        
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                val_correct += (predicted == labels).sum().item()
                val_total += labels.size(0)
        
        val_epoch_loss = val_loss / len(val_loader.dataset)
        val_epoch_acc = val_correct / val_total
        
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}, Val Loss: {val_epoch_loss:.4f}, Val Accuracy: {val_epoch_acc:.4f}")
        scheduler.step()

# Train the Model
train_model(model, train_loader, val_loader, epochs)


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 210MB/s]


Epoch 1/20, Loss: 1.4284, Accuracy: 0.6681, Val Loss: 0.6420, Val Accuracy: 0.8341
Epoch 2/20, Loss: 0.7789, Accuracy: 0.7819, Val Loss: 0.4938, Val Accuracy: 0.8563
Epoch 3/20, Loss: 0.6726, Accuracy: 0.8013, Val Loss: 0.4324, Val Accuracy: 0.8669
Epoch 4/20, Loss: 0.6540, Accuracy: 0.8044, Val Loss: 0.4107, Val Accuracy: 0.8687
Epoch 5/20, Loss: 0.6038, Accuracy: 0.8186, Val Loss: 0.3563, Val Accuracy: 0.8840
Epoch 6/20, Loss: 0.5737, Accuracy: 0.8276, Val Loss: 0.3303, Val Accuracy: 0.8926
Epoch 7/20, Loss: 0.5535, Accuracy: 0.8323, Val Loss: 0.3383, Val Accuracy: 0.8929
Epoch 8/20, Loss: 0.5480, Accuracy: 0.8329, Val Loss: 0.3226, Val Accuracy: 0.8982
Epoch 9/20, Loss: 0.5351, Accuracy: 0.8353, Val Loss: 0.3112, Val Accuracy: 0.9015
Epoch 10/20, Loss: 0.5189, Accuracy: 0.8453, Val Loss: 0.3082, Val Accuracy: 0.9027
Epoch 11/20, Loss: 0.5285, Accuracy: 0.8429, Val Loss: 0.2879, Val Accuracy: 0.9070
Epoch 12/20, Loss: 0.4945, Accuracy: 0.8481, Val Loss: 0.2954, Val Accuracy: 0.9073
E

In [2]:
# Save the current state of the model
torch.save(model.state_dict(), '/kaggle/working/model_epoch_20.pth')

# Load the model's state dictionary
model.load_state_dict(torch.load('/kaggle/working/model_epoch_20.pth'))
model.eval()




  model.load_state_dict(torch.load('/kaggle/working/model_epoch_20.pth'))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [3]:
from PIL import Image

class CustomTestDataset(torch.utils.data.Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.image_paths = [os.path.join(image_dir, fname) for fname in os.listdir(image_dir) if fname.endswith(('.png', '.jpg', '.jpeg'))]
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, os.path.basename(image_path)


In [4]:
# Test Transforms
test_transforms = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Custom Test Dataset and DataLoader
test_dataset = CustomTestDataset(test_dir, transform=test_transforms)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)


In [5]:
# Test the model and generate predictions
model.eval()
predictions = []
class_labels = list(train_dataset.class_to_idx.keys())  # Get class labels from training dataset
image_ids = []

for inputs, image_name in test_loader:
    inputs = inputs.to(device)
    outputs = model(inputs)
    _, predicted = torch.max(outputs, 1)
    predictions.append(class_labels[predicted.item()])
    image_ids.append(image_name[0])  # Add the image name (basename)

# Prepare Submission
submission_df = pd.DataFrame({
    'image_id': image_ids,
    'class': predictions
})

# Save Submission
submission_df.to_csv(os.path.join(output_dir, 'submission1.csv'), index=False)
print("Submission saved!")


Submission saved!
