In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import torchvision
import torchvision.transforms as transforms
import os
import pandas as pd
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import models
from PIL import Image

In [2]:
train_dir = "/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/train"
test_dir = "/kaggle/input/vlg-recruitment-24-challenge/vlg-dataset/test"

In [3]:
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
device=get_default_device()
print(device)
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return (data.to(device, non_blocking=True))

cuda


In [4]:
# Data Preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = ImageFolder(root=train_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,num_workers=2)

# Validate Class to Index Mapping
print("Class to Index Mapping:", train_dataset.class_to_idx)

Class to Index Mapping: {'antelope': 0, 'bat': 1, 'beaver': 2, 'blue+whale': 3, 'bobcat': 4, 'buffalo': 5, 'chihuahua': 6, 'cow': 7, 'dalmatian': 8, 'deer': 9, 'dolphin': 10, 'elephant': 11, 'german+shepherd': 12, 'giant+panda': 13, 'giraffe': 14, 'grizzly+bear': 15, 'hamster': 16, 'hippopotamus': 17, 'humpback+whale': 18, 'killer+whale': 19, 'leopard': 20, 'lion': 21, 'mole': 22, 'mouse': 23, 'otter': 24, 'ox': 25, 'persian+cat': 26, 'pig': 27, 'polar+bear': 28, 'raccoon': 29, 'rat': 30, 'seal': 31, 'siamese+cat': 32, 'skunk': 33, 'spider+monkey': 34, 'tiger': 35, 'walrus': 36, 'weasel': 37, 'wolf': 38, 'zebra': 39}


In [5]:
class DeviceDataLoader():
  def __init__(self,dl,device):
    self.dl=dl
    self.device=device
  def __iter__(self):
    for b in self.dl:
      yield to_device(b,self.device)
train_loader=DeviceDataLoader(train_loader,device)

In [6]:
class conv_block(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(conv_block, self).__init__()
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        return self.relu(self.batchnorm(self.conv(x)))


In [7]:
class Inception_block(nn.Module):
    def __init__(
        self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
    ):
        super(Inception_block, self).__init__()
        self.branch1 = conv_block(in_channels, out_1x1, kernel_size=1)

        self.branch2 = nn.Sequential(
            conv_block(in_channels, red_3x3, kernel_size=1),
            conv_block(red_3x3, out_3x3, kernel_size=(3, 3), padding=1),
        )

        self.branch3 = nn.Sequential(
            conv_block(in_channels, red_5x5, kernel_size=1),
            conv_block(red_5x5, out_5x5, kernel_size=5, padding=2),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            conv_block(in_channels, out_1x1pool, kernel_size=1),
        )

    def forward(self, x):
        return torch.cat(
            [self.branch1(x), self.branch2(x), self.branch3(x), self.branch4(x)], 1
        )

In [8]:
class GoogLeNet(nn.Module):
    def __init__(self, num_classes=40):
        super(GoogLeNet, self).__init__()
        self.conv1 = conv_block(
            in_channels=3,
            out_channels=64,
            kernel_size=7,
            stride=2,
            padding=3,
        )

        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = conv_block(64, 192, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # In this order: in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
        self.inception3a = Inception_block(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception_block(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception4a = Inception_block(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception_block(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception_block(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception_block(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception_block(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception5a = Inception_block(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception_block(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.dropout = nn.Dropout(p=0.4)
        self.fc1 = nn.Linear(1024, num_classes)
    def forward(self,x):
        x=self.conv1(x)
        x=self.maxpool1(x)
        x=self.conv2(x)
        x=self.maxpool2(x)

        x=self.inception3a(x)
        x=self.inception3b(x)
        x=self.maxpool3(x)

        x=self.inception4a(x)
        x=self.inception4b(x)
        x=self.inception4c(x)
        x=self.inception4d(x)
        x=self.inception4e(x)
        x=self.maxpool4(x)
        
        x=self.inception5a(x)
        x=self.inception5b(x)
        x=self.avgpool(x)
        x=self.dropout(x)
        x = x.view(x.size(0), -1)
        x=self.fc1(x)
        return x
        


In [9]:
model=GoogLeNet()
model.to(device)

GoogLeNet(
  (conv1): conv_block(
    (relu): ReLU()
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (conv2): conv_block(
    (relu): ReLU()
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batchnorm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (inception3a): Inception_block(
    (branch1): conv_block(
      (relu): ReLU()
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
      (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): conv_block(
        (relu): ReLU()
        (conv): Conv2d(192, 96, kernel_size

In [10]:
def loss_batch(model,loss_func,xb,yb,opt=None,metric=None):
    preds=model(xb)
    loss=loss_func(preds,yb)
    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()
    metric_batch=None
    if metric is not None:
        metric_batch=metric(preds,yb)
    else:
      metric_batch=None
    return loss.item(),len(xb),metric_batch

In [11]:
def accuracy(outputs,labels):
    _,preds=torch.max(outputs,dim=1)
    return torch.tensor(torch.sum(preds==labels).item()/len(preds))

In [12]:
def evaluate(model,loss_func,valid_dl,metric=None):
  model.eval()
  with torch.no_grad():
    results=[loss_batch(model,loss_func,xb,yb,metric=metric) for xb,yb in valid_dl]
    losses,nums,metric_list=zip(*results)
    total=np.sum(nums)
    avg_loss=np.sum(np.multiply(losses,nums))/total
    metric_list=[x for x in metric_list if x is not None]
    if metric is None:
        return avg_loss
    else:
        return avg_loss,np.mean(metric_list)


In [13]:
def fit(epochs,model,loss_func,train_dl,metric=None,opt_func=None):
  train_losses,metrics_list=[],[]
  opt=opt_func(model.parameters(),lr=0.001,momentum=0.9,weight_decay=1e-4, 
    nesterov=True)
  for epoch in range(epochs):
    model.train()
    for xb,yb in train_loader:
      train_loss,_,_=loss_batch(model,loss_func,xb,yb,opt)
    train_losses.append(train_loss)
    avg_loss,avg_metric=evaluate(model,loss_func,train_loader,metric)
    
    metrics_list.append(avg_metric)
    if metric is None:
      print('Epoch [{}/{}],train_loss:{:.4f},val_loss:{:.4f}'.format(epoch+1,epochs,train_loss,avg_loss))
    else:
      print('Epoch [{}/{}],train_loss:{:.4f},val_loss:{:.4f},val_metric:{:.4f}'.format(epoch+1,epochs,train_loss,avg_loss,avg_metric))
  return train_losses,metrics_list

In [14]:
optimizer = torch.optim.SGD 
    
loss_func=torch.nn.CrossEntropyLoss()

In [15]:
losses,accuracylist=fit(20,model,loss_func,train_loader,metric=accuracy,opt_func=optimizer)

Epoch [1/20],train_loss:3.1195,val_loss:2.9134,val_metric:0.1972
Epoch [2/20],train_loss:3.0917,val_loss:2.6102,val_metric:0.2725
Epoch [3/20],train_loss:2.3693,val_loss:2.4687,val_metric:0.3042
Epoch [4/20],train_loss:2.5944,val_loss:2.3968,val_metric:0.3238
Epoch [5/20],train_loss:2.8330,val_loss:2.1089,val_metric:0.3893
Epoch [6/20],train_loss:2.2052,val_loss:2.0767,val_metric:0.3943
Epoch [7/20],train_loss:2.9264,val_loss:1.8847,val_metric:0.4444
Epoch [8/20],train_loss:1.3003,val_loss:1.7731,val_metric:0.4781
Epoch [9/20],train_loss:1.9658,val_loss:1.7838,val_metric:0.4745
Epoch [10/20],train_loss:1.5893,val_loss:1.6478,val_metric:0.5044
Epoch [11/20],train_loss:2.2887,val_loss:1.5558,val_metric:0.5398
Epoch [12/20],train_loss:1.2828,val_loss:1.5897,val_metric:0.5232
Epoch [13/20],train_loss:1.4163,val_loss:1.4853,val_metric:0.5492
Epoch [14/20],train_loss:1.0948,val_loss:1.3711,val_metric:0.5839
Epoch [15/20],train_loss:1.3768,val_loss:1.3689,val_metric:0.5859
Epoch [16/20],train

In [16]:
model.eval()
test_images = [f for f in os.listdir(test_dir) if f.endswith('.jpg')]
test_predictions = []

for img_name in test_images:
    img_path = os.path.join(test_dir, img_name)
    image = Image.open(img_path).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(image)
        predicted_class = torch.argmax(outputs, dim=1).item()
        test_predictions.append((img_name, train_dataset.classes[predicted_class]))

# Save Predictions
submission = pd.DataFrame(test_predictions, columns=['image_id', 'class'])
submission=submission.sort_values('image_id')
print(submission.head())
submission.to_csv("/kaggle/working/SUBMISSIONINCEPTIONNET2nd.csv", index=False) 

       image_id       class
1451  00001.jpg        wolf
2500  00002.jpg  blue+whale
620   00003.jpg      bobcat
356   00004.jpg    elephant
2861  00005.jpg         cow
