In [1]:
import torch
import os
import torch.nn.functional as F
import pandas as pd
from PIL import Image
from pathlib import Path
import pandas as pd
import torchvision
from torch.utils.data import TensorDataset
from torchvision.models import resnet34, ResNet34_Weights
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

In [2]:
# Load the id/breed table and split it 80/20 into training and validation
# frames, stratified on the breed column with a fixed seed.
labels = pd.read_csv('labels.csv')
train, valid = train_test_split(
    labels,
    train_size=0.8,
    shuffle=True,
    stratify=labels['breed'],
    random_state=42,
)

In [3]:
# Separate each split into an id Series and a breed Series, both re-indexed
# from 0.
# NOTE: `train` is rebound from the split DataFrame to its 'id' column here,
# so this cell cannot be re-run without first re-running the split cell.
train_labels = train['breed'].reset_index(drop=True)
train = train['id'].reset_index(drop=True)
val_labels = valid['breed'].reset_index(drop=True)
val = valid['id'].reset_index(drop=True)

In [4]:
# Encode breed names as integer class ids.
#
# `breeds` maps breed name -> class id; ids follow the order of
# labels['breed'].value_counts().index.
breeds = {
    breed: class_id
    for class_id, breed in enumerate(labels['breed'].value_counts().index)
}

# Build the (N, 1) int64 label tensors directly from the breed values.
# Iterating over the values (instead of writing element by element through
# `.iloc[index]` driven by index labels) does not rely on the Series having a
# 0..N-1 index and avoids the float32 round trip of torch.zeros(...).long().
# A breed missing from `breeds` still raises KeyError.
val_labels_torch = torch.tensor(
    [breeds[breed] for breed in val_labels], dtype=torch.long
).unsqueeze(1)
train_labels_torch = torch.tensor(
    [breeds[breed] for breed in train_labels], dtype=torch.long
).unsqueeze(1)

# Spot-check one encoded validation label.
val_labels_torch[91]

tensor([21])

In [5]:
# Training-time preprocessing: resize to 224x224, then random horizontal flip,
# random rotation and colour jitter, and finally conversion to a tensor.
transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(size=(224, 224)),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomRotation(degrees=10),
        torchvision.transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        torchvision.transforms.ToTensor(),
    ]
)


def create_dataset(series, directory_path, transform):
    """Load the named images, transform each one, and stack the results.

    Args:
        series: Iterable of image ids or file names (e.g. a list or a pandas
            Series). Entries that do not already end in '.jpg' get that
            suffix appended.
        directory_path: Directory that contains the image files.
        transform: Callable applied to each image after conversion to RGB.
            Its results are stacked, so they must all share one shape.

    Returns:
        The transformed images stacked along a new leading dimension, in the
        iteration order of `series`.

    Note:
        torch.stack is called on the collected list, so an empty `series`
        is not supported.
    """
    tensors = []
    # Iterate over the values instead of indexing `series[i]`: on a pandas
    # Series that lookup is label-based and only works with a 0..n-1 index.
    for name in series:
        img_file = name if name.endswith('.jpg') else name + '.jpg'
        img_path = os.path.join(directory_path, img_file)
        # Image.open keeps the underlying file open; the context manager
        # releases the handle as soon as the pixels have been converted.
        with Image.open(img_path) as img:
            img_t = transform(img.convert('RGB'))  # Ensure RGB
        tensors.append(img_t)
    return torch.stack(tensors, dim=0)

In [6]:
# Decode every training image up front into one stacked tensor.
# NOTE(review): `transform` is applied once per image here, so its random
# augmentations are sampled a single time rather than once per epoch.
directory_path = Path('./train')
train_dataset = create_dataset(
    series=train,
    directory_path=directory_path,
    transform=transform,
)

In [7]:
# Validation preprocessing has no random augmentation: resize and tensor
# conversion only.
val_transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(size=(224, 224)),
        torchvision.transforms.ToTensor(),
    ]
)
val_dataset = create_dataset(
    series=val,
    directory_path=directory_path,
    transform=val_transform,
)

In [8]:

# Pair each stacked image tensor with its label tensor so a DataLoader can
# serve (image, label) batches.
val_dataset_torch = TensorDataset(val_dataset, val_labels_torch)
train_dataset_torch = TensorDataset(train_dataset, train_labels_torch)

In [9]:
# Sanity check: the image tensor and the label tensor should share the same leading dimension.
train_dataset.shape,train_labels_torch.shape

(torch.Size([8177, 3, 224, 224]), torch.Size([8177, 1]))

In [10]:
# Inspect the first validation sample: an (image tensor, label tensor) pair.
val_dataset_torch[0]

(tensor([[[0.2980, 0.2667, 0.1569,  ..., 0.2510, 0.0471, 0.0353],
          [0.2275, 0.1490, 0.2157,  ..., 0.1020, 0.0196, 0.0078],
          [0.2000, 0.1098, 0.2745,  ..., 0.0510, 0.0275, 0.0157],
          ...,
          [0.1176, 0.2118, 0.1843,  ..., 0.1176, 0.1373, 0.1451],
          [0.1490, 0.1843, 0.1804,  ..., 0.1765, 0.1569, 0.1451],
          [0.2431, 0.2784, 0.2118,  ..., 0.1529, 0.1216, 0.1059]],
 
         [[0.3765, 0.3373, 0.2275,  ..., 0.2863, 0.0549, 0.0314],
          [0.3137, 0.2314, 0.2980,  ..., 0.1373, 0.0275, 0.0039],
          [0.2941, 0.2039, 0.3647,  ..., 0.0824, 0.0392, 0.0157],
          ...,
          [0.1176, 0.2118, 0.1882,  ..., 0.1333, 0.1529, 0.1608],
          [0.1490, 0.1843, 0.1843,  ..., 0.1843, 0.1647, 0.1529],
          [0.2431, 0.2784, 0.2157,  ..., 0.1569, 0.1255, 0.1059]],
 
         [[0.3765, 0.3608, 0.2588,  ..., 0.2235, 0.0431, 0.0510],
          [0.2667, 0.2000, 0.2706,  ..., 0.0745, 0.0157, 0.0235],
          [0.1961, 0.1176, 0.2902,  ...,

In [11]:
class ResNetClassifier(nn.Module):
    """ImageNet-pretrained ResNet-34 trunk followed by a small MLP head.

    Every parameter of the pretrained network has requires_grad disabled;
    the head (fc.in_features -> 256 -> 256 -> num_classes) is newly created.
    """

    def __init__(self, num_classes=120):
        """Build the frozen trunk and the classification head.

        Args:
            num_classes: Width of the final linear layer.
        """
        super().__init__()
        backbone = resnet34(weights=ResNet34_Weights.IMAGENET1K_V1)
        # Freeze all pretrained weights.
        for weight in backbone.parameters():
            weight.requires_grad = False

        # Keep every child module of the backbone except the last one.
        trunk_layers = list(backbone.children())[:-1]
        self.features = nn.Sequential(*trunk_layers)

        head_layers = [
            nn.Flatten(),
            nn.Linear(backbone.fc.in_features, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.BatchNorm1d(256),
            nn.Linear(256, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run `x` through the trunk and the head and return the head's output."""
        return self.classifier(self.features(x))

In [12]:
# Build the 120-class model on the CPU, then create the loss and the Adam
# optimizer used by the training loop.
model_torch = ResNetClassifier(num_classes=120)
model_torch = model_torch.to('cpu')
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model_torch.parameters(), lr=1e-4)

In [13]:

# Mini-batch loaders: training batches are shuffled, validation order is fixed.
train_loader = DataLoader(train_dataset_torch, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset_torch, batch_size=64, shuffle=False)

n_epochs = 10
for i in range(1, n_epochs+1):
    # --- training pass ---
    # NOTE(review): train() puts every submodule in training mode, including
    # the frozen trunk — confirm that is intended for its normalisation layers.
    model_torch.train()
    training_loss = 0.0
    for img, label in train_loader:
        img = img.to('cpu')
        # Labels are stored as (batch, 1); drop the trailing dim for the loss.
        label = label.to('cpu').squeeze(1)

        outputs = model_torch(img)
        optimizer.zero_grad()        # clear gradients left over from the previous step
        losses = loss_fn(outputs, label)
        training_loss += losses.item()
        losses.backward()
        optimizer.step()             # apply the update computed from this batch

    # --- validation pass: eval mode, no gradient tracking ---
    model_torch.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for img, label in val_loader:
            img = img.to('cpu')
            label = label.to('cpu').squeeze(1)

            outputs = model_torch(img)
            loss = loss_fn(outputs, label)
            val_loss += loss.item()

            # Predicted class = index of the largest output per sample.
            _, predicted = torch.max(outputs, 1)
            total += label.size(0)
            correct += (predicted == label).sum().item()

    # Losses are averaged over the number of batches, accuracy over the number of samples.
    # The model is left in eval mode when the loop finishes.
    print(f"Epoch [{i}/{n_epochs}], Training Loss: {training_loss/len(train_loader):.4f}, Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")



Epoch [1/10], Training Loss: 4.4611, Validation Loss: 3.8650, Accuracy: 31.10%
Epoch [2/10], Training Loss: 3.5523, Validation Loss: 3.1373, Accuracy: 50.95%
Epoch [3/10], Training Loss: 2.9065, Validation Loss: 2.5992, Accuracy: 59.61%
Epoch [4/10], Training Loss: 2.4378, Validation Loss: 2.2235, Accuracy: 64.16%
Epoch [5/10], Training Loss: 2.1012, Validation Loss: 1.9208, Accuracy: 67.14%
Epoch [6/10], Training Loss: 1.8274, Validation Loss: 1.7113, Accuracy: 69.34%
Epoch [7/10], Training Loss: 1.6240, Validation Loss: 1.5335, Accuracy: 70.61%
Epoch [8/10], Training Loss: 1.4584, Validation Loss: 1.4275, Accuracy: 72.18%
Epoch [9/10], Training Loss: 1.3190, Validation Loss: 1.3263, Accuracy: 73.25%
Epoch [10/10], Training Loss: 1.2139, Validation Loss: 1.2450, Accuracy: 73.06%


In [14]:
# Persist the trained model's state_dict (weights and buffers only) to disk.
torch.save(obj=model_torch.state_dict(), f='resnet_custom_head.pth')

In [15]:
# Peek at a few test file names (entries 1 through 9 of the directory listing).
os.listdir('./test')[1:10]

['00102ee9d8eb90812350685311fe5890.jpg',
 '0012a730dfa437f5f3613fb75efcd4ce.jpg',
 '001510bc8570bbeee98c8d80c8a95ec1.jpg',
 '001a5f3114548acdefa3d4da05474c2e.jpg',
 '00225dcd3e4d2410dd53239f95c0352f.jpg',
 '002c2a3117c2193b4d26400ce431eebd.jpg',
 '002c58d413a521ae8d1a5daeb35fc803.jpg',
 '002f80396f1e3db687c5932d7978b196.jpg',
 '0036c6bcec6031be9e62a257b1c3c442.jpg']

In [16]:
# Load every file listed in ./test with the augmentation-free validation
# preprocessing and keep the stacked tensor on the CPU.
test = os.listdir('./test')
test_dataset = create_dataset(
    series=test,
    directory_path='./test',
    transform=val_transform,
).to(device='cpu')

In [17]:
# Empty submission frame: a 'file' column followed by one column per breed,
# in the insertion order of the `breeds` mapping.
output = pd.DataFrame(columns=['file'] + list(breeds.keys()))

In [18]:
# Display the submission frame to check its column headers.
output

Unnamed: 0,file,scottish_deerhound,maltese_dog,afghan_hound,entlebucher,bernese_mountain_dog,shih-tzu,great_pyrenees,pomeranian,basenji,...,walker_hound,german_shepherd,otterhound,giant_schnauzer,tibetan_mastiff,golden_retriever,komondor,brabancon_griffon,eskimo_dog,briard


In [19]:
# Display the breed -> class-id mapping.
breeds

{'scottish_deerhound': 0,
 'maltese_dog': 1,
 'afghan_hound': 2,
 'entlebucher': 3,
 'bernese_mountain_dog': 4,
 'shih-tzu': 5,
 'great_pyrenees': 6,
 'pomeranian': 7,
 'basenji': 8,
 'samoyed': 9,
 'tibetan_terrier': 10,
 'airedale': 11,
 'leonberg': 12,
 'cairn': 13,
 'japanese_spaniel': 14,
 'beagle': 15,
 'australian_terrier': 16,
 'miniature_pinscher': 17,
 'blenheim_spaniel': 18,
 'irish_wolfhound': 19,
 'saluki': 20,
 'lakeland_terrier': 21,
 'papillon': 22,
 'norwegian_elkhound': 23,
 'whippet': 24,
 'siberian_husky': 25,
 'pug': 26,
 'chow': 27,
 'italian_greyhound': 28,
 'pembroke': 29,
 'ibizan_hound': 30,
 'border_terrier': 31,
 'newfoundland': 32,
 'lhasa': 33,
 'silky_terrier': 34,
 'dandie_dinmont': 35,
 'bedlington_terrier': 36,
 'sealyham_terrier': 37,
 'rhodesian_ridgeback': 38,
 'irish_setter': 39,
 'old_english_sheepdog': 40,
 'collie': 41,
 'boston_bull': 42,
 'schipperke': 43,
 'kelpie': 44,
 'african_hunting_dog': 45,
 'bouvier_des_flandres': 46,
 'english_foxhou

In [20]:
# Predict class probabilities for every test image and fill `output`.
#
# - eval() and no_grad() are set explicitly so this cell does not depend on
#   the mode the training cell happened to leave the model in (a single-sample
#   forward pass needs BatchNorm/Dropout in inference mode) and so no autograd
#   graph is built.
# - File names come from `test`, the listing `test_dataset` was built from,
#   rather than from a second os.listdir() call.
# - The frame is built once from the stacked probabilities instead of being
#   grown one row at a time with `output.loc[i] = ...`.
INFERENCE_BATCH_SIZE = 64

model_torch.eval()
probability_chunks = []
with torch.no_grad():
    for start in range(0, len(test_dataset), INFERENCE_BATCH_SIZE):
        batch = test_dataset[start:start + INFERENCE_BATCH_SIZE].cpu()
        logits = model_torch(batch).cpu()
        probability_chunks.append(F.softmax(logits, dim=1))

# With no test images there is nothing to add; `output` stays as it was.
if probability_chunks:
    probabilities = torch.cat(probability_chunks, dim=0).numpy()
    output = pd.DataFrame(probabilities, columns=list(breeds.keys()))
    # One row per image, in the same order as `test`.
    output.insert(0, 'file', list(test)[:len(output)])

In [21]:
# Preview the first rows of the filled submission frame.
output.head()

Unnamed: 0,file,scottish_deerhound,maltese_dog,afghan_hound,entlebucher,bernese_mountain_dog,shih-tzu,great_pyrenees,pomeranian,basenji,...,walker_hound,german_shepherd,otterhound,giant_schnauzer,tibetan_mastiff,golden_retriever,komondor,brabancon_griffon,eskimo_dog,briard
0,000621fb3cbb32d8935728e48679680e.jpg,0.000659,0.003311,0.009713,0.003379,0.000892,0.050852,0.003861,0.012938,0.000909,...,0.000664,0.000435,0.00235,0.000819,0.000631,0.00308,0.002024,0.004601,0.000908,0.001707
1,00102ee9d8eb90812350685311fe5890.jpg,0.001723,0.002765,0.0004,0.00117,0.000852,0.000727,0.018708,0.028496,0.00259,...,0.002622,0.002021,0.001148,0.000648,0.002833,0.001017,0.003087,0.001671,0.068679,0.000746
2,0012a730dfa437f5f3613fb75efcd4ce.jpg,0.004149,0.00046,0.009862,0.001562,0.003852,0.000822,0.014213,0.000407,0.000114,...,0.002447,0.000137,0.001005,0.000294,0.000404,0.004789,0.000972,0.000301,0.00027,0.000872
3,001510bc8570bbeee98c8d80c8a95ec1.jpg,0.016575,0.002207,0.013036,0.002148,0.006814,0.007521,0.007635,0.00437,0.002209,...,0.004652,0.001151,0.005507,0.022319,0.004939,0.002218,0.015868,0.002977,0.005455,0.012818
4,001a5f3114548acdefa3d4da05474c2e.jpg,0.000641,0.012729,0.001877,0.004778,0.004254,0.051049,0.005813,0.001339,0.001183,...,0.001333,0.000493,0.011295,0.003353,0.006115,0.001177,0.006312,0.006993,0.000859,0.033452


In [22]:
# Write the predictions to CSV without the DataFrame index.
# NOTE(review): the identifier column is named 'file' and its values keep the
# '.jpg' suffix — confirm this matches the expected submission format.
output.to_csv(path_or_buf='test_submission.csv', index=False)

In [23]:
# Write the breed -> class-id mapping to a JSON file.
import json

with open("breed_mapping.json", mode="w") as mapping_file:
    mapping_file.write(json.dumps(breeds))
