In [1]:
import numpy as np
import pandas as pd
import os
import zipfile
import matplotlib.pyplot as plt
import torch
from torchvision import datasets, transforms, models
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from skimage import io, transform
import torch.utils.data as data_utils
import torchvision.models as models
from tabulate import tabulate
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Subset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"murrmaringo","key":"f5b4c058e91c5ba7b8e4346e30cbc810"}'}

In [3]:
os.makedirs('/root/.kaggle', exist_ok=True)
!cp kaggle.json /root/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json
!pip install -q kaggle

!kaggle competitions download -c dog-breed-identification
with zipfile.ZipFile("dog-breed-identification.zip", 'r') as zip_ref:
    zip_ref.extractall("dog-breed-identification")

Downloading dog-breed-identification.zip to /content
 99% 684M/691M [00:01<00:00, 271MB/s]
100% 691M/691M [00:01<00:00, 371MB/s]


In [4]:
from sklearn.model_selection import train_test_split

labels = pd.read_csv('dog-breed-identification/labels.csv')

breeds = labels['breed'].unique()
breed_to_idx = {breed: idx for idx, breed in enumerate(breeds)}
idx_to_breed = {idx: breed for idx, breed in enumerate(breeds)}

# Add numerical labels
labels['label'] = labels['breed'].map(breed_to_idx)
train_df, valid_df = train_test_split(
    labels,
    train_size=0.8,
    shuffle=True,
    stratify=labels['breed'],
    random_state=42
)

In [5]:
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [6]:
class DogBreedDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.labels_df = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.labels_df)
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.labels_df.iloc[idx, 0] + '.jpg')
        image = Image.open(img_name).convert('RGB')
        label = self.labels_df.iloc[idx, -1]

        if self.transform:
            image = self.transform(image)

        return image, label

train_dataset = DogBreedDataset(
    dataframe=train_df,
    root_dir='dog-breed-identification/train',
    transform=train_transform
)

valid_dataset = DogBreedDataset(
    dataframe=valid_df,
    root_dir='dog-breed-identification/train',
    transform=val_transform
)

batch_size = 32

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4
)

val_loader = DataLoader(
    valid_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4
)



In [7]:
n_classes = 120


criterion = nn.CrossEntropyLoss()

In [8]:
class BasicBlockNet(nn.Module):
    def __init__(self):
        super().__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
        self.bn2 = nn.BatchNorm2d(64)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
        self.bn3 = nn.BatchNorm2d(128)

        self.avg_pool = nn.AdaptiveAvgPool2d((4, 4))

        self.lin1 = nn.Linear(128 * 4 * 4, n_classes)  # 128 channels * 4*4 from pooling

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))

        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)  # Flatten
        out = self.lin1(x)

        return out

In [9]:
net = BasicBlockNet().to(device)

def test(model, loader):
    loss_log = []
    acc_log = []
    model.eval()

    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            loss_fn = nn.CrossEntropyLoss()
            output = model(data)
            loss = loss_fn(output, target)

            loss_log.append(loss.item())

            preds = output.argmax(dim=1)
            acc = (preds == target).float().mean()
            acc_log.append(acc.item())

        return np.mean(loss_log), np.mean(acc_log)

def train_epoch(model, optimizer, train_loader):
    loss_log = []
    acc_log = []
    model.train()

    for data, target in train_loader:

        data, target = data.to(device), target.to(device)
        loss_fn = nn.CrossEntropyLoss()

        optimizer.zero_grad()

        output = model(data)

        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

        loss_log.append(loss.item())

        preds = output.argmax(dim=1)
        acc = (preds == target).float().mean()
        acc_log.append(acc.item())

    return loss_log, acc_log

def train(model, optimizer, n_epochs, train_loader, val_loader, scheduler=None):
    train_loss_log, train_acc_log, val_loss_log, val_acc_log = [], [], [], []

    for epoch in range(n_epochs):
        train_loss, train_acc = train_epoch(model, optimizer, train_loader)
        val_loss, val_acc = test(model, val_loader)

        train_loss_log.extend(train_loss)
        train_acc_log.extend(train_acc)

        val_loss_log.append(val_loss)
        val_acc_log.append(val_acc)

        print(f"Epoch {epoch}")
        print(f" train loss: {np.mean(train_loss)}, train acc: {np.mean(train_acc)}")
        print(f" val loss: {val_loss}, val acc: {val_acc}\n")

        if scheduler is not None:
            scheduler.step()

    return train_loss_log, train_acc_log, val_loss_log, val_acc_log

In [10]:
import torch.nn.functional as F

optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)
train_loss_log, train_acc_log, val_loss_log, val_acc_log = train(net, optimizer, 20, train_loader, val_loader)

Epoch 0
 train loss: 4.878618823364377, train acc: 0.0113525390625
 val loss: 4.735812082886696, val acc: 0.016163793101441115

Epoch 1
 train loss: 4.732528334483504, train acc: 0.0174416934751207
 val loss: 4.726931571960449, val acc: 0.015625

Epoch 2
 train loss: 4.695352049544454, train acc: 0.020263671875
 val loss: 4.70764497667551, val acc: 0.02685546875

Epoch 3
 train loss: 4.64138925075531, train acc: 0.0253762637876207
 val loss: 4.659614033997059, val acc: 0.02549164870288223

Epoch 4
 train loss: 4.623343113809824, train acc: 0.0292825137876207
 val loss: 4.604124873876572, val acc: 0.02685546875

Epoch 5
 train loss: 4.579483900219202, train acc: 0.0316018497251207
 val loss: 4.573637880384922, val acc: 0.02734375

Epoch 6
 train loss: 4.551852496340871, train acc: 0.03515625
 val loss: 4.587441615760326, val acc: 0.0390625

Epoch 7
 train loss: 4.5588672664016485, train acc: 0.0364990234375
 val loss: 4.629680536687374, val acc: 0.0283203125

Epoch 8
 train loss: 4.5459

In [14]:
def calc_accuracy(model, dataloader):
    y_pred = []
    y_test = []
    loss_sum = 0
    loss_fn = nn.CrossEntropyLoss()
    with torch.no_grad():
        for X, target in dataloader:
            X = X.cuda()
            target = target.cuda()
            logits = model(X)
            loss_sum += loss_fn(logits, target)
            y_pred.append(logits.argmax(dim=1))
            y_test.append(target)
        y_pred = torch.cat(y_pred)
        y_test = torch.cat(y_test)
    accuracy = torch.sum((y_pred == y_test)) / len(y_test)
    return accuracy, loss_sum.item(), y_pred, y_test

In [15]:
accuracy, _, y_pred, y_test = calc_accuracy(net, val_loader)

In [16]:
print(accuracy)

tensor(0.0479, device='cuda:0')


In [18]:
from tqdm import tqdm

def create_submission(model, test_dir, transform, device, breed_names):
    test_files = os.listdir(test_dir)
    model.eval()

    all_preds = []
    test_ids = []

    for file in tqdm(test_files, desc="Processing Test Images"):
        img_path = os.path.join(test_dir, file)
        image = Image.open(img_path).convert('RGB')
        image = transform(image).unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(image)
            probas = F.softmax(output, dim=1).cpu().numpy()[0]

        all_preds.append(probas)
        test_ids.append(file.split('.')[0])

    submission = pd.DataFrame(
        np.vstack(all_preds),
        columns=breed_names
    )
    submission.insert(0, 'id', test_ids)

    return submission

# Generate submission file
test_dir = 'dog-breed-identification/test'
breed_names = idx_to_breed.values()
submission = create_submission(net, test_dir, val_transform, device, breed_names)

submission.to_csv('submission.csv', index=False)
print("Submission file created successfully!")

Processing Test Images: 100%|██████████| 10357/10357 [00:48<00:00, 213.84it/s]


Submission file created successfully!
