In [None]:
import torch
import torchvision
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch import nn
from zipfile import ZipFile
import os
import requests

## Get Setup

In [None]:
try:
  from torchinfo import summary
except:
  !pip install -q torchinfo
  from torchinfo import summary

In [None]:
try:
  from going_modular import data_setup,engine
except:
  !git clone https://github.com/mr-ssrakesh/Datasets
  !mv Datasets/going_modular .
  !rm -rf Datasets
  from going_modular import data_setup,engine

Cloning into 'Datasets'...
remote: Enumerating objects: 29, done.[K
remote: Counting objects: 100% (29/29), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 29 (delta 2), reused 8 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (29/29), 14.54 KiB | 3.63 MiB/s, done.
Resolving deltas: 100% (2/2), done.


In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
device

'cpu'

In [None]:
def set_seed(seed:int=42):
  """Seed PyTorch's random number generators.

  Args:
    seed: Value handed to `torch.cuda.manual_seed` and then to
      `torch.manual_seed`.
  """
  # Same call order as before: CUDA generator first, then the default one.
  for seed_fn in (torch.cuda.manual_seed, torch.manual_seed):
    seed_fn(seed)

## Get Data

In [None]:
def download_data(source:str,
                  destination:str,
                  remove_source:bool=True):
  """Download a zip archive and extract it into ``data/<destination>``.

  When ``data/<destination>`` already exists the function returns immediately
  without downloading, extracting or deleting anything.

  Args:
    source: URL of the zip archive; its last path component is used as the
      local file name inside ``data/``.
    destination: Name of the sub-directory of ``data/`` to extract into.
    remove_source: Delete the downloaded zip once it has been extracted.

  Returns:
    The ``data/`` root directory (not the extraction directory).

  Raises:
    requests.HTTPError: If the server responds with an error status.
  """
  data_path=Path('data/')
  image_path=data_path/destination

  if image_path.is_dir():
    print('[INFO] Already downloaded ... skipping download')
    # No archive was fetched on this path, so there is nothing to remove.
    return data_path

  data_path.mkdir(parents=True,exist_ok=True)
  archive_path=data_path/Path(source).name

  # Download and validate first: the target directory is only created after
  # a successful download, so a failed attempt is not mistaken for a cached
  # dataset on the next call.
  response=requests.get(source,timeout=60)
  response.raise_for_status()
  archive_path.write_bytes(response.content)
  print('[INFO] Downloading completed')

  image_path.mkdir(parents=True,exist_ok=True)
  with ZipFile(archive_path,'r')as zipref:
    zipref.extractall(image_path)

  if remove_source:
    os.remove(archive_path)

  return data_path


# Fetch the dog-breed archive and unpack it under data/dog_breed_images.
download_data(
    destination='dog_breed_images',
    source='https://github.com/mr-ssrakesh/Datasets/raw/refs/heads/main/dog-breed-identification.zip',
)


[INFO] Downloading completed


PosixPath('data')

In [None]:
# Load the image-id / breed table and show it.
labels = pd.read_csv(filepath_or_buffer='data/dog_breed_images/labels.csv')
labels

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever
...,...,...
10217,ffd25009d635cfd16e793503ac5edef0,borzoi
10218,ffd3f636f7f379c51ba3648a9ff8254f,dandie_dinmont
10219,ffe2ca6c940cddfee68fa3cc6c63213f,airedale
10220,ffe5f6d8e2bff356e9482a80a6e29aac,miniature_pinscher


In [None]:
# Number of labelled images per breed.
breed_column = labels['breed']
breed_column.value_counts()

Unnamed: 0_level_0,count
breed,Unnamed: 1_level_1
scottish_deerhound,126
maltese_dog,117
afghan_hound,116
entlebucher,115
bernese_mountain_dog,114
...,...
golden_retriever,67
komondor,67
brabancon_griffon,67
eskimo_dog,66


In [None]:
import pandas as pd
from pathlib import Path
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from PIL import Image
import torch

# Locations of the extracted dataset
data_dir = Path("data/dog_breed_images")
labels_csv = data_dir / "labels.csv"
train_dir, test_dir = data_dir / "train", data_dir / "test"

# Load the id -> breed table and preview its first rows
labels_df = pd.read_csv(labels_csv)
print(labels_df.head())

# Sorted breed names; each breed's position in the list is its numeric label
class_names = sorted(labels_df["breed"].unique())
class_to_idx = dict(zip(class_names, range(len(class_names))))

# Preprocessing applied to every image: resize to 224x224, then convert to a tensor
transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

# Custom Dataset
class DogBreedDataset(torch.utils.data.Dataset):
    """Dataset of dog images stored as ``<images_dir>/<id>.jpg``.

    Two modes are supported:

    * labeled (``labels_df`` given): one sample per row of ``labels_df``; the
      row's ``"id"`` column names the image file and its ``"breed"`` column is
      translated to an integer through the module-level ``class_to_idx``.
    * unlabeled (``labels_df`` is None): one sample per ``*.jpg`` file found in
      ``images_dir``; the label is always ``-1``.

    Args:
        images_dir: Directory holding the ``.jpg`` files (``str`` or ``Path``).
        labels_df: Optional DataFrame with ``"id"`` and ``"breed"`` columns.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, images_dir, labels_df=None, transform=None):
        # Accept plain strings as well as Path objects.
        self.images_dir = Path(images_dir)
        self.labels_df = labels_df
        self.transform = transform
        # List the directory once, in sorted order: globbing on every
        # __len__/__getitem__ call is slow for large folders, and unsorted
        # glob results give no stable index -> file mapping.
        self.image_paths = (
            sorted(self.images_dir.glob("*.jpg")) if labels_df is None else None
        )

    def __len__(self):
        if self.labels_df is not None:
            return len(self.labels_df)
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``."""
        if self.labels_df is not None:
            row = self.labels_df.iloc[idx]
            img_path = self.images_dir / f'{row["id"]}.jpg'
            label = class_to_idx[row["breed"]]
        else:
            img_path = self.image_paths[idx]
            label = -1  # No label for test data

        # convert() returns an independent image, so the file can be closed
        # as soon as the block exits.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")
        if self.transform:
            img = self.transform(img)

        return img, label

# Labeled training set and unlabeled test set share the same preprocessing
train_dataset = DogBreedDataset(images_dir=train_dir, labels_df=labels_df, transform=transform)
test_dataset = DogBreedDataset(images_dir=test_dir, transform=transform)

# Batches of 32; only the training data is shuffled
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


                                 id             breed
0  000bec180eb18c7604dcecc8fe0dba07       boston_bull
1  001513dfcb2ffafc82cccf4d8bbaba97             dingo
2  001cdf01b096e06d78e9e5112d419397          pekinese
3  00214f311d5d2247d5dfe4fe24b2303d          bluetick
4  0021f9ceb3235effd7fcde7f7538ed62  golden_retriever


In [None]:
# (number of training samples, number of test samples)
tuple(map(len, (train_dataset, test_dataset)))

(10222, 10357)

In [None]:
# Pull a single batch from the training loader and inspect its dimensions.
batch_iterator = iter(train_dataloader)
img, label = next(batch_iterator)
(img.shape, len(label))

(torch.Size([32, 3, 224, 224]), 32)

In [None]:
import random
from PIL import Image

# Set seed so the same image id is drawn on every run
random.seed(42)

# Draw one image id at random (kept as a one-element list, as random.sample returns it)
ids_list = labels_df['id'].tolist()
random_id = random.sample(ids_list, k=1)

# The frame is indexed by row number, not by image id, so `.loc[random_id, ...]`
# raises KeyError. Select the matching row(s) through the 'id' column instead.
class_label = labels_df.loc[labels_df['id'].isin(random_id), 'breed']
class_label

KeyError: "None of [Index(['2d18755cd7a1155c3944910a6dffd377'], dtype='object')] are in the [index]"

In [None]:
# Display the breed-name -> integer-label mapping built from the sorted breed names.
class_to_idx

{'affenpinscher': 0,
 'afghan_hound': 1,
 'african_hunting_dog': 2,
 'airedale': 3,
 'american_staffordshire_terrier': 4,
 'appenzeller': 5,
 'australian_terrier': 6,
 'basenji': 7,
 'basset': 8,
 'beagle': 9,
 'bedlington_terrier': 10,
 'bernese_mountain_dog': 11,
 'black-and-tan_coonhound': 12,
 'blenheim_spaniel': 13,
 'bloodhound': 14,
 'bluetick': 15,
 'border_collie': 16,
 'border_terrier': 17,
 'borzoi': 18,
 'boston_bull': 19,
 'bouvier_des_flandres': 20,
 'boxer': 21,
 'brabancon_griffon': 22,
 'briard': 23,
 'brittany_spaniel': 24,
 'bull_mastiff': 25,
 'cairn': 26,
 'cardigan': 27,
 'chesapeake_bay_retriever': 28,
 'chihuahua': 29,
 'chow': 30,
 'clumber': 31,
 'cocker_spaniel': 32,
 'collie': 33,
 'curly-coated_retriever': 34,
 'dandie_dinmont': 35,
 'dhole': 36,
 'dingo': 37,
 'doberman': 38,
 'english_foxhound': 39,
 'english_setter': 40,
 'english_springer': 41,
 'entlebucher': 42,
 'eskimo_dog': 43,
 'flat-coated_retriever': 44,
 'french_bulldog': 45,
 'german_shepherd'