In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%cd '/content/drive/My Drive/CV projects/DogBreedIdentification'

/content/drive/My Drive/CV projects/DogBreedIdentification


In [3]:
import pandas as pd
data = pd.read_csv('labels.csv')
data.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [4]:
print(f'{len(data.breed.unique())} classes')

120 classes


In [5]:
NUM_CLASSES = 16
selected_breed_list = list(data.breed.value_counts().head(NUM_CLASSES).index)
data =data[data['breed'].isin(selected_breed_list)]
data.reset_index(drop= True, inplace= True)
data.head()

Unnamed: 0,id,breed
0,003df8b8a8b05244b1d920bb6cf451f9,basenji
1,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound
2,00693b8bc2470375cc744a6391d397ec,maltese_dog
3,00bee065dcec471f26394855c5c2f3de,cairn
4,013f8fdf6d638c7bb042f5f17e8a9fdc,tibetan_terrier


In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
import os
from torch.utils.data import Dataset, DataLoader
class DogBreedDataset(Dataset):
  def __init__(self, dataframe, directory, transforms= None):
    super().__init__()
    self.df = dataframe
    self.dir = directory
    self.transforms = transforms

  def __len__(self):
    return len(self.df)

  def __getitem__(self, index):
    image_file = '{}.jpg'.format(self.df.loc[index, 'id'])
    image_path = os.path.join(self.dir, image_file)
    image = Image.open(image_path).convert('RGB').resize((224,224))
    np_dummies_label = pd.get_dummies(self.df, columns= ['breed']).iloc[index, 1:].to_numpy().astype('float')
    label = torch.FloatTensor(np_dummies_label.argmax())

    if self.transforms:
      image = self.transforms(image)

    return image, label
    

In [12]:
# Split train/val
from sklearn.model_selection import train_test_split
train_data, val_data = train_test_split(data, test_size= 0.2, random_state= 2020)
train_data.reset_index(drop= True, inplace= True)
val_data.reset_index(drop= True, inplace= True)

In [13]:
from torchvision import transforms
import numpy as np
IMAGE_MEAN = np.array([0.485, 0.456, 0.406])
IMAGE_STD = np.array([0.229, 0.224, 0.225])

train_transforms = transforms.Compose([
                                       transforms.Scale(224),
                                       transforms.ToTensor(),
                                       transforms.Normalize(IMAGE_MEAN, IMAGE_STD),
])

val_transforms = transforms.Compose([
                                       transforms.Scale(224),
                                       transforms.ToTensor(),
                                       transforms.Normalize(IMAGE_MEAN, IMAGE_STD)
])

  "please use transforms.Resize instead.")


In [27]:
from torch.utils.data import DataLoader
TRAIN_DIR = 'train'
BATCH_SIZE = 32

train_ds = DogBreedDataset(train_data, directory= TRAIN_DIR, transforms= train_transforms)
val_ds = DogBreedDataset(val_data, directory= TRAIN_DIR, transforms= val_transforms)

train_dl = DataLoader(train_ds, batch_size= BATCH_SIZE, shuffle= True, num_workers= 0)
val_dl = DataLoader(val_ds, batch_size= BATCH_SIZE, shuffle= False, num_workers= 0

In [28]:
# Take a batch
for image_batch, label_batch in train_dl:
  print(image_batch.shape)
  print(label_batch.shape)
  break

RuntimeError: ignored