In [1]:
# From: https://www.kaggle.com/c/dog-breed-identification/data
# Author: Morpheus Hsieh

from __future__ import print_function, division

# import os, sys
import copy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
from PIL import Image
from os import listdir
from os.path import join

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision import datasets, models, transforms, utils

In [29]:
RawPath = r'D:\GitWork\dog_breed\data\raw'
print("Raw path: '{}'".format(RawPath))

ProcPath = r'D:\GitWork\dog_breed\data\processed'
print("Processed path: '{}'".format(ProcPath))

fname_breeds_dict = 'breeds_dict.csv'
fname_labels_proc = 'labels_processed.csv'

BatchSize = 16
NUM_BREED_CLASSES = 16
FRAC_FOR_TRAIN = 0.8

Phase = ['train', 'valid']

npz_files = {
    'train': 'train_data.npz',
    'valid': 'valid_data.npz'
}

npz_columns = ['images', 'labels']

Raw path: 'D:\GitWork\dog_breed\data\raw'
Processed path: 'D:\GitWork\dog_breed\data\processed'


In [None]:
# Load most popular 16 breeds and breed ids
csv_breeds = join(ProcPath, fname_breeds_dict)
df_breeds = pd.read_csv(csv_breeds)
print(df_breeds.info());
print(df_breeds.head())

selected_breed_list = list(df_breedict['breed'][:NUM_BREED_CLASSES] )
print(); print(selected_breed_list)

# Create breed dict
breed_data = df_labels[df_labels['breed'].isin(selected_breed_list)]



# import json

# # Load breed dictionary from csv file
# def getBreedDict(path, fname):
#     csv_abspath = join(ProcPath, breed_dict_fname)
#     df = pd.read_csv(csv_abspath)
#     dic = {}
#     for i, (b, bid) in df.iterrows():
#         dic[bid] = b
#     return dic    
    
# breed_dict_fname = 'breed_dict.csv'
# breed_dict_bw = getBreedDict(ProcPath, breed_dict_fname)
# print(json.dumps(breed_dict_bw, indent=4))
# # print(breed_dict_bw)


In [30]:
# Filter labels data accoring to selected breeds
csv_labels = join(ProcPath, fname_labels_proc)
df_labels  = pd.read_csv(csv_labels)
train_data = df_labels[df_labels['breed'].isin(selected_breed_list)]
print(train_data)

img_list = list(train_data['image'])
lbl_list = list(train_data['breed_id'])

data_rows = len(train_data)
print('\nTotal rows:', data_rows)

train_len = int(float(FRAC_FOR_TRAIN) * float(data_rows))
print('Train len :', train_len)
print('Valid len :', data_rows - train_len)

train_imgs = img_list[:train_len]
train_lbls = lbl_list[:train_len]

valid_imgs = img_list[train_len:]
valid_lbls = lbl_list[train_len:]

data = [
    [train_imgs, train_lbls],
    [valid_imgs, valid_lbls]
]

# Save data as npz file
for i in range(len(Phase)):
    args = { npz_columns[0]: data[i][0], npz_columns[1]: data[i][1] }
    fname = npz_files[Phase[i]]
    out_fname = join(ProcPath, fname)
    np.savez(out_fname, **args)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   breed     120 non-null    object
 1   count     120 non-null    int64 
 2   breed_id  120 non-null    int64 
dtypes: int64(2), object(1)
memory usage: 2.9+ KB
None
                  breed  count  breed_id
0    scottish_deerhound    126         1
1           maltese_dog    117         2
2          afghan_hound    116         3
3           entlebucher    115         4
4  bernese_mountain_dog    114         5

['scottish_deerhound', 'maltese_dog', 'afghan_hound', 'entlebucher', 'bernese_mountain_dog', 'shih-tzu', 'great_pyrenees', 'pomeranian', 'basenji', 'samoyed', 'airedale', 'tibetan_terrier', 'leonberg', 'cairn', 'beagle', 'japanese_spaniel']
                                     id               breed  breed_id  \
8      003df8b8a8b05244b1d920bb6cf451f9             basenji         9   
9      0042188c89

In [31]:
# Normalize
normalize = transforms.Normalize(
    mean = [0.485, 0.456, 0.406],
    std  = [0.229, 0.224, 0.225]
)

# Transform
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    normalize
])

class myDataset(Dataset):
    
    npzFiles = { 
        'train': 'train_data.npz', 
        'valid': 'valid_data.npz' 
    }

    def __init__(self, path, phase='train', transform=None):
        npz_fname = self.npzFiles[phase]
        print(npz_fname)
        
        data = np.load(join(path, npz_fname), allow_pickle=True)
        self.images = data['images']
        self.labels = data['labels']

        self.transform = transform
        self.len = len(self.images)

    def __getitem__(self, index):
        img_path = self.images[index]
        img_pil = Image.open(img_path)

        if self.transform is not None:
            img = self.transform(img_pil)

        lbl = float(self.labels[index])
        
        return [img, lbl]

    def __len__(self):
        return self.len

    
trainSet = myDataset(ProcPath, transform=transform)
validSet = myDataset(ProcPath, phase='valid', transform=transform)
print('\nTrainSet size: ', len(trainSet))
print('ValidSet size: ', len(validSet))

trainLoader = DataLoader(trainSet, batch_size=BatchSize, shuffle=True)
validLoader = DataLoader(validSet, batch_size=BatchSize, shuffle=False)
print('\nTrainLoader size: ', len(trainLoader))
print('ValidLoader size: ', len(validLoader))

trainSize = len(trainSet)
validSize = len(validSet)

imgs, lbls = next(iter(trainLoader))
print('\nImage type: ', type(imgs))
print('      size: ', imgs.size())

print('\nLabel type: ', type(lbls))
print('      size: ', lbls.size())

img = imgs[0]
print('\nImage: ', img.shape)
print(); print(img)

print('\nLabel: ', lbls)

train_data.npz
valid_data.npz

TrainSet size:  1421
ValidSet size:  356

TrainLoader size:  89
ValidLoader size:  23

Image type:  <class 'torch.Tensor'>
      size:  torch.Size([16, 3, 224, 224])

Label type:  <class 'torch.Tensor'>
      size:  torch.Size([16])

Image:  torch.Size([3, 224, 224])

tensor([[[ 0.0741,  0.0741,  0.0741,  ..., -1.8610, -1.8610, -1.8439],
         [ 0.0741,  0.0741,  0.0741,  ..., -1.9124, -1.9124, -1.9295],
         [ 0.0741,  0.0741,  0.0741,  ..., -1.9124, -1.9124, -1.9124],
         ...,
         [-0.3712, -0.3027, -0.3027,  ...,  0.0056, -0.0287, -0.0287],
         [-0.2171, -0.2513, -0.2342,  ...,  0.0398,  0.0398,  0.0398],
         [ 0.0227, -0.0116,  0.0227,  ..., -0.0972, -0.0801, -0.0116]],

        [[ 0.2227,  0.2227,  0.2227,  ..., -1.7731, -1.7731, -1.7556],
         [ 0.2227,  0.2227,  0.2227,  ..., -1.8256, -1.8256, -1.8431],
         [ 0.2227,  0.2227,  0.2227,  ..., -1.8256, -1.8256, -1.8256],
         ...,
         [-0.5126, -0.4426, -0.

In [33]:
# Imshow for Tensor
def imshow(inp, title=None):
    inp  = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std  = np.array([0.229, 0.224, 0.225])
    inp  = std * inp + mean
    plt.figure(figsize=(16, 4))
    plt.imshow(inp)
    
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
    return

sampleLoader = DataLoader(trainSet, batch_size=6, shuffle=True)    

# Get a batch of training data
imgs, lbls = next(iter(sampleLoader))
print('image size: {}'.format(imgs.size()))
print('label size: {}'.format(lbls.size()))

# Make a grid from batch
out = torchvision.utils.make_grid(imgs)
imshow(out, title=[breed_dict_bw.get(x.item()) for x in lbls])

image size: torch.Size([6, 3, 224, 224])
label size: torch.Size([6])


NameError: name 'breed_dict_bw' is not defined

In [None]:
use_gpu = torch.cuda.is_available()
device = torch.device("cuda:0" if use_gpu else "cpu")
print(device)

In [None]:
# model
resnet = models.resnet50(pretrained=True)
inputs, labels = next(iter(trainLoader))
if use_gpu:
    resnet = resnet.cuda()
    inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())   
else:
    inputs, labels = Variable(inputs), Variable(labels)

outputs = resnet(inputs)
outputs.size()

In [None]:
def train_model(dataloders, model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    use_gpu = torch.cuda.is_available()
    best_model_wts = model.state_dict()
    best_acc = 0.0
    dataset_sizes = {'train': len(dataloders['train'].dataset), 
                     'valid': len(dataloders['valid'].dataset)}

    for epoch in range(num_epochs):
        for phase in ['train', 'valid']:
            if phase == 'train':
                scheduler.step()
                model.train(True)
            else:
                model.train(False)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloders[phase]:
                if use_gpu:
                    inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                optimizer.zero_grad()

                with torch.set_grad_enabled(True):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.data[0]
                running_corrects += torch.sum(preds == labels.data)
                
            scheduler.step()
            
            if phase == 'train':
                train_epoch_loss = running_loss / dataset_sizes[phase]
                train_epoch_acc = running_corrects / dataset_sizes[phase]
            else:
                valid_epoch_loss = running_loss / dataset_sizes[phase]
                valid_epoch_acc = running_corrects / dataset_sizes[phase]
                
            if phase == 'valid' and valid_epoch_acc > best_acc:
                best_acc = valid_epoch_acc
                best_model_wts = model.state_dict()

        print('Epoch [{}/{}] train loss: {:.4f} acc: {:.4f} ' 
              'valid loss: {:.4f} acc: {:.4f}'.format(
                epoch, num_epochs - 1,
                train_epoch_loss, train_epoch_acc, 
                valid_epoch_loss, valid_epoch_acc))
            
    print('Best val Acc: {:4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    return model

In [None]:
resnet = models.resnet50(pretrained=True)

# freeze all model parameters
for param in resnet.parameters():
    param.requires_grad = False

# new final layer with 16 classes
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, 16)
if use_gpu:
    resnet = resnet.cuda()
    
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet.fc.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

dataLoaders = { 'train':trainLoader, 'valid':validLoader }

In [None]:
start_time = time.time()
model = train_model(dataLoaders, resnet, criterion, optimizer, exp_lr_scheduler, num_epochs=2)
print('Training time: {:10f} minutes'.format((time.time()-start_time)/60))

In [None]:
def visualize_model(dataloders, model, num_images=16):
    cnt = 0
    fig = plt.figure(1, figsize=(16, 16))
    grid = ImageGrid(fig, 111, nrows_ncols=(4, 4), axes_pad=0.05)
    for i, (inputs, labels) in enumerate(dataloders['valid']):
        if use_gpu:
            inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        else:
            inputs, labels = Variable(inputs), Variable(labels)

        outputs = model(inputs)
        _, preds = torch.max(outputs.data, 1)

        for j in range(inputs.size()[0]):
            ax = grid[cnt]
            imshow(ax, inputs.cpu().data[j])
            ax.text(10, 210, '{}/{}'.format(preds[j], labels.data[j]), 
                    color='k', backgroundcolor='w', alpha=0.8)
            cnt += 1
            if cnt == num_images:
                return
            
visualize_model(dloaders, resnet)            