In [1]:
# From: https://www.kaggle.com/c/dog-breed-identification/data
# Author: Morpheus Hsieh

from __future__ import print_function, division

import os, sys
import copy
import io
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
from mpl_toolkits.axes_grid1 import ImageGrid
from os import listdir
from os.path import join, isfile, split
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision import datasets, models, transforms, utils

print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

PyTorch Version:  1.5.1
Torchvision Version:  0.6.1


In [2]:
# Load parameters from json file

OutPath = r'D:\GitWork\dog_breed\output'

cfgPath = r'D:\GitWork\dog_breed\configs'
fname = 'Params_20200923-2230.json'
json_file = join(cfgPath, fname)

with open(json_file) as fin: 
    Params = json.load(fin) 

print('Parameters:')
print(json.dumps(Params, indent=4))

Parameters:
{
    "DataPath": "D:\\GitWork\\dog_breed\\data",
    "OutPath": "D:\\GitWork\\dog_breed\\output",
    "ProcPath": "D:\\GitWork\\dog_breed\\processed",
    "PreTrainPath": "D:\\GitWork\\dog_breed\\pretrained",
    "PreTrainFile": "resnet50_20200925-1555_acc80.pth",
    "LoadPreModel": false,
    "TestPath": "D:\\Dataset\\dog-breed-identification\\test",
    "TrainPath": "D:\\Dataset\\dog-breed-identification\\train",
    "CsvLabel": "labels.csv",
    "BatchSize": 16,
    "FracForTrain": 0.8
}


In [3]:
# Read labels information

DataPath = Params.get('DataPath')
csv_labels = Params.get('CsvLabel')
f_abspath = join(DataPath, csv_labels)

df_labels = pd.read_csv(f_abspath)

print(df_labels.info())
print(); print(df_labels.head())

NumClasses = df_labels.shape[0]
print('\nNum classes:', NumClasses)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10222 entries, 0 to 10221
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      10222 non-null  object
 1   breed   10222 non-null  object
dtypes: object(2)
memory usage: 159.8+ KB
None

                                 id             breed
0  000bec180eb18c7604dcecc8fe0dba07       boston_bull
1  001513dfcb2ffafc82cccf4d8bbaba97             dingo
2  001cdf01b096e06d78e9e5112d419397          pekinese
3  00214f311d5d2247d5dfe4fe24b2303d          bluetick
4  0021f9ceb3235effd7fcde7f7538ed62  golden_retriever

Num classes: 10222


In [4]:
# Count all breeds
def countBreeds(df):
    df1 = df_labels.groupby("breed")["id"].count().reset_index(name="count")
    df1 = df1.sort_values(by='count', ascending=False).reset_index(drop=True)
    df1.insert(0, 'breed_id', df1.index)
    return df1

df_breeds = countBreeds(df_labels)
print(df_breeds.info())
print(); print(df_breeds.head())

NumClasses = int(df_breeds.shape[0])
print('\nNum classes:', NumClasses)

selected_breeds = df_breeds['breed'].tolist()

# dict_bid_fw = dict(df_breeds[['breed', 'breed_id']].values)
dict_bid_bw = dict(df_breeds[['breed_id', 'breed']].values)

def prettyPrint(d, indent=0):
    print('{')
    for key, value in d.items():
        if isinstance(value, dict):
            print('  ' * indent + str(key))
            prettyPrint(value, indent+1)
        else:
            print('  ' * (indent+1) + f"{key}: {value}")
    print('}')
                
print('\nBreeds dict backward:'); 
prettyPrint(dict_bid_bw)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   breed_id  120 non-null    int64 
 1   breed     120 non-null    object
 2   count     120 non-null    int64 
dtypes: int64(2), object(1)
memory usage: 2.9+ KB
None

   breed_id                 breed  count
0         0    scottish_deerhound    126
1         1           maltese_dog    117
2         2          afghan_hound    116
3         3           entlebucher    115
4         4  bernese_mountain_dog    114

Num classes: 120

Breeds dict backward:
{
  0: scottish_deerhound
  1: maltese_dog
  2: afghan_hound
  3: entlebucher
  4: bernese_mountain_dog
  5: shih-tzu
  6: great_pyrenees
  7: pomeranian
  8: basenji
  9: samoyed
  10: airedale
  11: tibetan_terrier
  12: leonberg
  13: cairn
  14: beagle
  15: japanese_spaniel
  16: australian_terrier
  17: blenheim_spaniel
  18: miniature_pinscher
  19: iris

In [5]:
# Build dataset

# Transform
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
])

class myDataset(Dataset):

    def __init__(self, path, transform=None):
        
        img_list = [
            join(path, f) \
            if f.endswith('.jpg') and isfile(join(path, f)) else None \
            for f in listdir(path)
        ]

        self.len = len(img_list)
        self.images = img_list
        self.transform = transform

    def __getitem__(self, index):
        img = self.images[index]
        img_pil = Image.open(img)

        if self.transform is not None:
            img_tensor = self.transform(img_pil)

        iid = split(img)[1].replace('.jpg', '')
        
        return [img_tensor, iid]

    def __len__(self):
        return self.len

    
TestPath = Params['TestPath']
# BatchSize = Params['BatchSize']
BatchSize = 100
    
dataSet = myDataset(TestPath, transform=transform)
dataLoader = DataLoader(dataSet, batch_size=BatchSize, shuffle=False)
dataSize = len(dataSet)

imgs, iids = next(iter(dataLoader))
print('\nImage shape:', imgs.shape)

print('\nImage ids')
id_list = [''.join(iid) for iid in iids]
print('  '+'\n  '.join(id_list))

img = imgs[0]
print('\nImage shape:', img.shape)

print('\nImage tensor:')
print(img)


Image shape: torch.Size([100, 3, 224, 224])

Image ids
  000621fb3cbb32d8935728e48679680e
  00102ee9d8eb90812350685311fe5890
  0012a730dfa437f5f3613fb75efcd4ce
  001510bc8570bbeee98c8d80c8a95ec1
  001a5f3114548acdefa3d4da05474c2e
  00225dcd3e4d2410dd53239f95c0352f
  002c2a3117c2193b4d26400ce431eebd
  002c58d413a521ae8d1a5daeb35fc803
  002f80396f1e3db687c5932d7978b196
  0036c6bcec6031be9e62a257b1c3c442
  0041940322116ae58c38130f5a6f71f9
  0042d6bf3e5f3700865886db32689436
  004476c96f575879af4af471af65cae8
  00485d47de966a9437ad3b33ac193b6f
  00496f65de6cc319145ce97bd6e90360
  004bf14426d1a830d459a9e0c0721309
  004c3721eb88358f462cdcec6b2380b7
  00559f56aab7e0a7749220f6aed65162
  005b281f1a4d6f29d527c9585e9bd33c
  005b6c6c76fefd6b458ef6fb6e54da6e
  006870b49353779b25eeb91fed43c31a
  0068f3a21b159ece126a28580cdad7a0
  0069b1cc4546fc98f84f981bf9a0696a
  0077bc3c63486ff09d3774d956af8f76
  00780e5d2bf4f7e4b5f96d08ddde669a
  007ed71136966728f5c0936e23c8286b
  0081831ceb49cd64212c32b884036b82

In [6]:
# Use GPU for train
use_gpu = torch.cuda.is_available()
device = torch.device("cuda:0" if use_gpu else "cpu")
print(device)

cuda:0


In [7]:
# Build Model 
model = models.resnet50(pretrained=True)

# freeze all model parameters
for param in model.parameters():
    param.requires_grad = False

# New final layer with NumClasses
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, NumClasses)

# load pretrained mode
PreTranPath = Params['PreTrainPath']
PreTranFile = Params['PreTrainFile']
f_abspath = join(PreTranPath, PreTranFile)
model.load_state_dict(torch.load(f_abspath))

if use_gpu: model = model.cuda()

print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [8]:
# Pediction
import torch.nn.functional as nnf

# Output submission
fname_submission = 'submission.csv'
f_abspath = join(OutPath, fname_submission)

cols_preds = ['id', 'prediction']
df_preds = pd.DataFrame(columns=cols_preds)

cols_probs = ['id'] + selected_breeds
df_probs = pd.DataFrame(columns=cols_probs)

start_time = time.time()
print('Start testing...')

model.eval()

for i, (inputs, iids) in enumerate(dataLoader):

    inputs = Variable(inputs.cuda())
    iid_list = list(iids)
    
    with torch.set_grad_enabled(True):
        outputs = model(inputs)
        preds = torch.argmax(outputs, dim=1)
        probs = torch.nn.functional.softmax(outputs, dim=1)
        
        if i == 0:
            # print(); print(len(iid_list)); print('\n'.join(iid_list))
            print('\nProbs:'); print(probs.shape); print(probs)
            print('\nPreds:'); print(preds.shape); print(preds)
            print()
    
    pred_list = preds.tolist()
    pred_breeds = [dict_bid_bw.get(x) for x in pred_list]
    
    df_tmp = pd.DataFrame({
        'id': iid_list,
        'prediction': pred_breeds
    })
    df_preds = df_preds.append(df_tmp)

    df_tmp = pd.DataFrame({'id': iid_list})
    df_tmp[selected_breeds] = pd.DataFrame(probs.tolist())
    df_probs = df_probs.append(df_tmp)

    print(i, end=', ')
    
print()
print('Testing time: {:10f} minutes'.format((time.time()-start_time)/60))    

print(); print(df_preds.info())
print(); print(df_preds.head())

print(); print(df_probs.info())
print(); print(df_probs.head())

Start testing...

Probs:
torch.Size([100, 120])
tensor([[1.2100e-03, 2.0992e-01, 8.4855e-03,  ..., 6.6440e-03, 4.0181e-03,
         3.9533e-03],
        [1.1271e-04, 3.2042e-03, 8.7192e-05,  ..., 6.2201e-04, 2.6369e-04,
         1.3375e-02],
        [1.5161e-02, 1.5351e-02, 2.8774e-02,  ..., 1.0469e-02, 7.5163e-03,
         6.3521e-03],
        ...,
        [4.6673e-03, 3.1670e-03, 2.1943e-03,  ..., 4.0342e-03, 8.4376e-03,
         1.0519e-02],
        [7.8491e-03, 8.4818e-04, 8.0299e-04,  ..., 2.7263e-03, 1.3364e-03,
         4.5447e-03],
        [5.8779e-03, 8.7829e-02, 1.8355e-03,  ..., 7.9751e-03, 2.7250e-02,
         3.4388e-03]], device='cuda:0', grad_fn=<SoftmaxBackward>)

Preds:
torch.Size([100])
tensor([  1,   9,  93,  26,   1,  35,  13,   9,  44,  70,   1,  38,   9,   7,
        106,  32,  77,   1,  23,  21,  58,  24,  71,   1,  37,  46,  48,  23,
          1,  10,  45,   6,  70,   1,  90,  41,  15,  68,  51,   1,  35,   0,
         91,   1,  45,   4,   5,  22,  18,  48,  14,

In [9]:
from datetime import datetime

currDT = datetime.now()
currStr = currDT.strftime("%Y%m%d-%H%M%S")

fname = 'Prediction_{}.csv'.format(currStr)
df_preds.to_csv(join(OutPath, fname), index=False)

fname = 'Probability_{}.csv'.format(currStr)
df_probs.to_csv(join(OutPath, fname), index=False)