In [1]:
# From: https://www.kaggle.com/c/dog-breed-identification/data
# Author: Morpheus Hsieh

from __future__ import print_function, division

import os, sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision import datasets, models, transforms, utils


In [2]:
# --- Data locations -------------------------------------------------------
# Folder with the untouched dataset files.
RawPath = r'D:\GitWork\dog_breed\data\raw'
# Folder with the derived artifacts (the .npy arrays and processed label CSV
# that later cells read).
ProcPath = r'D:\GitWork\dog_breed\data\processed'

print("Raw path: '{}'".format(RawPath))
print("Processed path: '{}'".format(ProcPath))

# --- Loader settings ------------------------------------------------------
# Number of samples per mini-batch handed to the DataLoaders.
BatchSize = 10

# Names of the dataset splits; they match the '<phase>_*.npy' file prefixes.
Phase = ['train', 'valid']

Raw path: 'D:\GitWork\dog_breed\data\raw'
Processed path: 'D:\GitWork\dog_breed\data\processed'


In [None]:
# Create breed dictionary backward
#
# NOTE(review): this cell is unfinished. It only loads the processed labels
# table into `df`; it never builds `breed_dict_bw`, which the image-preview
# cell further down calls `.get(<int label>)` on (that cell currently fails
# with "NameError: name 'breed_dict_bw' is not defined").
# Presumably the intended mapping is integer label -> breed name, built from
# columns of `df` -- TODO confirm the column names of processed_labels.csv
# and finish the construction here.
proc_csv_abspath = os.path.join(ProcPath, 'processed_labels.csv')
df = pd.read_csv(proc_csv_abspath)
# NOTE(review): `breeds` is not assigned in any visible cell, so on a fresh
# kernel this bare expression raises NameError instead of displaying a value.
breeds



In [6]:
# Per-channel normalisation applied after ToTensor().
# NOTE(review): these constants match the channel statistics torchvision
# documents for its ImageNet-pretrained models -- confirm that is the intent.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Preprocessing pipeline shared by both splits:
# resize to 224x224 -> convert to tensor -> normalise per channel.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

class myDataset(Dataset):
    """Dataset of (image, label) pairs backed by two ``.npy`` files.

    For a given ``phase`` the constructor loads ``<phase>_images.npy`` and
    ``<phase>_labels.npy`` from ``path``. Each entry of the images array is
    used as a file path that is opened with PIL when the sample is
    requested; each entry of the labels array is converted to ``int``.

    Args:
        path: Directory containing the ``.npy`` files.
        phase: File-name prefix selecting the split (default ``'train'``).
        transform: Optional callable applied to the loaded PIL image.
            When ``None`` the RGB PIL image itself is returned.
    """

    def __init__(self, path, phase='train', transform=None):
        self.images = np.load(os.path.join(path, '{}_images.npy'.format(phase)))
        self.labels = np.load(os.path.join(path, '{}_labels.npy'.format(phase)))

        self.transform = transform
        self.len = len(self.images)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        The image is always converted to RGB so every sample has three
        channels regardless of the file's mode (grayscale, palette, RGBA);
        otherwise a 3-channel normalisation or batch collation would fail
        on such files.
        """
        img_path = self.images[index]

        # convert() loads the pixel data and returns a new image, so the
        # underlying file can be closed as soon as the `with` block exits.
        with Image.open(img_path) as img_file:
            img = img_file.convert('RGB')

        # Previously `img` was only bound inside this branch, so the default
        # transform=None raised UnboundLocalError at the return statement.
        if self.transform is not None:
            img = self.transform(img)

        lbl = int(self.labels[index])

        return (img, lbl)

    def __len__(self):
        """Number of samples (length of the images array)."""
        return self.len

    
# One dataset per split, both using the same preprocessing pipeline.
trainSet = myDataset(ProcPath, phase='train', transform=transform)
validSet = myDataset(ProcPath, phase='valid', transform=transform)
print('\nTrainSet size: ', len(trainSet))
print('ValidSet size: ', len(validSet))

# Only the training loader shuffles; validation keeps the stored order.
trainLoader = DataLoader(dataset=trainSet, batch_size=BatchSize, shuffle=True)
validLoader = DataLoader(dataset=validSet, batch_size=BatchSize, shuffle=False)
print('\nTrainLoader size: ', len(trainLoader))
print('ValidLoader size: ', len(validLoader))

# Pull a single batch to sanity-check types and shapes.
imgs, lbls = next(iter(trainLoader))

print('\nImage type: ', type(imgs))
print('      size: ', imgs.shape)

print('\nLabel type: ', type(lbls))
print('      size: ', lbls.shape)

# Inspect the first image of the batch.
img = imgs[0]
print('\nImage: ', img.size())
print()
print(img)

print('\nLabel: ', lbls)



TrainSet size:  8177
ValidSet size:  2045

TrainLoader size:  818
ValidLoader size:  205

Image type:  <class 'torch.Tensor'>
      size:  torch.Size([10, 3, 224, 224])

Label type:  <class 'torch.Tensor'>
      size:  torch.Size([10])

Image:  torch.Size([3, 224, 224])

tensor([[[ 1.0844,  0.9303,  0.7591,  ..., -0.0801, -0.2684, -0.2513],
         [ 1.2214,  0.6906,  0.5364,  ..., -0.0116,  0.0227, -0.0972],
         [-0.3198, -0.6281, -0.8849,  ..., -0.0972, -0.2513, -0.1657],
         ...,
         [ 0.1083,  0.5536,  0.0056,  ...,  1.5297,  1.1358,  1.1529],
         [ 0.3652,  0.4851,  0.0227,  ...,  0.9474,  0.2282, -0.3027],
         [-0.1999, -0.5596, -0.8849,  ...,  0.3994, -0.2513, -0.3198]],

        [[ 0.6779,  0.5203,  0.3627,  ..., -0.0224, -0.1625, -0.1275],
         [ 0.9930,  0.4853,  0.3627,  ...,  0.0651,  0.1352,  0.0126],
         [-0.2850, -0.5826, -0.8277,  ..., -0.0049, -0.1625, -0.0574],
         ...,
         [ 0.2927,  0.4678, -0.1975,  ...,  1.1155,  0.660

In [7]:
# Imshow for Tensor
def imshow(inp, title=None,
           mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
           figsize=(16, 16)):
    """Display a normalised, channel-first image tensor with matplotlib.

    The tensor is moved to channel-last layout, the per-channel
    normalisation is undone (``std * x + mean``), values are clipped to
    ``[0, 1]`` and the result is drawn in a new figure.

    Args:
        inp: Tensor laid out as (channels, height, width); the axes are
            transposed with ``(1, 2, 0)`` before plotting.
        title: Optional figure title; skipped when ``None``.
        mean: Per-channel mean used to undo the normalisation. The default
            equals the values previously hard-coded in this function.
        std: Per-channel standard deviation used to undo the normalisation.
        figsize: Size passed to ``plt.figure``.

    Returns:
        None.
    """
    # detach() + cpu() make this work for tensors that require grad or live
    # on a GPU; a plain .numpy() raises for both.
    inp = inp.detach().cpu().numpy().transpose((1, 2, 0))
    mean = np.asarray(mean)
    std = np.asarray(std)
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.figure(figsize=figsize)
    plt.imshow(inp)

    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

# Small shuffled loader used only to preview a handful of training images.
sampleLoader = DataLoader(dataset=trainSet, batch_size=6, shuffle=True)

# Fetch one preview batch.
images, labels = next(iter(sampleLoader))
print('train images shape: ', images.shape)
print('train labels shape: ', labels.shape)

# Tile the batch into a single image grid.
out = utils.make_grid(images)

# NOTE(review): `breed_dict_bw` must already exist when this cell runs; no
# visible cell defines it, so this line raises NameError on a fresh kernel.
imshow(out, title=[breed_dict_bw.get(lbl.item()) for lbl in labels])

train images shape:  torch.Size([6, 3, 224, 224])
train labels shape:  torch.Size([6])


NameError: name 'breed_dict_bw' is not defined