# Building a DataFrame of Image Metadata

In [11]:
import numpy as np
import pandas as pd
import os

def make_catndog_dataframe(root_dir = '/kaggle/input/cat-and-dog'):
    """Build a metadata table for every ``.jpg`` file found under ``root_dir``.

    The directory tree is walked recursively. For each file whose name ends
    with ``.jpg`` one row is produced with:

    * ``path``    - ``dirname + '/' + filename`` as reported by ``os.walk``.
    * ``dataset`` - ``'train'`` if the path contains ``'/training_set/'``,
      ``'test'`` if it contains ``'/test_set/'``, otherwise ``'N/A'``.
    * ``label``   - ``'DOG'`` if the path contains ``'dogs'``, ``'CAT'`` if it
      contains ``'cats'``, otherwise ``'N/A'``.

    Args:
        root_dir: Directory to scan. Defaults to the Kaggle input folder the
            function was originally hard-wired to. Note that the substring
            checks run on the full path, so a ``root_dir`` that itself
            contains ``'dogs'`` or ``'cats'`` influences the labels.

    Returns:
        A ``pandas.DataFrame`` with the columns ``path``, ``dataset`` and
        ``label`` (empty, but with those columns, when nothing is found).
    """
    records = []

    for dirname, _, filenames in os.walk(root_dir):
        for filename in filenames:
            # Test the extension itself; a plain substring test would also
            # accept names such as 'photo.jpg.txt'.
            if not filename.endswith('.jpg'):
                continue

            file_path = dirname + '/' + filename

            if '/training_set/' in file_path:
                dataset = 'train'
            elif '/test_set/' in file_path:
                dataset = 'test'
            else:
                dataset = 'N/A'

            # 'dogs' is checked first, exactly as before, so it wins if both match.
            if 'dogs' in file_path:
                label = 'DOG'
            elif 'cats' in file_path:
                label = 'CAT'
            else:
                label = 'N/A'

            records.append((file_path, dataset, label))

    # Passing the column names explicitly keeps the schema stable for an empty scan.
    data_df = pd.DataFrame(records, columns = ['path', 'dataset', 'label'])
    return data_df


In [12]:
# Raise the per-column display width to 200 characters so long file paths are easier to read.
pd.set_option('display.max_colwidth', 200)
# Build the metadata table (path / dataset / label) and preview its first rows.
data_df = make_catndog_dataframe()
data_df.head()

# Building a Keras Sequence Dataset

In [21]:
from tensorflow.keras.utils import Sequence
import sklearn
import cv2

# Default number of samples per batch (used as the default `batch_size` of CnD_Dataset).
Batch_Size = 64
# Side length in pixels; every image is resized to Image_Size x Image_Size.
Image_Size = 224

class CnD_Dataset(Sequence):
    """Keras ``Sequence`` that reads images from disk one batch at a time.

    Indexing the dataset with a batch number returns the tuple
    ``(image_batch, label_batch)``. Images are read with OpenCV, converted
    from BGR to RGB, resized to ``Image_Size`` x ``Image_Size`` and, when an
    augmentor is supplied, passed through it.
    """

    def __init__(self, image_filenames, labels, batch_size = Batch_Size, augmentor = None, shuffle = False):
        """Store the dataset configuration.

        Args:
            image_filenames: Sequence of image file paths.
            labels: Sequence of labels aligned with ``image_filenames``, or
                ``None`` for unlabeled data.
            batch_size: Number of samples per batch.
            augmentor: Optional callable invoked as ``augmentor(image=img)``
                that returns a mapping with the result under ``'image'``.
            shuffle: When true, the samples are reshuffled in ``on_epoch_end``.
        """
        # Let the Keras base class initialise any state of its own.
        super().__init__()
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.augmentor = augmentor
        self.shuffle = shuffle
        # NOTE: no shuffling happens here; the initial order is kept as given
        # and reshuffling is done only in on_epoch_end.

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Count file names rather than labels so unlabeled datasets work too.
        return int(np.ceil(len(self.image_filenames) / self.batch_size))

    def __getitem__(self, index):
        """Load, preprocess and return batch number ``index``.

        Returns:
            ``(image_batch, label_batch)`` where ``image_batch`` is an array
            of shape ``(n, Image_Size, Image_Size, 3)`` created by
            ``np.zeros`` and ``label_batch`` is the matching slice of
            ``labels`` (``None`` when the dataset has no labels).

        Raises:
            OSError: If OpenCV cannot read or decode one of the image files.
        """
        # `index` is the batch number; translate it into a slice of samples.
        start = index * self.batch_size
        stop = start + self.batch_size
        image_name_batch = self.image_filenames[start:stop]
        label_batch = None if self.labels is None else self.labels[start:stop]

        image_batch = np.zeros((len(image_name_batch), Image_Size, Image_Size, 3))
        for image_index, image_name in enumerate(image_name_batch):
            image = cv2.imread(image_name)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise OSError(f'Could not read image file: {image_name}')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (Image_Size, Image_Size))

            # Apply the augmentation pipeline, if one was provided.
            if self.augmentor is not None:
                image = self.augmentor(image = image)['image']

            # Pixel values are stored unscaled; divide by 255.0 here if 0-1 scaling is wanted.
            image_batch[image_index] = image

        return image_batch, label_batch

    def on_epoch_end(self):
        """Reshuffle the samples (file names and labels together) if enabled."""
        if not self.shuffle:
            return
        if self.labels is None:
            self.image_filenames = sklearn.utils.shuffle(self.image_filenames)
        else:
            self.image_filenames, self.labels = sklearn.utils.shuffle(self.image_filenames, self.labels)

In [18]:
import albumentations as A
# Split the metadata table by the value of its 'dataset' column.
train_df = data_df.loc[data_df['dataset'] == 'train']
test_df = data_df.loc[data_df['dataset'] == 'test']

# File paths and labels of the training rows, as arrays for the batch loader.
train_image_filenames = train_df['path'].values
train_image_labels = train_df['label'].values

# Augmentation pipeline: every transform is configured with p = 0.7.
cnd_augmentator = A.Compose(
    [
        A.HorizontalFlip(p = 0.7),
        A.VerticalFlip(p = 0.7),
        A.ShiftScaleRotate(p = 0.7),
    ]
)

# Batch loader over the training images, using the default batch size and no shuffling.
cnd_ds = CnD_Dataset(
    train_image_filenames,
    train_image_labels,
    augmentor = cnd_augmentator,
)

In [19]:
# Fetch the first batch once and unpack it: the images and labels then come
# from the same __getitem__ call, and the files are not read and augmented twice.
image_batch, label_batch = next(iter(cnd_ds))
print(image_batch.shape, label_batch.shape)
print(image_batch[0])

In [20]:
import matplotlib.pyplot as plt
%matplotlib inline

def show_grid_images(images_batch, ncols = 4, title = None):
    """Show the first ``ncols`` images of a batch side by side in one row.

    Args:
        images_batch: Indexable collection of images; each image is cast to
            ``int32`` before being passed to ``imshow``.
        ncols: Number of panels in the row. Panels without a matching image
            are left blank.
        title: Optional indexable of per-panel titles; ``title[i]`` is used
            for panel ``i``. No titles are drawn when it is ``None``.
    """
    # squeeze=False always yields a 2-D array of axes, so ncols == 1 works too.
    figure, axs = plt.subplots(figsize = (22, 4), nrows = 1, ncols = ncols, squeeze = False)
    n_images = len(images_batch)
    for i, ax in enumerate(axs[0]):
        ax.axis('off')
        if i >= n_images:
            # Fewer images than panels: keep the remaining panels empty.
            continue
        ax.imshow(np.array(images_batch[i], dtype = 'int32'))
        if title is not None and i < len(title):
            ax.set_title(title[i])

# Prefix each label with 'augmented' to form the per-panel titles.
show_grid_images(image_batch, ncols = 4, title = ['augmented' + label for label in label_batch])