<a href="https://colab.research.google.com/github/sandrakaku/ml0930/blob/master/DataGenerator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os
from glob import glob

import cv2
import imgaug as ia
import imgaug.augmenters as iaa
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models, optimizers, metrics, losses

In [3]:
IMG_SIZE = 256 # side length (pixels) of the square images fed to the model
BATCH_SIZE = 8 # number of samples per batch

# augmentation
seq = iaa.Sequential([
    iaa.Fliplr(0.5), # horizontal (left-right) flip; 0.5 = applied to 50% of images
    iaa.Flipud(0.5), # vertical (upside-down) flip; 0.5 = applied to 50% of images
    iaa.Affine(   # affine transform; only rotation and shear are configured here
        rotate=(-45, 45), # rotate by a random angle between -45 and 45 degrees
        mode=ia.ALL, # border fill mode; ia.ALL lets imgaug choose among its modes (edge, reflect, symmetric, wrap, constant)
        shear=(-16,16) # shear (skew) by a random amount in [-16, 16]; presumably degrees -- confirm in the imgaug Affine docs
    )
])

In [1]:
# Mount Google Drive at /content/drive so the dataset stored there can be read.
# NOTE(review): this must run before the cell that globs the image paths. That
# cell uses the relative path 'drive/My Drive/...', which presumably relies on
# the working directory being /content -- confirm.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
Guo Sandra


ValueError: ignored

In [None]:
# Small CNN classifier: two 3x3 convolutions, global average pooling, then a
# 4-way softmax head (one unit per class used by the data generators below).
inputs = layers.Input((IMG_SIZE, IMG_SIZE, 3))
x = layers.Conv2D(64, 3, activation='relu')(inputs)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(4, activation='softmax')(x)

model = models.Model(inputs, x)
# Labels are one-hot encoded by the generators, hence categorical cross-entropy.
# Track accuracy as well, so that training/validation progress is readable
# from more than the raw loss value.
model.compile(optimizer=optimizers.Adam(),
              loss=losses.categorical_crossentropy,
              metrics=[metrics.CategoricalAccuracy(name='accuracy')])
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 256, 256, 3)]     0         
_________________________________________________________________
conv2d (Conv2D)              (None, 254, 254, 64)      1792      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 252, 252, 64)      36928     
_________________________________________________________________
global_average_pooling2d (Gl (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 4)                 260       
Total params: 38,980
Trainable params: 38,980
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Read image path
img_paths = glob('drive/My Drive/class/勞動部/week8/seed/*/*.png')
img_paths[:5]

['drive/My Drive/class/勞動部/week8/seed/Cleavers/33c1f167f.png',
 'drive/My Drive/class/勞動部/week8/seed/Cleavers/a07efb1e0.png',
 'drive/My Drive/class/勞動部/week8/seed/Cleavers/9b35827fa.png',
 'drive/My Drive/class/勞動部/week8/seed/Cleavers/3d9ea1649.png',
 'drive/My Drive/class/勞動部/week8/seed/Cleavers/c7b4ce2e3.png']

In [None]:
# Hold out 20% of the images for validation. A fixed random_state keeps the
# split identical across kernel restarts so results are comparable.
train_img_paths, val_img_paths = train_test_split(img_paths, test_size=0.2, random_state=42)

In [None]:
# Sanity check: number of training and validation image paths after the split.
len(train_img_paths), len(val_img_paths)

(1098, 275)

### Data Generator 1: plain Python generator function

In [None]:
# Data Generator
def data_generator(data_paths, batch_size, aug=True):
    class_map = {'Charlock':0, 'Cleavers': 1, 'Fat Hen': 2, 'Maize': 3}
    '''data generator for fit_generator'''
    n = len(data_paths)
    i = 0
    data_paths = data_paths
    while True:
        image_data = []
        class_data = []
        for b in range(batch_size):
            if i==0:
                np.random.shuffle(data_paths)
            path = data_paths[i]
            img = cv2.imread(path)[:,:,::-1]
            # img aug
            if aug:
                img = seq.augment_image(img)
            
            # img preprocess
            img_resize = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            img_resize = img_resize/255
            
            # read label
            cls = path.split('/')[-2]

            image_data.append(img_resize)
            class_data.append(class_map[cls])
            i = (i+1) % n # 記錄讀到第幾個
        image_data = np.array(image_data)
        class_data = np.array(class_data)
        class_data = tf.keras.utils.to_categorical(class_data, num_classes = 4)
        yield image_data, class_data # while版的return 但是不會再進入迴圈


In [None]:
# Build the generator-function pipelines: augmentation on for training,
# off for validation.
data_gen_train = data_generator(train_img_paths, batch_size=BATCH_SIZE, aug=True)
data_gen_val = data_generator(val_img_paths, batch_size=BATCH_SIZE, aug=False)

In [None]:
# data_generator loops forever, so the number of batches per epoch (and per
# validation run) has to be given explicitly.
model.fit(data_gen_train, 
          epochs=1, # one epoch = steps_per_epoch batches
          steps_per_epoch=len(train_img_paths)//BATCH_SIZE, # number of training samples / batch size (floor division)
          validation_data = data_gen_val,
          validation_steps = len(val_img_paths) // BATCH_SIZE)

### Data Generator 2: `keras.utils.Sequence` subclass

In [None]:
from tensorflow.keras.utils import Sequence
class DataGenerator(Sequence):
    """
    Keras ``Sequence`` that loads, optionally augments, and batches images.

    Item ``index`` is one batch ``(X, y)``: ``X`` is a float32 array of shape
    ``(n, img_size, img_size, 3)`` holding the pixel values divided by 255 and
    ``y`` is the one-hot label array of shape ``(n, num_classes)``. ``n``
    equals ``batch_size`` except for the last batch, which may be smaller.

    The label of an image is the name of its parent directory, looked up in
    ``class_map``. Augmentation uses the notebook-level ``seq`` augmenter.

    ref: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
    """
    def __init__(self,
                 paths,
                 batch_size,
                 img_size,
                 augment,
                 shuffle=True,
                 **kwargs):
        """
        Args:
            paths: sequence of image file paths.
            batch_size: number of samples per batch (must be >= 1).
            img_size: side length of the square output images.
            augment: when true, apply ``seq`` to every image before resizing.
            shuffle: when true, reshuffle the sample order at construction and
                after every epoch.
            **kwargs: forwarded to the ``Sequence`` base class constructor.

        Raises:
            ValueError: if ``batch_size`` < 1.
        """
        # Newer Keras versions expect the base-class constructor to be called;
        # on older versions this is a harmless no-op when no kwargs are given.
        super().__init__(**kwargs)
        if batch_size < 1:
            raise ValueError(f'batch_size must be >= 1, got {batch_size}')
        self.paths = paths
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Shuffling permutes these indexes, never the caller's path list.
        self.indexes = np.arange(len(self.paths))
        self.class_map = {'Charlock':0, 'Cleavers': 1, 'Fat Hen': 2, 'Maize': 3}
        self.num_classes = len(self.class_map)
        self.img_size = img_size
        self.augment = augment
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.paths) / self.batch_size))

    def __getitem__(self, index):
        """Generate the batch at position ``index`` as ``(X, y)``."""

        # Generate indexes of the batch
        idxs = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Find the paths that belong to this batch
        batch_paths = [self.paths[i] for i in idxs]

        # Generate data
        X, y = self.__data_generation(batch_paths)

        return X, y

    def on_epoch_end(self):
        """Reshuffle the sample order after each epoch when ``shuffle`` is set."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, paths):
        """
        Load the images at ``paths`` and return them with one-hot labels.

        Raises:
            OSError: if an image cannot be read by ``cv2.imread``.
            KeyError: if a parent directory name is not in ``class_map``.
        """
        X = np.empty((len(paths), self.img_size, self.img_size, 3), dtype=np.float32)
        # Integer buffer: these are class indices, not continuous values.
        y = np.empty(len(paths), dtype=np.int64)

        for i, path in enumerate(paths):
            img = cv2.imread(path)
            if img is None:
                # cv2.imread reports failure by returning None, not by raising.
                raise OSError(f'cv2.imread could not read image: {path}')
            img = img[:, :, ::-1]  # reverse the channel axis (OpenCV loads BGR)
            # img aug
            if self.augment:
                img = seq.augment_image(img)
            # img preprocess
            img = cv2.resize(img, (self.img_size, self.img_size))
            img = img / 255.
            # read label: the class is the name of the image's parent directory
            cls = os.path.basename(os.path.dirname(path))
            cls = self.class_map[cls]

            X[i] = img
            y[i] = cls
        # one-hot encoding
        y = tf.keras.utils.to_categorical(y, num_classes=self.num_classes)
        return X, y

In [None]:
# Rebuild both pipelines with the Sequence-based generator; this rebinds
# data_gen_train / data_gen_val, replacing the generator-function versions.
# Augmentation is on for training and off for validation.
data_gen_train = DataGenerator(train_img_paths, batch_size=BATCH_SIZE, img_size=IMG_SIZE, augment=True)
data_gen_val = DataGenerator(val_img_paths, batch_size=BATCH_SIZE, img_size=IMG_SIZE, augment=False)

In [None]:
# No steps_per_epoch / validation_steps here: DataGenerator defines __len__,
# which gives the number of batches per epoch.
model.fit(data_gen_train, 
          epochs=1, 
          validation_data = data_gen_val)