<a href="https://colab.research.google.com/github/sara87821/20210602/blob/main/03_Data_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# download dataset from https://drive.google.com/file/d/12iinuMZkaZOZGXlkPsUnmETaE-cMlyHu/view?usp=sharing
!gdown --id 12iinuMZkaZOZGXlkPsUnmETaE-cMlyHu --output defect.zip

Downloading...
From: https://drive.google.com/uc?id=12iinuMZkaZOZGXlkPsUnmETaE-cMlyHu
To: /content/defect.zip
53.2MB [00:00, 61.4MB/s]


In [None]:
# unzip file
!unzip -q defect.zip

In [None]:
import os
from glob import glob

import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that loads image batches from disk for binary classification.

    Indexing the generator returns a tuple ``(x, y)``:

    * ``x`` -- float32 array of shape ``(n, img_size, img_size, 3)`` holding the
      resized images divided by 255.
    * ``y`` -- float32 array of shape ``(n, 1)`` holding 0 for images whose class
      folder is ``MT_Free`` and 1 for every other folder.

    ``n`` equals ``batch_size`` except for the last batch, which may be smaller.

    Args:
        img_paths: Indexable collection of image paths laid out as
            ``<class folder>/<sub folder>/<file>``; the label is derived from
            the class folder name.
        batch_size: Maximum number of samples per batch.
        img_size: Side length in pixels that every image is resized to.
        shuffle: If True, the sample order is shuffled at construction time and
            again on every ``on_epoch_end`` call.
        aug: If True, each image is passed through ``_augment`` (currently an
            identity placeholder).
        **kwargs: Forwarded unchanged to the Keras base-class constructor.
    """

    def __init__(self, img_paths, batch_size, img_size, shuffle=True, aug=False, **kwargs):
        # Keras 3 expects Sequence/PyDataset subclasses to call the base
        # constructor; on older tf.keras this is a harmless no-op when no
        # extra keyword arguments are given.
        super().__init__(**kwargs)
        self.img_paths = img_paths
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        self.aug = aug

        # Positions into img_paths, e.g. [0, 1, 2, 3, 4, 5]; shuffled in place per epoch.
        self.indexes = np.arange(len(self.img_paths))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.img_paths) / self.batch_size))

    def __getitem__(self, index):
        """Load batch number ``index`` from disk and return it as ``(x, y)``.

        Raises:
            OSError: If an image file is missing or cannot be decoded.
        """
        # Pick the (possibly shuffled) sample positions that belong to this batch.
        idxs = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_img_paths = [self.img_paths[i] for i in idxs]

        x = np.empty((len(batch_img_paths), self.img_size, self.img_size, 3), dtype=np.float32)
        y = np.empty((len(batch_img_paths), 1), dtype=np.float32)

        for i, img_path in enumerate(batch_img_paths):
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None instead of raising;
                # fail here with the offending path rather than inside cv2.resize.
                raise OSError(f'Could not read image (missing or unreadable): {img_path!r}')
            img = cv2.resize(img, (self.img_size, self.img_size))
            img = img / 255.

            if self.aug:
                img = self._augment(img)

            x[i] = img
            y[i] = self._label_from_path(img_path)

        return x, y

    def on_epoch_end(self):
        """Reshuffle the sample order in place (global NumPy RNG) when ``shuffle`` is set."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _augment(self, img):
        """Augmentation hook; placeholder that returns the image unchanged."""
        return img

    @staticmethod
    def _label_from_path(img_path):
        """Return 0 if the image's class folder is ``MT_Free``, otherwise 1.

        The class folder is the parent of the image's own directory. ``os.path``
        is used instead of splitting on '/' so that paths joined with the
        platform separator (e.g. backslashes from ``glob`` on Windows) resolve
        to the same folder.
        """
        class_dir = os.path.basename(os.path.dirname(os.path.dirname(img_path)))
        return 0 if class_dir == 'MT_Free' else 1

In [None]:
# Gather the defect-free images followed by the blowhole-defect images.
# glob returns matches in arbitrary, filesystem-dependent order, so each listing
# is sorted to give the same sample order on every run and machine.
img_paths = (
    sorted(glob('./Magnetic-tile-defect/MT_Free/Imgs/*.jpg'))
    + sorted(glob('./Magnetic-tile-defect/MT_Blowhole/Imgs/*.jpg'))
)

In [None]:
# total number of image paths collected from both class folders
len(img_paths)

1067

In [None]:
# Hold out 20% of the paths for validation. A fixed random_state makes the split
# identical on every run, so results are comparable across kernel restarts.
train_img_paths, val_img_paths = train_test_split(img_paths, test_size=0.2, random_state=42)

In [None]:
# sizes of the training and validation path lists
len(train_img_paths), len(val_img_paths)

(853, 214)

In [None]:
# Training batches are reshuffled each epoch and go through the augmentation branch;
# validation batches keep a fixed order and are left untouched.
train_gen = DataGenerator(
    img_paths=train_img_paths,
    batch_size=32,
    img_size=200,
    shuffle=True,
    aug=True,
)
val_gen = DataGenerator(
    img_paths=val_img_paths,
    batch_size=32,
    img_size=200,
    shuffle=False,
    aug=False,
)

In [None]:
# len() dispatches to DataGenerator.__len__, i.e. the number of batches per epoch
# (ceil of sample count / batch size)
len(train_gen)

27

In [None]:
# get 1 batch
# Indexing dispatches to DataGenerator.__getitem__; index 2 is the third batch
# of the current (shuffled) epoch order.
imgs, labels = train_gen[2]

In [None]:
# images are (batch, height, width, channels); labels are (batch, 1)
imgs.shape, labels.shape

((32, 200, 200, 3), (32, 1))

In [None]:
# labels for this batch: 0 = MT_Free, 1 = any other class folder
print(labels)

In [None]:
# train
# NOTE(review): `model` is not defined in any visible cell of this notebook, so on a
# fresh kernel this cell raises NameError. Build and compile a Keras model that
# accepts (200, 200, 3) inputs and emits one binary output before running it.

model.fit(train_gen,
          validation_data=val_gen,
          epochs=10,
          )