## 1. Flow from directory

In [2]:
import tensorflow as tf
from tensorflow.keras import layers

from tensorflow.keras import datasets 
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Image transformations
# Both pipelines multiply every pixel value by 1/255. The augmentation options
# (horizontal shift, zoom, horizontal flip) are set on the training generator
# only; the test generator just rescales.
# NOTE(review): horizontal_flip mirrors images; for the digit images loaded
# from the mnist_png folders below this may not preserve the label - confirm
# that this augmentation is intended.

test_datagen = ImageDataGenerator(rescale=1./255)

train_augmentation = dict(
    rescale=1./255,
    width_shift_range=0.3,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(**train_augmentation)

In [6]:
# flow_from_directory: the generator works out the number of images and the
# number of classes from the directory tree by itself (if it cannot, the
# classes have to be specified explicitly).

train_dir = '../dataset/mnist_png/training'
test_dir = '../dataset/mnist_png/testing'

# Options shared by both generators: 28x28 grayscale images, batches of 32.
directory_flow_options = dict(
    target_size=(28, 28),
    batch_size=32,
    color_mode='grayscale',
)

# Training batches come from the augmenting generator, validation batches
# from the rescale-only generator.
train_generator = train_datagen.flow_from_directory(
    train_dir, **directory_flow_options)

validation_generator = test_datagen.flow_from_directory(
    test_dir, **directory_flow_options)

Found 60000 images belonging to 10 classes.
Found 10000 images belonging to 10 classes.


### 학습

In [9]:
# Training: define the model, compile it, then fit it on the generators.

# Input shape matches the 28x28 grayscale batches configured on the generators.
inputs = layers.Input((28, 28, 1))

# Convolution stage 1: two same-padded 3x3 convolutions (32 filters) with ReLU,
# then 2x2 max-pooling and dropout.
net = layers.Conv2D(32, (3, 3), padding='SAME')(inputs)
net = layers.Activation('relu')(net)
net = layers.Conv2D(32, (3, 3), padding='SAME')(net)
net = layers.Activation('relu')(net)
net = layers.MaxPooling2D(pool_size=(2, 2))(net)
net = layers.Dropout(0.5)(net)

# Convolution stage 2: same layout with 64 filters.
net = layers.Conv2D(64, (3, 3), padding='SAME')(net)
net = layers.Activation('relu')(net)
net = layers.Conv2D(64, (3, 3), padding='SAME')(net)
net = layers.Activation('relu')(net)
net = layers.MaxPooling2D(pool_size=(2, 2))(net)
net = layers.Dropout(0.5)(net)

# Classifier head: flatten, 512-unit dense layer with ReLU and dropout, then a
# 10-way softmax output.
net = layers.Flatten()(net)
net = layers.Dense(512)(net)
net = layers.Activation('relu')(net)
net = layers.Dropout(0.5)(net)
net = layers.Dense(10)(net)
net = layers.Activation('softmax')(net)

model = tf.keras.Model(inputs=inputs, outputs=net, name='Basic_CNN')

# Compile with the Adam optimizer (learning rate 0.001), categorical
# cross-entropy loss and accuracy as the reported metric.
model.compile(optimizer=tf.keras.optimizers.Adam(0.001),  # Optimization
              loss='categorical_crossentropy',  # Loss Function
              metrics=['accuracy'])  # Metrics / Accuracy

# Train with Model.fit, which accepts the generators directly.
# fit_generator is deprecated (and removed in recent TensorFlow/Keras
# releases), so it is no longer used here.
model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=1,
        validation_data=validation_generator,
        validation_steps=len(validation_generator))



<tensorflow.python.keras.callbacks.History at 0x1c5c35da1d0>

## 2. Flow from dataframe
Directory 구조로는 class를 구분할 수 없고, 파일 이름(예: `0_frog.png`)으로 class를 구분해야 하는 경우에 사용한다.

In [17]:
from glob import glob
import os
import pandas as pd # pandas 사용

In [18]:
def get_class_name(path):
    """Return the class label encoded in an image file name.

    File names are expected to look like ``<id>_<class>.<ext>`` (for example
    ``0_frog.png``). The label is the text after the last underscore of the
    file name, with the file extension removed.

    Args:
        path: Path (or bare file name) of the image.

    Returns:
        The class label as a string.
    """
    # splitext removes only the trailing extension, whatever it is, instead of
    # deleting every '.png' substring found in the name.
    stem, _extension = os.path.splitext(os.path.basename(path))
    return stem.split('_')[-1]

In [20]:
# Collect the PNG paths of each split.
train_paths = glob('../dataset/cifar/train/*.png')
test_paths = glob('../dataset/cifar/test/*.png')

# Derive one class label per path from its file name.
train_classes_name = list(map(get_class_name, train_paths))
test_classes_names = list(map(get_class_name, test_paths))

# Column layout used by flow_from_dataframe later on: 'path' and 'class_name'.
train_data = dict(path=train_paths, class_name=train_classes_name)
test_data = dict(path=test_paths, class_name=test_classes_names)

train_df = pd.DataFrame(train_data)
test_df = pd.DataFrame(test_data)

# Preview the first rows of the training frame.
train_df.head()

Unnamed: 0,path,class_name
0,../dataset/cifar/train\0_frog.png,frog
1,../dataset/cifar/train\10000_automobile.png,automobile
2,../dataset/cifar/train\10001_frog.png,frog
3,../dataset/cifar/train\10002_frog.png,frog
4,../dataset/cifar/train\10003_ship.png,ship


In [None]:
# Save and reload (optional)
# Each DataFrame is round-tripped through its own CSV file. The test frame
# must be written to 'test_dataset.csv': writing it to 'train_dataset.csv'
# would overwrite the training CSV and leave 'test_dataset.csv' missing for
# the read below.

train_df.to_csv('train_dataset.csv', index=False)
train_df = pd.read_csv('train_dataset.csv')

test_df.to_csv('test_dataset.csv', index=False)
test_df = pd.read_csv('test_dataset.csv')

In [21]:
# Image transformations
# Both pipelines multiply every pixel value by 1/255. The augmentation options
# (horizontal shift, zoom, horizontal flip) are set on the training generator
# only; the test generator just rescales.

test_datagen = ImageDataGenerator(rescale=1./255)

train_augmentation = dict(
    rescale=1./255,
    width_shift_range=0.3,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(**train_augmentation)

In [23]:
# flow_from_dataframe: file paths are read from the 'path' column and labels
# from the 'class_name' column of each DataFrame.
# NOTE(review): color_mode is not passed here, so the library default applies;
# make sure the number of channels expected by the model matches it.

dataframe_flow_options = dict(
    x_col='path',
    y_col='class_name',
    target_size=(28, 28),
    batch_size=32,
)

# Training batches come from the augmenting generator, validation batches
# from the rescale-only generator.
train_generator = train_datagen.flow_from_dataframe(
    train_df, **dataframe_flow_options)

validation_generator = test_datagen.flow_from_dataframe(
    test_df, **dataframe_flow_options)

Found 50000 validated image filenames belonging to 10 classes.
Found 10000 validated image filenames belonging to 10 classes.


### 학습

In [None]:
# Training works the same way as in the flow_from_directory section.

# The generators above are created without color_mode, so flow_from_dataframe
# yields 3-channel RGB batches by default. The input therefore needs 3
# channels; a (28, 28, 1) input would fail with a shape mismatch at fit time.
inputs = layers.Input((28, 28, 3))

# Convolution stage 1: two same-padded 3x3 convolutions (32 filters) with ReLU,
# then 2x2 max-pooling and dropout.
net = layers.Conv2D(32, (3, 3), padding='SAME')(inputs)
net = layers.Activation('relu')(net)
net = layers.Conv2D(32, (3, 3), padding='SAME')(net)
net = layers.Activation('relu')(net)
net = layers.MaxPooling2D(pool_size=(2, 2))(net)
net = layers.Dropout(0.5)(net)

# Convolution stage 2: same layout with 64 filters.
net = layers.Conv2D(64, (3, 3), padding='SAME')(net)
net = layers.Activation('relu')(net)
net = layers.Conv2D(64, (3, 3), padding='SAME')(net)
net = layers.Activation('relu')(net)
net = layers.MaxPooling2D(pool_size=(2, 2))(net)
net = layers.Dropout(0.5)(net)

# Classifier head: flatten, 512-unit dense layer with ReLU and dropout, then a
# 10-way softmax output.
net = layers.Flatten()(net)
net = layers.Dense(512)(net)
net = layers.Activation('relu')(net)
net = layers.Dropout(0.5)(net)
net = layers.Dense(10)(net)
net = layers.Activation('softmax')(net)

model = tf.keras.Model(inputs=inputs, outputs=net, name='Basic_CNN')

# Compile with the Adam optimizer (learning rate 0.001), categorical
# cross-entropy loss and accuracy as the reported metric.
model.compile(optimizer=tf.keras.optimizers.Adam(0.001),  # Optimization
              loss='categorical_crossentropy',  # Loss Function
              metrics=['accuracy'])  # Metrics / Accuracy

# Train with Model.fit, which accepts the generators directly.
# fit_generator is deprecated (and removed in recent TensorFlow/Keras
# releases), so it is no longer used here.
model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=1,
        validation_data=validation_generator,
        validation_steps=len(validation_generator))

## 3. tf.data
ImageDataGenerator를 사용하지 않고 tf.data API로 입력 파이프라인을 직접 구성하는 방법으로, customize하기가 쉬워짐

In [24]:
import os 
from glob import glob

import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt
%matplotlib inline

In [25]:
def read_image(path):
    """Read the file at `path` and decode it into a float32 image tensor.

    Args:
        path: Path of the image file to load.

    Returns:
        The decoded image as a tensor with dtype tf.float32.
    """
    return tf.io.decode_image(tf.io.read_file(path), dtype=tf.float32)

# Start from the list of training image paths, then load the image behind each
# path. (A label is usually produced in the same map function as well.)
path_dataset = tf.data.Dataset.from_tensor_slices(train_paths)
dataset = path_dataset.map(read_image)

In [26]:
# Shuffle the individual images first, then group them into batches.
# Shuffling after batching would only reorder whole batches, so the same
# images would stay grouped together in every pass over the data.
# NOTE(review): buffer_size=len(train_paths) sizes the shuffle buffer to the
# whole training set - confirm that this fits in memory.

dataset = dataset.shuffle(buffer_size=len(train_paths))

# Batch: each element becomes [batch size, height, width, channel].

dataset = dataset.batch(4)

### 학습

In [None]:
# The model architecture is the same as in the previous sections.
# NOTE(review): input_shape, dropout_rate, num_classes and learning_rate are
# not assigned in any cell visible in this notebook - define them (for example
# in a configuration cell) before running this cell.

inputs = layers.Input(input_shape)

# Two convolution stages (32 filters, then 64). Each stage applies two
# same-padded 3x3 convolutions with ReLU, then 2x2 max-pooling and dropout.
net = inputs
for filters in (32, 64):
    for conv_idx in range(2):
        net = layers.Conv2D(filters, (3, 3), padding='SAME')(net)
        net = layers.Activation('relu')(net)
    net = layers.MaxPooling2D(pool_size=(2, 2))(net)
    net = layers.Dropout(dropout_rate)(net)

# Classifier head: flatten, 512-unit dense layer with ReLU and dropout, then a
# softmax over num_classes outputs.
net = layers.Flatten()(net)
net = layers.Dense(512)(net)
net = layers.Activation('relu')(net)
net = layers.Dropout(dropout_rate)(net)
net = layers.Dense(num_classes)(net)
net = layers.Activation('softmax')(net)

model = tf.keras.Model(inputs=inputs, outputs=net, name='Basic_CNN')

# Compile with Adam, sparse categorical cross-entropy and accuracy.
# NOTE(review): the sparse loss presumably expects integer class labels, while
# the tf.data pipeline built above yields images only - confirm where the
# labels come from.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate),  # Optimization
    loss='sparse_categorical_crossentropy',  # Loss Function
    metrics=['accuracy'],  # Metrics / Accuracy
)

In [None]:
# Floor division (//) drops the remainder, giving the number of whole batches
# (steps) in one pass over each split. This sets steps per epoch, not the
# number of epochs.
# NOTE(review): batch_size, num_epochs, train_dataset, test_dataset and
# learning_rate_scheduler are not defined in any cell visible in this
# notebook - define them before running this cell.
steps_per_epoch = len(train_paths) // batch_size
validation_steps = len(test_paths) // batch_size

# Model.fit accepts datasets directly; fit_generator is deprecated (and
# removed in recent TensorFlow/Keras releases).
model.fit(
    train_dataset,
    steps_per_epoch=steps_per_epoch,
    validation_data=test_dataset,
    validation_steps=validation_steps,
    epochs=num_epochs,
    callbacks=[learning_rate_scheduler]
)