### Transfer Learning Task
- 벼 잎 병해(Bacterial blight, Brown spot, Leaf smut) 이미지 분류

In [1]:
from glob import glob
import os

# Folder that holds one sub-directory per class of the original dataset.
root = './datasets/rice_leaf_diseases_dataset/original/'

# Every entry found directly under the dataset root.
class_dir_pattern = os.path.join(root, '*')
directories = glob(class_dir_pattern)

In [2]:
# Show the paths matched by the glob on the dataset root.
directories

['./datasets/rice_leaf_diseases_dataset/original\\Bacterialblight',
 './datasets/rice_leaf_diseases_dataset/original\\Brownspot',
 './datasets/rice_leaf_diseases_dataset/original\\Leafsmut']

In [3]:
# Holds the class names, i.e. the last path component of each entry in `directories`.
directory_names = []

In [4]:
# Take the last path component of each class directory.
# os.path.basename works with both '/' and '\\' separators on Windows and with
# '/' on POSIX, whereas searching for '\\' raises ValueError on POSIX paths.
# The list is rebuilt rather than appended to, so re-running the cell does not
# produce duplicate names.
directory_names = [
    os.path.basename(os.path.normpath(directory)) for directory in directories
]

In [5]:
# Show the extracted class names.
directory_names

['Bacterialblight', 'Brownspot', 'Leafsmut']

In [6]:
import uuid

# Rename every image to <class name><running number>.png inside its class folder.
for name in directory_names:
    class_dir = os.path.join(root, name)

    # Sorted so the numbering is reproducible; only regular files are renamed.
    file_names = sorted(
        file_name for file_name in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, file_name))
    )

    # Phase 1: move each file to a unique temporary name. Renaming directly to
    # the final name can hit a file that has not been renamed yet (e.g. when
    # the cell is run a second time): os.rename then silently overwrites it on
    # POSIX and raises FileExistsError on Windows.
    run_tag = uuid.uuid4().hex
    temp_files = []
    for i, file_name in enumerate(file_names):
        temp_file = os.path.join(class_dir, run_tag + '_' + str(i) + '.tmp')
        os.rename(os.path.join(class_dir, file_name), temp_file)
        temp_files.append(temp_file)

    # Phase 2: assign the final sequential names.
    for i, temp_file in enumerate(temp_files):
        new_file = os.path.join(class_dir, name + str(i + 1) + '.png')
        os.rename(temp_file, new_file)

In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Multiply every pixel value by 1/255 while loading.
image_data_generator = ImageDataGenerator(rescale=1./255)

# Scan the dataset root; each sub-directory becomes one class.
generator = image_data_generator.flow_from_directory(
    root,
    target_size=(64, 64),
    batch_size=32,
    class_mode='categorical',
)
print(generator.class_indices)

Found 4684 images belonging to 3 classes.
{'Bacterialblight': 0, 'Brownspot': 1, 'Leafsmut': 2}


In [9]:
import pandas as pd

# Use '/' as the only path separator before building the frame.
normalized_paths = [path.replace('\\', '/') for path in generator.filepaths]

# One row per image: its path and its integer class index.
l_df = pd.DataFrame({'file_paths': normalized_paths, 'targets': generator.classes})
l_df

Unnamed: 0,file_paths,targets
0,./datasets/rice_leaf_diseases_dataset/original...,0
1,./datasets/rice_leaf_diseases_dataset/original...,0
2,./datasets/rice_leaf_diseases_dataset/original...,0
3,./datasets/rice_leaf_diseases_dataset/original...,0
4,./datasets/rice_leaf_diseases_dataset/original...,0
...,...,...
4679,./datasets/rice_leaf_diseases_dataset/original...,2
4680,./datasets/rice_leaf_diseases_dataset/original...,2
4681,./datasets/rice_leaf_diseases_dataset/original...,2
4682,./datasets/rice_leaf_diseases_dataset/original...,2


In [12]:
from sklearn.model_selection import train_test_split

# First split: hold out 20% of all images as the test set, stratified by class.
X_train, X_test, y_train, y_test = train_test_split(
    l_df.file_paths,
    l_df.targets,
    stratify=l_df.targets,
    test_size=0.2,
    random_state=124,
)

# Second split: hold out 20% of the remaining images as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    stratify=y_train,
    test_size=0.2,
    random_state=124,
)

# Class distribution of the train, validation and test targets, in that order.
for split_targets in (y_train, y_val, y_test):
    print(split_targets.value_counts())

targets
1    1037
0    1026
2     934
Name: count, dtype: int64
targets
1    259
0    257
2    234
Name: count, dtype: int64
targets
1    324
0    321
2    292
Name: count, dtype: int64


In [16]:
# Shapes of the path/target pairs for train, validation and test, in that order.
for split_paths, split_targets in ((X_train, y_train), (X_val, y_val), (X_test, y_test)):
    print(split_paths.shape, split_targets.shape)

(2997,) (2997,)
(750,) (750,)
(937,) (937,)


In [13]:
import shutil

root = './datasets/rice_leaf_diseases_dataset/'
# Directory that holds the unsplit, per-class image folders.
original_root = os.path.join(root, 'original')

# Copy every training image into train/<class folder>/.
for file_path in X_train:
    # Class folder of the image, taken relative to the original dataset
    # directory instead of slicing the string by a hard-coded prefix length.
    rice_dir = os.path.relpath(os.path.dirname(file_path), original_root)
    destination = os.path.join(root, 'train', rice_dir)

    # exist_ok replaces the separate exists() check and keeps re-runs safe.
    os.makedirs(destination, exist_ok=True)

    shutil.copy2(file_path, destination)

In [14]:
import shutil

root = './datasets/rice_leaf_diseases_dataset/'
# Directory that holds the unsplit, per-class image folders.
original_root = os.path.join(root, 'original')

# Copy every validation image into val/<class folder>/.
for file_path in X_val:
    # Class folder of the image, taken relative to the original dataset
    # directory instead of slicing the string by a hard-coded prefix length.
    rice_dir = os.path.relpath(os.path.dirname(file_path), original_root)
    destination = os.path.join(root, 'val', rice_dir)

    # exist_ok replaces the separate exists() check and keeps re-runs safe.
    os.makedirs(destination, exist_ok=True)

    shutil.copy2(file_path, destination)

In [15]:
import shutil

root = './datasets/rice_leaf_diseases_dataset/'
# Directory that holds the unsplit, per-class image folders.
original_root = os.path.join(root, 'original')

# Copy every test image into test/<class folder>/.
for file_path in X_test:
    # Class folder of the image, taken relative to the original dataset
    # directory instead of slicing the string by a hard-coded prefix length.
    rice_dir = os.path.relpath(os.path.dirname(file_path), original_root)
    destination = os.path.join(root, 'test', rice_dir)

    # exist_ok replaces the separate exists() check and keeps re-runs safe.
    os.makedirs(destination, exist_ok=True)

    shutil.copy2(file_path, destination)

In [19]:
# Side length (in pixels) the images are resized to, and the number of images per batch.
IMAGE_SIZE, BATCH_SIZE = 32, 64

In [23]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_dir = './datasets/rice_leaf_diseases_dataset/train'
val_dir = './datasets/rice_leaf_diseases_dataset/val'
test_dir = './datasets/rice_leaf_diseases_dataset/test'

# Every split is only rescaled by 1/255; no augmentation is applied.
# NOTE(review): the ImageNet-pretrained backbone used later may expect its own
# preprocessing instead of a plain 1/255 rescale -- confirm.
train_generator = ImageDataGenerator(rescale=1./255)
val_generator = ImageDataGenerator(rescale=1./255)
test_generator = ImageDataGenerator(rescale=1./255)

train_flow = train_generator.flow_from_directory(
    train_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    # Label encoding: 'categorical' yields one-hot vectors
    # ('binary' would be the choice for two-class labels).
    class_mode='categorical',
    shuffle=True
)

val_flow = val_generator.flow_from_directory(
    val_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

test_flow = test_generator.flow_from_directory(
    test_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# The class-to-index mapping should be identical for all three splits.
print(train_flow.class_indices)
# Was `validation_flow`, a name never defined in this notebook (NameError on a fresh kernel).
print(val_flow.class_indices)
print(test_flow.class_indices)

Found 2997 images belonging to 3 classes.
Found 750 images belonging to 3 classes.
Found 937 images belonging to 3 classes.
{'Bacterialblight': 0, 'Brownspot': 1, 'Leafsmut': 2}
{'Bacterialblight': 0, 'Brownspot': 1, 'Leafsmut': 2}
{'Bacterialblight': 0, 'Brownspot': 1, 'Leafsmut': 2}


In [28]:
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model

def create_model(verbose=False):
    """Build a 3-class image classifier on top of an ImageNet-pretrained VGG16.

    The VGG16 convolutional base (without its top layers) is followed by
    global average pooling, a 50-unit ReLU layer and a 3-unit softmax layer.

    Args:
        verbose: when True, print the model summary before returning.

    Returns:
        The assembled, uncompiled Keras ``Model``.
    """
    inputs = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    backbone = VGG16(input_tensor=inputs, include_top=False, weights='imagenet')

    # Classifier head
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(50, activation='relu')(pooled)
    probabilities = Dense(3, activation='softmax')(hidden)

    classifier = Model(inputs=backbone.input, outputs=probabilities)
    if verbose:
        classifier.summary()

    return classifier

In [33]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import Adam 
from tensorflow.keras.losses import CategoricalCrossentropy

# Save the weights after every epoch (save_best_only=False); the file name
# carries the epoch number, validation loss and training accuracy.
mcp_cb = ModelCheckpoint(
    filepath="./callback_files/weights.{epoch:03d}-{val_loss:.4f}-{acc:.4f}.weights.h5",
    monitor='val_loss',
    save_best_only=False,
    save_weights_only=True,
    mode='min'
)

# Multiply the learning rate by 0.1 when val_loss has not improved for 2 epochs.
rlr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=2,
    mode='min'
)

# Stop training when val_loss has not improved for 4 epochs.
ely_cb = EarlyStopping(
    monitor='val_loss',
    patience=4,
    mode='min'
)

model = create_model(verbose=True)
# Pass a loss *instance*: handing over the CategoricalCrossentropy class itself
# is not a valid loss on every Keras version.
model.compile(optimizer=Adam(), loss=CategoricalCrossentropy(), metrics=['acc'])

In [34]:
import gc

# Run a garbage-collection pass before training; the cell output is gc.collect()'s return value.
gc.collect()

530

In [35]:
# train_flow already yields batches, so `batch_size` is not passed to fit():
# Keras documents that it must not be specified for generator inputs.
history = model.fit(train_flow,
                    epochs=10,
                    validation_data=val_flow,
                    callbacks=[mcp_cb, rlr_cb, ely_cb])

Epoch 1/10


  self._warn_if_super_not_called()


[1m29/47[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m10:55[0m 36s/step - acc: 0.3457 - loss: 7.5931

KeyboardInterrupt: 

이미지 사이즈를 처음부터 높게 줄 필요 없음!
성능이 너무 안 나오면 그때부터 올리기~

In [None]:
# Evaluate the trained model on the batches produced by the test flow.
model.evaluate(test_flow)

In [None]:
import matplotlib.pyplot as plt

def show_history(history):
    """Plot training and validation accuracy per epoch.

    Args:
        history: object whose ``history`` dict contains the 'acc' and
            'val_acc' series (e.g. the value returned by ``model.fit``).
    """
    plt.figure(figsize=(6, 6))
    # Ticks at 0.00, 0.05, ..., 0.95 -- the same positions as
    # np.arange(0, 1, 0.05), built in plain Python because `np` is not
    # imported in the visible cells of this notebook.
    plt.yticks([tick * 0.05 for tick in range(20)])
    plt.plot(history.history['acc'], label='train')
    plt.plot(history.history['val_acc'], label='validation')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()

show_history(history)

In [None]:
# 사전 훈련 모델마다 학습 시 사용한 입력 스케일(전처리) 방식이 다르다.
# 따라서 입력 스케일링도 사용하는 모델에 맞는 방식으로 적용해야 한다.
