In [None]:
# 위 작업 일괄 처리 (파일 압축 해제)

%%bash

rm -rf sample_data/dogs-vs-cats
rm -rf sample_data/dogs-vs-cats2

cp drive/MyDrive/Colab\ Notebooks/data-files/dogs-vs-cats.zip sample_data/
unzip sample_data/dogs-vs-cats.zip -d sample_data/

unzip sample_data/dogs-vs-cats/train.zip -d sample_data/dogs-vs-cats
unzip sample_data/dogs-vs-cats/test1.zip -d sample_data/dogs-vs-cats
mv sample_data/dogs-vs-cats/test1 sample_data/dogs-vs-cats/test

rm -rf sample_data/__MACOSX

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras as tf_keras

In [3]:
# 데이터 디렉터리 구성
# train_dir, test_dir, validation_dir # 훈련, 테스트, 검증 세트 폴드
# train_dir -> cats, dogs # 범주로 사용될 폴더
# test_dir -> cats, dogs
# validation_dir -> cats, dogs

import os, shutil

src_base = 'sample_data/dogs-vs-cats'
dest_base = 'sample_data/dogs-vs-cats2'

if os.path.exists(dest_base): # 이미 존재하는 폴더라면
  shutil.rmtree(dest_base)    # 폴더 제거 후
os.makedirs(dest_base)        # 새로 폴더 만들기

# train 폴더의 0 ~ 1000 : trainset,  1000 ~ 1500 : validationset, 1500 ~ 2000 : testset
for start, stop, path in zip([0, 1000, 1500], [1000, 1500, 2000], ['train', 'validation', 'test']):
  os.mkdir(os.path.join(dest_base, path)) # train or validation or test 폴더 생성
  for sub_path in ['cat', 'dog']:
    os.mkdir(os.path.join(dest_base, path, sub_path)) # train, validation, test 하위에 cats or dog 폴더 생성
    for idx in np.arange(start, stop):
      fname = f'{sub_path}.{idx}.jpg'
      shutil.copy(os.path.join(src_base, 'train', fname), os.path.join(dest_base, path, sub_path, fname))

In [4]:
# 디렉터리 구성 결과 확인
for path in ['train', 'test', 'validation']:
  for subpath in ['dog', 'cat']:
    print(path, '/', subpath, ':', len(os.listdir(f'sample_data/dogs-vs-cats2/{path}/{subpath}')))

train / dog : 1000
train / cat : 1000
test / dog : 500
test / cat : 500
validation / dog : 500
validation / cat : 500


In [5]:
# 입력 자동화 도구 만들기

# 특정 디렉터리의 파일을 읽어서 모델의 입력데이터로 변환하는 도구
train_generator = tf_keras.preprocessing.image.ImageDataGenerator(rescale=1/255)
validation_generator = tf_keras.preprocessing.image.ImageDataGenerator(rescale=1/255)
test_generator = tf_keras.preprocessing.image.ImageDataGenerator(rescale=1/255)

In [6]:
# 입력 데이터 구성

datasets = []
for path, generator in zip( ['train', 'test', 'validation'],
                            [train_generator, test_generator, validation_generator]):
  dataset = generator.flow_from_directory(directory=f'sample_data/dogs-vs-cats2/{path}',
                                          target_size=(256, 256),
                                          batch_size=32,
                                          class_mode="binary")
  datasets.append(dataset)

Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [7]:
# 이미 만들어진 모델 가져오기 1
base_model = tf_keras.applications.vgg16.VGG16()

base_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
[1m553467096/553467096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [8]:
# 이미 만들어진 모델 가져오기 2 ( 최상위 판별층 제외 )

base_model = tf_keras.applications.vgg16.VGG16(include_top=False,
                                               weights="imagenet",
                                               input_shape=(256, 256, 3))

base_model.trainable = True

base_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [11]:
# 사전 학습 모델의 각 층(레이어)의 학습 가능 상태 확인
print( [ layer.trainable for layer in base_model.layers ] )

[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]


In [14]:
# 사전 학습 모델의 일부 층만 학습 가능 상태로 유지
for layer in base_model.layers:
  layer.trainable = True
for layer in base_model.layers[:-4]:
  layer.trainable = False

print( [ layer.trainable for layer in base_model.layers ] )

[False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, True, True]


In [18]:
# 모델 구조 설계 ( 사전 학습 모델을 통해 만든 특성맵 데이터를 입력으로 사용하는 판별 모델 )

input = tf_keras.layers.Input(shape=(256, 256, 3))
x = base_model(input)
x = tf_keras.layers.Flatten()(x)
x = tf_keras.layers.Dense(256, activation='relu', kernel_regularizer=tf_keras.regularizers.l2(0.001))(x)
output = tf_keras.layers.Dense(1, activation='sigmoid')(x)
full_model = tf_keras.models.Model(inputs=input, outputs=output)

full_model.summary()

In [19]:
# 모델 학습 설계 ( 사전 학습 모델을 통해 만든 특성맵 데이터를 입력으로 사용하는 판별 모델 )

full_model.compile(loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])

In [20]:
# 판별 모델 학습

history = full_model.fit(datasets[0],
                         epochs=20,
                         validation_data=(datasets[2]))


Epoch 1/20
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 342ms/step - accuracy: 0.5058 - loss: 0.8165 - val_accuracy: 0.5000 - val_loss: 0.6934
Epoch 2/20
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 290ms/step - accuracy: 0.4749 - loss: 0.6933 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 3/20
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 373ms/step - accuracy: 0.4877 - loss: 0.6932 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 4/20
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 367ms/step - accuracy: 0.4653 - loss: 0.6932 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 5/20
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 287ms/step - accuracy: 0.4748 - loss: 0.6932 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 6/20
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 298ms/step - accuracy: 0.4592 - loss: 0.6932 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 7/20
[1m63/63[

In [21]:
print( full_model.evaluate(datasets[0]) )
print( full_model.evaluate(datasets[1]) )

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 177ms/step - accuracy: 0.5070 - loss: 0.6931
[0.693147599697113, 0.5]
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 160ms/step - accuracy: 0.5210 - loss: 0.6931
[0.6931476593017578, 0.5]
