# Kaggle上的貓狗大戰分類題

In [2]:
# Workaround for the runtime abort:
#   "OMP: Error #15: Initializing libiomp5.dylib, but found libiomp5.dylib already initialized."
# The flag has to be in the environment before the libraries that load OpenMP are imported.
import os

os.environ.update({'KMP_DUPLICATE_LIB_OK': 'True'})

In [4]:
# Dataset preparation: from the extracted Kaggle training set, use 1000 cat and
# 1000 dog photos for training, 500 of each for validation and 500 of each for testing.
# Photos are named cat.NUMBER.jpg / dog.NUMBER.jpg, with NUMBER from 0 to 12499.

import os
import shutil

original_dataset_dir = '/Users/ken/Downloads/dogs-vs-cats/train/'
base_dir = '/Users/ken/Downloads/cats_and_dogs_small'

# Three split folders under the new data directory: train, validation, test.
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

# Each split folder gets one sub-folder per class: cats and dogs.
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')

validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')

test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')

# makedirs(..., exist_ok=True) instead of mkdir: re-running this cell (or
# Restart & Run All) must not raise FileExistsError once the folders exist.
# Creating the leaf folders also creates base_dir and the split folders.
for directory in (train_cats_dir, train_dogs_dir,
                  validation_cats_dir, validation_dogs_dir,
                  test_cats_dir, test_dogs_dir):
    os.makedirs(directory, exist_ok=True)


def copy_images(animal, indices, dst_dir):
    """Copy '<animal>.<i>.jpg' for every i in `indices` into `dst_dir`.

    Files are read from `original_dataset_dir`. Existing destination files
    are overwritten by shutil.copyfile, so the call can be repeated safely.
    """
    for i in indices:
        fname = '{}.{}.jpg'.format(animal, i)
        src = os.path.join(original_dataset_dir, fname)
        dst = os.path.join(dst_dir, fname)
        shutil.copyfile(src, dst)


# (class prefix, image indices, destination folder) for every split:
# indices 0-999 -> training, 1000-1499 -> validation, 1500-1999 -> test.
split_plan = [
    ('cat', range(1000), train_cats_dir),
    ('dog', range(1000), train_dogs_dir),
    ('cat', range(1000, 1500), validation_cats_dir),
    ('dog', range(1000, 1500), validation_dogs_dir),
    ('cat', range(1500, 2000), test_cats_dir),
    ('dog', range(1500, 2000), test_dogs_dir),
]
for animal, indices, dst_dir in split_plan:
    copy_images(animal, indices, dst_dir)

In [5]:
# Define the network

from keras import layers
from keras import models

# The photos do not share one size: two picked at random were 375x299 and 500x374,
# and there are more sizes. 150x150 is chosen arbitrarily as the input size here,
# so images have to be resized before training.
model = models.Sequential([
    # Four Conv2D + MaxPooling2D stages.
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: flatten, one hidden dense layer, single sigmoid output.
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

Using TensorFlow backend.


Instructions for updating:
Colocations handled automatically by placer.


In [6]:
# Compile the network

from keras import optimizers

# RMSprop with a learning rate of 1e-4; binary cross-entropy pairs with the
# single sigmoid output unit of the model.
rmsprop = optimizers.RMSprop(lr=1e-4)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['acc'])

In [7]:
# Read the JPEG files, scale the RGB values by 1/255 (normalization),
# and resize every image to 150x150.

from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Both generators are built with the same options: 150x150 targets, batches of 20,
# and binary labels (one per class sub-folder).
flow_options = dict(target_size=(150, 150), batch_size=20, class_mode='binary')

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

validation_generator = test_datagen.flow_from_directory(validation_dir, **flow_options)

Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [8]:
# Train the model. fit_generator is used here so that the generators act as the data source.

# 100 steps x batch size 20 covers the 2000 training images once per epoch;
# 50 validation steps x 20 covers the 1000 validation images.
fit_options = dict(
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50,
)
history = model.fit_generator(train_generator, **fit_options)

Instructions for updating:
Use tf.cast instead.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
