# 图片预处理

In [21]:
import os, shutil, platform
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from random import shuffle 

%matplotlib inline 

## 工具函数:判断操作系统种类

In [22]:
print("当前工作目录是:")
print(os.getcwd())

当前工作目录是:
/home/ubuntu/cat_vs_dog_cnn


In [23]:
from PIL import Image
print(Image.__file__)

/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/PIL/Image.py


In [24]:
def is_Windows_OS():
    if platform.system() == 'Windows':
        return True
    else:
        return False

## 定义的目录结构变量

In [25]:
# 训练集图片从zip包解压之后存放的目录
train_images_folder = 'train'

# 训练集图片目录
train_set_folder = 'train1'
train_set_folder_cat = 'train1/cats/'
train_set_folder_dog = 'train1/dogs/'

# 验证集图片目录
validation_set_folder = 'valid'
validation_set_folder_cat = 'valid/cats/'
validation_set_folder_dog = 'valid/dogs/'

# 测试集图片从zip包解压缩之后的存放目录
test_set_folder = 'test1'

## 新建有关目录结构

In [26]:
def rmrf_mkdir(dirname):
    if os.path.exists(dirname):
        shutil.rmtree(dirname)
    os.mkdir(dirname)

rmrf_mkdir(train_set_folder)
os.mkdir(train_set_folder_cat)
os.mkdir(train_set_folder_dog)

rmrf_mkdir(validation_set_folder)
os.mkdir(validation_set_folder_cat)
os.mkdir(validation_set_folder_dog)

## 加载训练集目录

In [27]:
train_filenames = os.listdir(train_images_folder)
train_cat = filter(lambda x:x[:3] == 'cat', train_filenames)
train_dog = filter(lambda x:x[:3] == 'dog', train_filenames)

In [28]:
print (type(train_cat))
print (len(train_filenames))

<class 'filter'>
25000


In [29]:
print (train_filenames[24999])

cat.8408.jpg


定义一组移动有用的函数：

In [30]:
num_of_images_valid_set = 1500

def move_cat_images():
    timer = 0
    for filename in train_cat:
        if timer < num_of_images_valid_set:
            shutil.copyfile('train/'+filename, validation_set_folder_cat + filename)
        else:
            shutil.copyfile('train/'+filename, train_set_folder_cat + filename)
        timer = timer + 1

def move_dog_images():
    timer = 0
    for filename in train_dog:
        if timer < num_of_images_valid_set:
            shutil.copyfile('train/'+filename, validation_set_folder_dog + filename)
        else:
            shutil.copyfile('train/'+filename, train_set_folder_dog + filename)
        timer = timer + 1
        
def move_cat_images():
    timer = 0
    for filename in train_cat:
        if timer < num_of_images_valid_set:
            shutil.copy2('train/'+filename, validation_set_folder_cat)
        else:
            shutil.copy2('train/'+filename, train_set_folder_cat)
        timer = timer + 1

def move_dog_images():
    timer = 0
    for filename in train_dog:
        if timer < num_of_images_valid_set:
            shutil.copy2('train/'+filename, validation_set_folder_dog)
        else:
            shutil.copy2('train/'+filename, train_set_folder_dog)
        timer = timer + 1     

In [31]:
def fill_train_set_valid_set_folder_with_images():
    if is_Windows_OS():
        move_cat_images()
        move_dog_images()
    else:
        move_cat_images()
        move_dog_images()

In [32]:
fill_train_set_valid_set_folder_with_images()
print ('fill_train_set_valid_set_folder_with_images执行完毕')

fill_train_set_valid_set_folder_with_images执行完毕


# 搭建并且编译模型

In [78]:
from keras.applications.densenet import DenseNet169
from keras.models import Sequential, Model
from keras.layers import Conv2D, Input
from keras.layers import MaxPooling2D, BatchNormalization
from keras.layers import Flatten, Dropout
from keras.layers import Dense, GlobalAveragePooling2D
from keras import optimizers


# 不包含原有模型的全连接层
dnesenet_model = DenseNet169(include_top=False, 
                             weights='imagenet',
                             input_shape = (250, 250, 3),
                             pooling='avg')

for layer in dnesenet_model.layers:
    layer.trainable = False

x = dnesenet_model.output


# Full Connected Layers
x = Dense(units = 1024, activation = 'relu',  name='fc1')(x)

# x = Dense(units = 8, activation = 'relu',  name='fc2')(x)

# Dropout Layer
x = Dropout(0.5, name='fc3_dropout')(x)

# Classifier
predictions = Dense(1, activation='sigmoid', name="predictions")(x)

model = Model(inputs=dnesenet_model.input, outputs=predictions)
# Compiling the CNN
optimizer = optimizers.Adam(lr=0.001, decay=0.1)
model.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])

In [79]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_14 (InputLayer)           (None, 250, 250, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_27 (ZeroPadding2 (None, 256, 256, 3)  0           input_14[0][0]                   
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 125, 125, 64) 9408        zero_padding2d_27[0][0]          
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 125, 125, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu

## 训练模型

In [80]:
from keras.preprocessing.image import ImageDataGenerator

batch_size = 64
target_size = (250, 250)

train_image_gen = ImageDataGenerator(rotation_range=40,
                                     width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     rescale = 1./255,
                                     shear_range = 0.2,
                                     zoom_range = 0.2,
                                     horizontal_flip = True)
print("训练集图片:")
training_set = train_image_gen.flow_from_directory('train1',
                                                 target_size = target_size,
                                                 batch_size = batch_size,
                                                 color_mode="rgb",
                                                 class_mode = 'binary')

valid_image_gen = ImageDataGenerator(rotation_range=40,
                                     width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     rescale = 1./255,
                                     shear_range = 0.2,
                                     zoom_range = 0.2,
                                     horizontal_flip = True)
print("验证集图片:")
validation_set = valid_image_gen.flow_from_directory('valid',
                                                 target_size = target_size,
                                                 batch_size = batch_size,
                                                 color_mode="rgb",
                                                 class_mode = 'binary')


训练集图片:
Found 22000 images belonging to 2 classes.
验证集图片:
Found 3000 images belonging to 2 classes.


## 开始训练模型

In [None]:
# 训练集图片总量
train_size = 22000

# 每一轮训练需要执行的步长
steps_per_epoch = train_size/batch_size

# 验证集图片总量
valid_size = 3000
# 每一轮验证需要执行的步长
validation_steps = valid_size/batch_size

# 使用模型拟合图片数据
model.fit_generator(training_set, epochs = 5, steps_per_epoch = steps_per_epoch, 
                    validation_data=validation_set, validation_steps = validation_steps)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
  4/343 [..............................] - ETA: 52s - loss: 0.0832 - acc: 0.9727

In [None]:
print("训练完毕！")

## 开始预测

In [27]:
# 测试集图片目录
test_images_folder = 'test1'
test_set_folder = 'test'
test_set_folder_cat = 'test/cats/'
test_set_folder_dog = 'test/dogs/'

rmrf_mkdir(test_set_folder)
os.mkdir(test_set_folder_cat)
os.mkdir(test_set_folder_dog)

test_filenames = os.listdir(test_images_folder)
test_cat = filter(lambda x:x[:3] == 'cat', test_filenames)
test_dog = filter(lambda x:x[:3] == 'dog', test_filenames)

def copy_test_images():
    for filename in test_cat:
        shutil.copy2('test1/'+ filename, test_set_folder_cat)
    for filename in test_dog:
        shutil.copy2('test1/'+ filename, test_set_folder_dog)
    
    
copy_test_images()
print("图片复制完成!")

图片复制完成!


In [23]:
        
print("开始读取测试集图片:")
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(test_set_folder,
                                                    target_size=target_size, 
                                                    batch_size=batch_size,
                                                    class_mode=None)

开始读取测试集图片:
Found 0 images belonging to 2 classes.


In [None]:
pred_result = model.predict_generator(test_generator, verbose=1)
print("预测执行完毕！")

## 导出预测结果

In [26]:
import pandas as pd
from keras.preprocessing.image import *

df = pd.read_csv("sample_submission.csv")

for i, fname in enumerate(test_generator.filenames):
    index = int(fname[fname.rfind('/')+1:fname.rfind('.')])
    df.set_value(index-1, 'label', pred_result[i])

df.to_csv('pred.csv', index=None)
df.head(10)

FileNotFoundError: File b'sample_submission.csv' does not exist