# 图片预处理

In [1]:
import os, shutil, platform
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from random import shuffle 

%matplotlib inline 

## 工具函数:判断操作系统种类

In [2]:
print("当前工作目录是:")
print(os.getcwd())

当前工作目录是:
/home/ubuntu/cat_vs_dog_cnn


In [3]:
file_path = os.getcwd() + '/train/dog.410.jpg'

image = Image.open('train/dog.410.jpg')
print(image.format)
image.show()

NameError: name 'Image' is not defined

In [29]:
file_path = os.getcwd() + '/train1/dogs/dog.410.jpg'

image = Image.open('train1/dogs/dog.410.jpg')
image.show()

FileNotFoundError: [Errno 2] No such file or directory: 'train1/dogs/dog.410.jpg'

In [3]:
from PIL import Image
print(Image.__file__)

/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/PIL/Image.py


In [4]:
def is_Windows_OS():
    if platform.system() == 'Windows':
        return True
    else:
        return False

## 定义的目录结构变量

In [5]:
# 训练集图片从zip包解压之后存放的目录
train_images_folder = 'train'

# 训练集图片目录
train_set_folder = 'train1'
train_set_folder_cat = 'train1/cats/'
train_set_folder_dog = 'train1/dogs/'

# 验证集图片目录
validation_set_folder = 'valid'
validation_set_folder_cat = 'valid/cats/'
validation_set_folder_dog = 'valid/dogs/'

# 测试集图片从zip包解压缩之后的存放目录
test_set_folder = 'test1'


batch_size = 200
target_size = (250, 250)

## 新建有关目录结构

In [6]:
def rmrf_mkdir(dirname):
    if os.path.exists(dirname):
        shutil.rmtree(dirname)
    os.mkdir(dirname)

rmrf_mkdir(train_set_folder)
os.mkdir(train_set_folder_cat)
os.mkdir(train_set_folder_dog)

rmrf_mkdir(validation_set_folder)
os.mkdir(validation_set_folder_cat)
os.mkdir(validation_set_folder_dog)

## 加载训练集目录

In [7]:
train_filenames = os.listdir(train_images_folder)
train_cat = filter(lambda x:x[:3] == 'cat', train_filenames)
train_dog = filter(lambda x:x[:3] == 'dog', train_filenames)

In [8]:
print (type(train_cat))
print (len(train_filenames))

<class 'filter'>
25000


In [9]:
print (train_filenames[24999])

cat.8408.jpg


定义一组移动有用的函数：

In [10]:
num_of_images_valid_set = 1500

def move_cat_images():
    timer = 0
    for filename in train_cat:
        if timer < num_of_images_valid_set:
            shutil.copyfile('train/'+filename, validation_set_folder_cat + filename)
        else:
            shutil.copyfile('train/'+filename, train_set_folder_cat + filename)
        timer = timer + 1

def move_dog_images():
    timer = 0
    for filename in train_dog:
        if timer < num_of_images_valid_set:
            shutil.copyfile('train/'+filename, validation_set_folder_dog + filename)
        else:
            shutil.copyfile('train/'+filename, train_set_folder_dog + filename)
        timer = timer + 1
        
def create_cat_images_symlink():
    timer = 0
    for filename in train_cat:
        if timer < num_of_images_valid_set:
            os.symlink('train/'+filename, validation_set_folder_cat + filename)
        else:
            os.symlink('train/'+filename, train_set_folder_cat + filename)
        timer = timer + 1

def create_dog_images_symlink():
    timer = 0
    for filename in train_dog:
        if timer < num_of_images_valid_set:
            os.symlink('train/'+filename, validation_set_folder_dog + filename)
        else:
            os.symlink('train/'+filename, train_set_folder_dog + filename)
        timer = timer + 1    

In [11]:
def fill_train_set_valid_set_folder_with_images():
    if is_Windows_OS():
        move_cat_images()
        move_dog_images()
    else:
        create_cat_images_symlink()
        create_dog_images_symlink()

In [12]:
fill_train_set_valid_set_folder_with_images()
print ('fill_train_set_valid_set_folder_with_images执行完毕')

fill_train_set_valid_set_folder_with_images执行完毕


# 搭建并且编译模型

In [13]:
from keras.applications.densenet import DenseNet169
from keras.models import Sequential, Model
from keras.layers import Conv2D, Input
from keras.layers import MaxPooling2D, BatchNormalization
from keras.layers import Flatten, Dropout
from keras.layers import Dense, GlobalAveragePooling2D


# 不包含原有模型的全连接层
dnesenet_model = DenseNet169(include_top=False, 
                             weights='imagenet',
                             input_shape = (250, 250, 3),
                             pooling='avg')

for layer in dnesenet_model.layers:
    layer.trainable = False

x = dnesenet_model.output

# 添加全局平均池化层
# x = GlobalAveragePooling2D()(x)

# Full Connected Layers
# x = Dense(units = 512, activation = 'relu',  name='fc1')(x)

x = Dense(units = 128, activation = 'relu',  name='fc2')(x)

# Dropout Layer
x = Dropout(0.5, name='fc3_dropout')(x)

# Classifier
predictions = Dense(2, activation='softmax', name="predictions")(x)

model = Model(inputs=dnesenet_model.input, outputs=predictions)
# Compiling the CNN
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

Using TensorFlow backend.


In [14]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 250, 250, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 256, 256, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 125, 125, 64) 9408        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 125, 125, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu

## 训练模型

In [17]:
from keras.preprocessing.image import ImageDataGenerator


train_image_gen = ImageDataGenerator(rescale = 1./255,
                                     shear_range = 0.2,
                                     zoom_range = 0.2,
                                     horizontal_flip = True)
print("训练集图片:")
training_set = train_image_gen.flow_from_directory('train1',
                                                 target_size = target_size,
                                                 batch_size = batch_size,
                                                 color_mode="rgb",
                                                 class_mode = 'binary')

valid_image_gen = ImageDataGenerator(rescale = 1./255,
                                     shear_range = 0.2,
                                     zoom_range = 0.2,
                                     horizontal_flip = True)
print("验证集图片:")
validation_set = valid_image_gen.flow_from_directory('valid',
                                                 target_size = target_size,
                                                 batch_size = batch_size,
                                                 color_mode="rgb",
                                                 class_mode = 'binary')

训练集图片:
Found 22000 images belonging to 2 classes.
验证集图片:
Found 3000 images belonging to 2 classes.


## 开始训练模型

In [18]:
model.fit_generator(training_set, steps_per_epoch = 500, epochs = 25, 
                    validation_data=validation_set, validation_steps = 500)

Epoch 1/25


StopIteration: [Errno 2] No such file or directory: 'train1/dogs/dog.410.jpg'

In [20]:
file_path = os.getcwd() + '/train1/dogs/dog.410.jpg'

image = Image.open('/train1/dogs/dog.410.jpg')
image.show()

FileNotFoundError: [Errno 2] No such file or directory: './train1/dogs/dog.410.jpg'