In [1]:
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
import os
import cv2

from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPool2D, Dropout, BatchNormalization, Dense, Activation, GlobalAveragePooling2D
from keras.models import Model,Sequential
from keras.regularizers import l2
from keras.preprocessing.image import load_img,img_to_array
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

Using TensorFlow backend.


In [2]:
# Build the image-id / label table for the training set.
training_dir = '../input/train/'
testing_dir = '../input/test/'

train_files = os.listdir(training_dir)
test_files = os.listdir(testing_dir)

# The label is whatever precedes the first dot of the file name
# (e.g. "cat.11679.jpg" -> "cat").
train_labels = [file_name.split(".")[0] for file_name in train_files]

df_train = pd.DataFrame({"id": train_files, "label": train_labels})
df_train.head()

Unnamed: 0,id,label
0,cat.11679.jpg,cat
1,dog.2811.jpg,dog
2,dog.2578.jpg,dog
3,dog.9238.jpg,dog
4,dog.7504.jpg,dog


In [3]:
# Test-set table: one row per file, each given the placeholder label "cat" so
# the frame carries the same "id"/"label" columns as the training table.
df_test = pd.DataFrame({"id": test_files, "label": ["cat"] * len(test_files)})
df_test.head()

Unnamed: 0,id,label
0,3090.jpg,cat
1,8785.jpg,cat
2,10679.jpg,cat
3,7247.jpg,cat
4,8151.jpg,cat


In [4]:
# Build the Keras data generators.
classes = ['cat', 'dog']

def get_data(batch_size=32, target_size=(96,96), class_mode="categorical", training_dir=training_dir,
             testing_dir=testing_dir, classes=classes, df_train=df_train, df_test=df_test):
    """Create the training, validation and test generators.

    Note that the default values for ``training_dir``, ``testing_dir``,
    ``classes``, ``df_train`` and ``df_test`` are captured when this function
    is defined; pass them explicitly if those globals are rebound later.

    Args:
        batch_size: Number of images per batch for all three generators.
        target_size: (height, width) every image is resized to.
        class_mode: Label encoding forwarded to ``flow_from_dataframe``.
        training_dir: Directory containing the files listed in ``df_train``.
        testing_dir: Directory containing the files listed in ``df_test``.
        classes: Class names forwarded to ``flow_from_dataframe``.
        df_train: DataFrame with an ``id`` (file name) and ``label`` column.
        df_test: DataFrame with an ``id`` (file name) and ``label`` column.

    Returns:
        Tuple ``(train_generator, validation_generator, test_generator,
        steps_per_epoch, validation_steps)`` where the last two values are
        ``len(train_generator)`` and ``len(validation_generator)``.
    """
    validation_split = 0.25

    # Random augmentation is applied to the training subset only.
    train_datagen = ImageDataGenerator(horizontal_flip=True, shear_range=0.2, zoom_range=0.2,
        rescale=1.0/255, validation_split=validation_split)
    # Validation images are only rescaled, so validation metrics (and the
    # checkpoint that monitors them) are measured on unaugmented images.
    # NOTE(review): relies on the training/validation subsets being selected
    # from the validation_split fraction and the row order of df_train, so the
    # two generators stay complementary -- confirm against the installed
    # keras_preprocessing version.
    validation_datagen = ImageDataGenerator(rescale=1.0/255, validation_split=validation_split)
    test_datagen = ImageDataGenerator(rescale=1.0/255)

    # Options shared by every flow_from_dataframe call below.
    flow_options = dict(x_col='id', y_col='label', has_ext=True, target_size=target_size,
                        classes=classes, class_mode=class_mode, batch_size=batch_size)

    train_generator = train_datagen.flow_from_dataframe(
        df_train, training_dir, shuffle=True, seed=42, subset='training', **flow_options)

    validation_generator = validation_datagen.flow_from_dataframe(
        df_train, training_dir, shuffle=True, seed=42, subset='validation', **flow_options)

    # The test generator must not shuffle so predictions stay aligned with
    # test_generator.filenames.
    test_generator = test_datagen.flow_from_dataframe(
        df_test, testing_dir, shuffle=False, **flow_options)

    steps_per_epoch = len(train_generator)
    validation_steps = len(validation_generator)

    return train_generator, validation_generator, test_generator,  steps_per_epoch, validation_steps

In [5]:
def base_model(input_shape=(299, 299, 3), num_classes=2, weight_decay=1e-4):
    """Build the small VGG-style CNN used as the baseline classifier.

    The network has three stages with 32, 64 and 128 filters. Each stage is
    three (3x3 Conv -> BatchNormalization -> ReLU) units followed by max
    pooling. A global average pooling layer and a softmax Dense layer form
    the classifier head. The defaults reproduce the original hard-coded
    architecture.

    Args:
        input_shape: (height, width, channels) of the input images. Should
            match the ``target_size`` used by the data generators.
        num_classes: Number of units in the final softmax layer.
        weight_decay: L2 regularisation factor applied to every conv kernel.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = Sequential()

    for stage_index, filters in enumerate((32, 64, 128)):
        for conv_index in range(3):
            # Only the very first layer of a Sequential model declares the input shape.
            is_first_layer = stage_index == 0 and conv_index == 0
            first_layer_kwargs = {'input_shape': input_shape} if is_first_layer else {}

            # Bias is disabled because BatchNormalization follows directly.
            model.add(Conv2D(filters, (3, 3), padding='same', use_bias=False,
                             kernel_regularizer=l2(weight_decay), **first_layer_kwargs))
            model.add(BatchNormalization())
            model.add(Activation('relu'))
        model.add(MaxPool2D())

    model.add(GlobalAveragePooling2D())
    model.add(Dense(num_classes, activation='softmax'))

    return model

In [6]:
# Load the data.
batch_size = 32
target_size = (299, 299)
(train_generator, validation_generator, test_generator,
 steps_per_epoch, validation_steps) = get_data(
    batch_size=batch_size,
    target_size=target_size,
    classes=classes,
    df_test=df_test,
)

# Build and compile the model.
model = base_model()
optimizer = Adam(0.0001)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Write model.hdf5 only when the monitored validation accuracy improves.
checkpoint = ModelCheckpoint('model.hdf5', monitor='val_acc', save_best_only=True)
callbacks = [checkpoint]

# Start training.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    epochs=3,
    callbacks=callbacks,
    verbose=1)

Found 18750 images belonging to 2 classes.
Found 6250 images belonging to 2 classes.
Found 12500 images belonging to 2 classes.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [7]:
def generate_result(model, test_generator, nsteps=None):
    """Run the model over a generator and return its predictions.

    Args:
        model: Object exposing ``predict_generator(generator, steps, verbose)``.
        test_generator: Batch generator to predict on; must support ``len()``
            when ``nsteps`` is not given.
        nsteps: Number of batches to draw. Defaults to ``len(test_generator)``,
            resolved at call time so the function works with whichever
            generator is passed in, not with a global that happened to exist
            when the function was defined.

    Returns:
        Tuple ``(y_preds, y_preds[:, 1])``: the full prediction array and its
        second column.
    """
    if nsteps is None:
        nsteps = len(test_generator)
    y_preds = model.predict_generator(test_generator, steps=nsteps, verbose=1)
    return y_preds, y_preds[:, 1]

# Predict on the test set: y_preds_all is the full model output, y_preds its
# second column (presumably P(dog), given classes = ['cat', 'dog'] -- confirm).
y_preds_all, y_preds = generate_result(model, test_generator)



In [8]:
# Assemble the submission table: numeric image id (file name without its
# extension) plus the predicted score, ordered by id.
df_test = (
    pd.DataFrame({"id": test_generator.filenames, "label": y_preds})
    .assign(id=lambda frame: frame["id"].map(lambda name: name.split('.')[0]).astype(int))
    .sort_values('id')
)
df_test.to_csv('submission.csv', index=False)
df_test.head()

Unnamed: 0,id,label
2322,1,0.213388
5025,2,0.079625
2481,3,0.045391
5068,4,0.891126
3495,5,0.004802
