# InceptionResNetV2上的测试

In [1]:
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *
import numpy as np
from sklearn.utils import shuffle
import pandas as pd

import h5py
import os
import shutil

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  (fname, cnt))
  (fname, cnt))


## 数据处理
读取训练集和测试集图片，统一缩放为 (299, 299, 3) 的尺寸，并将训练数据划分为训练集和验证集。

In [2]:
import numpy as np
from tqdm import tqdm
import cv2

np.random.seed(2017)

# File names of the training images, one listing per class. The listings come
# from train2/<class>, while the pixels are read from train/<same file name>.
train_cat2 = os.listdir("train2/cat")
train_dog2 = os.listdir("train2/dog")

# Size the arrays from the listings instead of a hard-coded count, so a changed
# number of files can neither overflow X nor leave zero-filled rows that would
# silently be labelled 0 (cat).
n = len(train_cat2) + len(train_dog2)
X = np.zeros((n, 299, 299, 3), dtype=np.uint8)
y = np.zeros((n, 1), dtype=np.uint8)

# NOTE(review): cv2.imread yields channels in BGR order; confirm this matches
# what the pretrained network expects. If it is changed, change the test-set
# loading cell in the same way so train and test stay consistent.
i = 0  # running row index; equals the number of images loaded once the loop ends
for label, filenames in ((0, train_cat2), (1, train_dog2)):
    for filename in tqdm(filenames):
        image = cv2.imread('train/%s' % filename)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise IOError('cannot read image: train/%s' % filename)
        X[i] = cv2.resize(image, (299, 299))
        y[i] = label
        i += 1

In [3]:
test_file = os.listdir("test")

# Size the array from the directory listing instead of a hard-coded count, so a
# different number of test files cannot overflow X_test or leave blank rows.
m = len(test_file)
X_test = np.zeros((m, 299, 299, 3), dtype=np.uint8)

# NOTE(review): cv2.imread yields channels in BGR order; keep this consistent
# with how the training images are loaded.
j = 0  # running row index; equals the number of images loaded once the loop ends
for filename in test_file:
    image = cv2.imread('test/%s' % filename)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise IOError('cannot read image: test/%s' % filename)
    X_test[j] = cv2.resize(image, (299, 299))
    j += 1

In [4]:
# Sanity check: number of training images loaded and shape of the test array.
print("i= %d" % i)
print("X_test size: %s" % (X_test.shape,))

i= 24962
X_test size: (12500, 299, 299, 3)


In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training data for validation. A fixed random_state makes
# the split identical on every run, independent of the global NumPy RNG state
# and of the order in which earlier cells were executed.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=2017)

## 模型一
去除顶层，锁定其余所有层，使其不参与训练；在此模型的基础上加上 GAP 层和 Dropout 层，再接一个使用 sigmoid 激活函数的全连接层，输出二分类结果。

In [11]:
# Feed raw 299x299x3 images through the InceptionResNetV2 preprocessing function
# inside the graph, then into the ImageNet-pretrained network without its top.
inputs = Input((299, 299, 3))
preprocessed = Lambda(inception_resnet_v2.preprocess_input)(inputs)
base_model = InceptionResNetV2(
    input_tensor=preprocessed,
    weights='imagenet',
    include_top=False,
)

# Freeze every layer of the pretrained base.
for base_layer in base_model.layers:
    base_layer.trainable = False

# Classification head: global average pooling -> dropout -> single sigmoid unit.
pooled = GlobalAveragePooling2D()(base_model.output)
dropped = Dropout(0.5)(pooled)
x = Dense(1, activation='sigmoid')(dropped)
model = Model(base_model.input, x)

In [12]:
# Binary classification setup: cross-entropy loss, Adadelta optimizer,
# accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [13]:
# Train for 5 epochs in mini-batches of 32, validating on the held-out split.
model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

Train on 19969 samples, validate on 4993 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f1d26754fd0>

In [14]:
# Print each layer's position and name; the positions are what the fine-tuning
# cells slice on when choosing which layers to unfreeze.
for i, layer in enumerate(model.layers):
    print(i, layer.name)

0 input_3
1 lambda_2
2 conv2d_315
3 batch_normalization_315
4 activation_314
5 conv2d_316
6 batch_normalization_316
7 activation_315
8 conv2d_317
9 batch_normalization_317
10 activation_316
11 max_pooling2d_8
12 conv2d_318
13 batch_normalization_318
14 activation_317
15 conv2d_319
16 batch_normalization_319
17 activation_318
18 max_pooling2d_9
19 conv2d_323
20 batch_normalization_323
21 activation_322
22 conv2d_321
23 conv2d_324
24 batch_normalization_321
25 batch_normalization_324
26 activation_320
27 activation_323
28 average_pooling2d_3
29 conv2d_320
30 conv2d_322
31 conv2d_325
32 conv2d_326
33 batch_normalization_320
34 batch_normalization_322
35 batch_normalization_325
36 batch_normalization_326
37 activation_319
38 activation_321
39 activation_324
40 activation_325
41 mixed_5b
42 conv2d_330
43 batch_normalization_330
44 activation_329
45 conv2d_328
46 conv2d_331
47 batch_normalization_328
48 batch_normalization_331
49 activation_327
50 activation_330
51 conv2d_327
52 conv2d_329
5

In [15]:
# Persist the weights of the first model, then score the test images.
model.save_weights(filepath='inceptionresnetv2_weights_1.h5')
y_pred = model.predict(x=X_test, verbose=1)



In [16]:
# Clip predictions into [0.005, 0.995] before writing them out.
y_pred = y_pred.clip(min=0.005, max=0.995)
df = pd.read_csv("sample_submission.csv")
for i, fname in enumerate(test_file):
    # The stem of each test file name is parsed as the integer image id.
    # splitext/basename also handle names without an extension, where the
    # previous rfind-based slicing dropped the last character.
    index = int(os.path.splitext(os.path.basename(fname))[0])
    # DataFrame.set_value was deprecated and removed in pandas 1.0; .at is the
    # scalar setter. Write the scalar y_pred[i, 0], not a length-1 array.
    # Assumes row (id - 1) of sample_submission.csv belongs to that id — TODO confirm.
    df.at[index - 1, 'label'] = y_pred[i, 0]

df.to_csv('inceptionresnetv2_predict_1.csv', index=False)
df.head(2)



Unnamed: 0,id,label
0,1,0.995
1,2,0.991332


在kaggle上得分是0.07994

## 模型二
fine-tuning：锁定前 605 层（索引 0–604），放开索引 605 及之后的所有层参与训练

In [17]:
# Start from a fully frozen base, then unfreeze everything from layer index 605
# onward (the slice is taken on the full model, so the head is included).
for base_layer in base_model.layers:
    base_layer.trainable = False
for layer in model.layers[605:]:
    layer.trainable = True

# Continue from the weights saved after the first training stage.
model.load_weights('inceptionresnetv2_weights_1.h5')

# Compile again after changing the trainable flags.
model.compile(
    loss='binary_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [18]:
# Fine-tune for 5 epochs in mini-batches of 32, then persist the weights.
model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_valid, y_valid),
)
model.save_weights('inceptionresnetv2_weights_2.h5')

Train on 19969 samples, validate on 4993 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [19]:
# Score the test images and clip predictions into [0.005, 0.995].
y_pred = model.predict(X_test, verbose=1)
y_pred = y_pred.clip(min=0.005, max=0.995)
df = pd.read_csv("sample_submission.csv")
for i, fname in enumerate(test_file):
    # The stem of each test file name is parsed as the integer image id.
    # splitext/basename also handle names without an extension, where the
    # previous rfind-based slicing dropped the last character.
    index = int(os.path.splitext(os.path.basename(fname))[0])
    # DataFrame.set_value was deprecated and removed in pandas 1.0; .at is the
    # scalar setter. Write the scalar y_pred[i, 0], not a length-1 array.
    # Assumes row (id - 1) of sample_submission.csv belongs to that id — TODO confirm.
    df.at[index - 1, 'label'] = y_pred[i, 0]

df.to_csv('inceptionresnetv2_predict_2.csv', index=False)
df.head(2)





Unnamed: 0,id,label
0,1,0.995
1,2,0.995


在kaggle上得分是0.04352

## 模型三
fine-tuning：锁定前 701 层（索引 0–700），放开索引 701 及之后的所有层参与训练

In [22]:
# Start from a fully frozen base, then unfreeze everything from layer index 701
# onward (the slice is taken on the full model, so the head is included).
for base_layer in base_model.layers:
    base_layer.trainable = False
for layer in model.layers[701:]:
    layer.trainable = True

# Continue from the weights saved after the second training stage.
model.load_weights('inceptionresnetv2_weights_2.h5')

# Compile again after changing the trainable flags.
model.compile(
    loss='binary_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [23]:
# Fine-tune for 5 epochs in mini-batches of 32, validating on the held-out split.
model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

Train on 19969 samples, validate on 4993 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f1d1af0b860>

In [24]:
# Score the test images and clip predictions into [0.005, 0.995].
y_pred = model.predict(X_test, verbose=1)
y_pred = y_pred.clip(min=0.005, max=0.995)
df = pd.read_csv("sample_submission.csv")
for i, fname in enumerate(test_file):
    # The stem of each test file name is parsed as the integer image id.
    # splitext/basename also handle names without an extension, where the
    # previous rfind-based slicing dropped the last character.
    index = int(os.path.splitext(os.path.basename(fname))[0])
    # DataFrame.set_value was deprecated and removed in pandas 1.0; .at is the
    # scalar setter. Write the scalar y_pred[i, 0], not a length-1 array.
    # Assumes row (id - 1) of sample_submission.csv belongs to that id — TODO confirm.
    df.at[index - 1, 'label'] = y_pred[i, 0]

df.to_csv('inceptionresnetv2_predict_3.csv', index=False)
df.head(2)





Unnamed: 0,id,label
0,1,0.995
1,2,0.995


In [25]:
# Persist the weights of the third model.
model.save_weights(filepath='inceptionresnetv2_weights_3.h5')

在kaggle上得分是0.04522