# ResNet50上的测试

In [1]:
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *
import numpy as np
from sklearn.utils import shuffle
import pandas as pd

import h5py
import os
import shutil

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  (fname, cnt))
  (fname, cnt))


## 数据处理
读取训练和测试图片，统一缩放为 (224, 224, 3) 的尺寸，并按 8:2 的比例划分训练集和验证集。

In [2]:
import numpy as np
from tqdm import tqdm
import cv2

# Fix NumPy's global seed before any random operation in this notebook.
np.random.seed(2017)

# File names are listed from the per-class train2/ folders, but the pixels are
# read from train/ below.
# NOTE(review): presumably train2/ holds links to a filtered subset of train/ — confirm.
train_cat2 = os.listdir("train2/cat")
train_dog2 = os.listdir("train2/dog")

# Size the arrays from what is actually on disk instead of a hard-coded count,
# so adding or removing images cannot overflow X or leave all-zero rows behind.
n = len(train_cat2) + len(train_dog2)
X = np.zeros((n, 224, 224, 3), dtype=np.uint8)
y = np.zeros((n, 1), dtype=np.uint8)

# Label 0 = cat, label 1 = dog; cats fill the first rows, dogs the rest.
i = 0
for label, filenames in ((0, train_cat2), (1, train_dog2)):
    for filename in filenames:
        path = 'train/%s' % filename
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cv2.resize.
        if image is None:
            raise IOError("Could not read image: %s" % path)
        X[i] = cv2.resize(image, (224, 224))
        y[i] = label
        i += 1

In [3]:
test_file = os.listdir("test")

# Size the array from the directory listing instead of a hard-coded count,
# so a different number of test files cannot overflow or under-fill X_test.
m = len(test_file)
X_test = np.zeros((m, 224, 224, 3), dtype=np.uint8)

# Row j of X_test corresponds to test_file[j]; that pairing is what the
# submission cells rely on when matching predictions to file names.
j = 0
for filename in test_file:
    path = 'test/%s' % filename
    image = cv2.imread(path)
    # cv2.imread returns None on failure; report the offending file explicitly.
    if image is None:
        raise IOError("Could not read image: %s" % path)
    X_test[j] = cv2.resize(image, (224, 224))
    j += 1

In [4]:
# Sanity check: number of training images loaded and the test array's shape.
for caption, value in (("i=", i), ("X_test size:", X_test.shape)):
    print(caption, value)

i= 24962
X_test size: (12500, 224, 224, 3)


In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled images for validation.
# NOTE(review): no random_state is passed, so the shuffle presumably depends on
# NumPy's global RNG state at the time this cell runs — confirm if exact
# reproducibility of the split matters.
split_parts = train_test_split(X, y, test_size=0.2)
X_train, X_valid, y_train, y_valid = split_parts

## 模型一
去除 ResNet50 的顶层，锁定其余所有层，使其不参与训练；在此模型的基础上，加上 GAP 层和 Dropout 层，最后接一个使用 sigmoid 激活函数的全连接层，得出二分类的结果。

In [6]:
# ImageNet-pretrained ResNet50 without its classification head.
# NOTE(review): images are fed exactly as loaded by cv2 (uint8); no
# preprocess_input step is visible in this notebook — confirm that is intended.
image_input = Input((224, 224, 3))
base_model = ResNet50(input_tensor=image_input, weights='imagenet', include_top=False)

# Freeze the whole backbone so that only the new head is trained at first.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# New head: global average pooling -> dropout -> one sigmoid unit (binary output).
pooled = GlobalAveragePooling2D()(base_model.output)
dropped = Dropout(0.5)(pooled)
probability = Dense(1, activation='sigmoid')(dropped)
model = Model(base_model.input, probability)

In [7]:
# Binary classification setup: cross-entropy loss, Adadelta optimizer,
# accuracy reported during training.
model.compile(
    loss='binary_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [8]:
# Train the new head for 5 epochs, evaluating on the held-out validation split.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=5,
    batch_size=16,
)

Train on 19969 samples, validate on 4993 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7ff09d3a8eb8>

In [12]:
# Print each layer's index and name; the indices are used to choose where to
# start unfreezing in the fine-tuning cells.
for i, layer in enumerate(model.layers):
    print(i, layer.name)

0 input_1
1 conv1_pad
2 conv1
3 bn_conv1
4 activation_1
5 max_pooling2d_1
6 res2a_branch2a
7 bn2a_branch2a
8 activation_2
9 res2a_branch2b
10 bn2a_branch2b
11 activation_3
12 res2a_branch2c
13 res2a_branch1
14 bn2a_branch2c
15 bn2a_branch1
16 add_1
17 activation_4
18 res2b_branch2a
19 bn2b_branch2a
20 activation_5
21 res2b_branch2b
22 bn2b_branch2b
23 activation_6
24 res2b_branch2c
25 bn2b_branch2c
26 add_2
27 activation_7
28 res2c_branch2a
29 bn2c_branch2a
30 activation_8
31 res2c_branch2b
32 bn2c_branch2b
33 activation_9
34 res2c_branch2c
35 bn2c_branch2c
36 add_3
37 activation_10
38 res3a_branch2a
39 bn3a_branch2a
40 activation_11
41 res3a_branch2b
42 bn3a_branch2b
43 activation_12
44 res3a_branch2c
45 res3a_branch1
46 bn3a_branch2c
47 bn3a_branch1
48 add_4
49 activation_13
50 res3b_branch2a
51 bn3b_branch2a
52 activation_14
53 res3b_branch2b
54 bn3b_branch2b
55 activation_15
56 res3b_branch2c
57 bn3b_branch2c
58 add_5
59 activation_16
60 res3c_branch2a
61 bn3c_branch2a
62 activatio

In [13]:
# Persist the weights of model one (the fine-tuning cells reload this file),
# then predict on the test images.
model.save_weights(filepath='resnet50_weights_1.h5')
y_pred = model.predict(x=X_test, verbose=1)



In [14]:
# Keep predictions away from exactly 0 and 1 (presumably to bound the
# log-loss penalty of a confidently wrong answer).
y_pred = y_pred.clip(min=0.005, max=0.995)
df = pd.read_csv("sample_submission.csv")

# Test files are named "<id>.<ext>"; the row for id k is at index label k - 1.
# DataFrame.set_value was removed from pandas, so assign through .loc in one
# vectorized step, passing the scalar column of the (m, 1) prediction array.
test_ids = np.array(
    [int(os.path.splitext(os.path.basename(fname))[0]) for fname in test_file],
    dtype=int,
)
df.loc[test_ids - 1, 'label'] = y_pred[:, 0]

# index=False is the documented way to omit the row index from the CSV.
df.to_csv('resnet50_predict_1.csv', index=False)
df.head(2)



Unnamed: 0,id,label
0,1,0.995
1,2,0.995


在kaggle上的得分是0.10671

## 模型二
Fine-tuning：锁定前 101 层（索引 0–100），放开索引 101 及之后的层参与训练。

In [15]:
# Reset: freeze every layer of the ResNet50 backbone.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Then re-enable training for the layers at index 101 and beyond of the full model.
trainable_tail = model.layers[101:]
for tail_layer in trainable_tail:
    tail_layer.trainable = True

# Start from the weights saved after model one, then recompile so the updated
# trainable flags are picked up by training.
model.load_weights('resnet50_weights_1.h5')
model.compile(
    loss='binary_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [16]:
# Fine-tune the unfrozen layers for 5 epochs with a batch size of 32.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=5,
    batch_size=32,
)

Train on 19969 samples, validate on 4993 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7ff3dcdbc3c8>

In [17]:
y_pred = model.predict(X_test, verbose=1)
# Keep predictions away from exactly 0 and 1 (presumably to bound the
# log-loss penalty of a confidently wrong answer).
y_pred = y_pred.clip(min=0.005, max=0.995)
df = pd.read_csv("sample_submission.csv")

# Test files are named "<id>.<ext>"; the row for id k is at index label k - 1.
# DataFrame.set_value was removed from pandas, so assign through .loc in one
# vectorized step, passing the scalar column of the (m, 1) prediction array.
test_ids = np.array(
    [int(os.path.splitext(os.path.basename(fname))[0]) for fname in test_file],
    dtype=int,
)
df.loc[test_ids - 1, 'label'] = y_pred[:, 0]

# index=False is the documented way to omit the row index from the CSV.
df.to_csv('resnet50_predict_2.csv', index=False)
df.head(2)





Unnamed: 0,id,label
0,1,0.995
1,2,0.995


在kaggle上的得分是0.09696

## 模型三
Fine-tuning：锁定前 144 层（索引 0–143），放开索引 144 及之后的层参与训练。

In [18]:
# Reset: freeze every layer of the ResNet50 backbone.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Then re-enable training for the layers at index 144 and beyond of the full model.
trainable_tail = model.layers[144:]
for tail_layer in trainable_tail:
    tail_layer.trainable = True

# Start again from the weights saved after model one (not from the previous
# fine-tuning run), then recompile so the updated trainable flags are picked up.
model.load_weights('resnet50_weights_1.h5')
model.compile(
    loss='binary_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [19]:
# Fine-tune the unfrozen layers for 5 epochs with a batch size of 32.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=5,
    batch_size=32,
)

Train on 19969 samples, validate on 4993 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7ff3dcdbc080>

In [20]:
y_pred = model.predict(X_test, verbose=1)
# Keep predictions away from exactly 0 and 1 (presumably to bound the
# log-loss penalty of a confidently wrong answer).
y_pred = y_pred.clip(min=0.005, max=0.995)
df = pd.read_csv("sample_submission.csv")

# Test files are named "<id>.<ext>"; the row for id k is at index label k - 1.
# DataFrame.set_value was removed from pandas, so assign through .loc in one
# vectorized step, passing the scalar column of the (m, 1) prediction array.
test_ids = np.array(
    [int(os.path.splitext(os.path.basename(fname))[0]) for fname in test_file],
    dtype=int,
)
df.loc[test_ids - 1, 'label'] = y_pred[:, 0]

# index=False is the documented way to omit the row index from the CSV.
df.to_csv('resnet50_predict_3.csv', index=False)
df.head(2)





Unnamed: 0,id,label
0,1,0.995
1,2,0.995


In [21]:
# Persist the weights of the third (fine-tuned from layer 144) model.
model.save_weights(filepath='resnet50_weights_3.h5')

在kaggle上的得分是0.07540