## 加载数据集

In [2]:
import numpy as np
from tqdm import tqdm
import cv2

# Training set size and the spatial size every image is resized to.
n = 25000
img_width = 224
img_height = 224

# cv2.resize takes its target size as (width, height) but returns an array
# shaped (height, width, channels), so the buffer is allocated height-first.
X = np.zeros((n, img_height, img_width, 3), dtype=np.uint8)
y = np.zeros((n, 1), dtype=np.uint8)

# First half of X holds cats (label 0), second half holds dogs (label 1).
# NOTE(review): cv2.imread yields BGR channel order and no keras
# preprocess_input is applied in the visible cells -- confirm this is intended.
half = n // 2
for i in tqdm(range(half)):
    for offset, species in ((0, 'cat'), (half, 'dog')):
        path = 'train/%s.%d.jpg' % (species, i)
        img = cv2.imread(path)
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        if img is None:
            raise IOError('could not read image file: %s' % path)
        X[i + offset] = cv2.resize(img, (img_width, img_height))

y[half:] = 1

100%|██████████| 12500/12500 [01:15<00:00, 166.40it/s]


In [3]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the images for validation. A fixed random_state makes the
# split identical across kernel restarts, and stratifying on the labels keeps
# the cat/dog ratio the same in both subsets.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y.ravel())

## 构建模型

In [4]:
from keras.models import *
from keras.applications import *
from keras.layers import *
from keras.preprocessing import *

Using TensorFlow backend.


In [7]:
# ImageNet-pretrained ResNet50 without its classification head; pooling='avg'
# makes the network output one pooled feature vector per image.
# The input shape is (height, width, channels), matching the layout of the
# arrays produced by cv2.resize.
base_model = ResNet50(
    input_tensor=Input((img_height, img_width, 3)),
    weights='imagenet',
    include_top=False,
    pooling='avg',
)

In [22]:
# Number of leading layers of base_model whose weights stay fixed.
# (Setting this to len(base_model.layers) - 1 would leave only the final
# layer trainable.)
NB_LAYER_FREEZE = 162

# Layers before the cut-off are frozen; the cut-off layer and everything
# after it are fine-tuned.
for position, layer in enumerate(base_model.layers):
    layer.trainable = position >= NB_LAYER_FREEZE

# Classification head: dropout on the backbone output, then a single
# sigmoid unit.
x = base_model.output
x = Dropout(0.25)(x)
x = Dense(1, activation='sigmoid')(x)
model = Model(base_model.input, x)

In [23]:
# Configure training: binary cross-entropy on the single sigmoid output,
# Adadelta optimizer with its default settings, accuracy reported per epoch.
model.compile(
    loss='binary_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [24]:
# Train for 10 epochs in mini-batches of 16, evaluating on the held-out
# validation split after every epoch. The call is left as the cell's last
# expression so the returned History object is displayed.
model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fe37b7f0978>

In [25]:
# Write the trained model to disk; the same path is read back with
# load_model in the model-loading section.
# NOTE(review): assumes the models/ directory already exists -- confirm.
model.save("models/resnet_50_162_notrainable")

## 加载模型

In [1]:
from keras.models import *
from keras.applications import *
from keras.layers import *
from keras.preprocessing import *

Using TensorFlow backend.


In [2]:
# Reload the model from the path used by the model.save call in the
# training section.
model_a = load_model("models/resnet_50_162_notrainable")

In [2]:
# NOTE(review): nothing in the visible notebook writes this file; presumably
# it comes from a separate training run with a different freeze setting --
# confirm it exists before running this cell.
model_b = load_model("models/resnet_50_last_layer_able")

## 测试模型

加载测试数据

In [4]:
import numpy as np
from tqdm import tqdm
import cv2
# Number of test images and the spatial size they are resized to.
n = 12500
img_width = 224
img_height = 224

# cv2.resize takes its target size as (width, height) but returns an array
# shaped (height, width, channels), so the buffer is allocated height-first.
test = np.zeros((n, img_height, img_width, 3), dtype=np.uint8)

# Test files are numbered from 1, so row i holds image i + 1.
for i in tqdm(range(n)):
    path = 'test1/%d.jpg' % (i + 1)
    img = cv2.imread(path)
    # cv2.imread signals an unreadable/missing file by returning None;
    # fail here with the offending path instead of inside cv2.resize.
    if img is None:
        raise IOError('could not read image file: %s' % path)
    test[i] = cv2.resize(img, (img_width, img_height))

100%|██████████| 12500/12500 [00:24<00:00, 503.11it/s]


In [7]:
 # Score every test image with model_a.
 # NOTE(review): the submission cell below reassigns `test_predict` from
 # `test_b_predict`, so when the notebook runs top to bottom these model_a
 # predictions are overwritten before being used.
 test_predict = model_a.predict(test,batch_size=32)

In [5]:
# Score every test image with model_b, 32 images per forward pass.
test_b_predict = model_b.predict(
    test,
    batch_size=32,
)

In [6]:
# Keep predictions inside [0.005, 0.995].
# NOTE(review): presumably this caps the log-loss penalty for confident
# mistakes -- confirm against the competition metric.
test_predict = test_b_predict.clip(min=0.005, max=0.995)

import pandas as pd
from keras.preprocessing.image import *

df = pd.read_csv("sample_submission.csv")

# Predictions are assigned by position: row i of the sample file receives the
# prediction for test1/(i+1).jpg. Refuse to write a submission if the row
# counts disagree instead of silently misaligning or growing the frame.
if len(df) != len(test_predict):
    raise ValueError(
        'sample_submission.csv has %d rows but there are %d predictions'
        % (len(df), len(test_predict)))

# Vectorized column assignment replaces the per-row DataFrame.set_value loop
# (set_value was removed in pandas 1.0). It also avoids leaving a loop
# variable named `y` behind in the notebook namespace.
df['label'] = test_predict[:, 0].astype(float)

df.to_csv('resnet_last_pred.csv', index=False)
df.head(10)

Unnamed: 0,id,label
0,1,0.995
1,2,0.995
2,3,0.995
3,4,0.995
4,5,0.005
5,6,0.005
6,7,0.005
7,8,0.005
8,9,0.005
9,10,0.005
