In [1]:

# Import packages

import matplotlib.pyplot as plt
import h5py
import numpy as np
from sklearn.utils import shuffle
from keras.applications import *
from keras.preprocessing.image import *
from keras.layers import *
from keras.preprocessing import image
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
from keras.models import *
import os

from keras.layers import Dense, GlobalAveragePooling2D, Dropout
from keras.layers.normalization import BatchNormalization 
from keras.callbacks import LearningRateScheduler,EarlyStopping
from keras.optimizers import SGD
from keras.models import Model

np.random.seed(2017)

Using TensorFlow backend.


In [2]:
# Configuration parameters
# Root folder of the dataset; get_X_y_files reads one sub-folder per class from it.
data_path = './blood_cell_data/'
# Per-image array shape; also passed to load_img as the resize target.
image_shape =(299,299,3)
# Layers with index below layer_num stay frozen in the second fine-tuning stage.
layer_num = 280
# Pretrained architecture constructor and its matching input preprocessing function.
MODEL = inception_v3.InceptionV3
preprocess_func = inception_v3.preprocess_input

In [3]:
def get_images_and_shapes(file_path):
    """Collect the non-hidden files of a folder together with their image shapes.

    Every directory entry whose name does not start with '.' is opened with
    ``image.load_img`` and measured with ``np.shape``.

    Args:
        file_path: Folder to scan.

    Returns:
        A 4-tuple ``(paths, shapes, sizes, count)``: the joined file paths, the
        shape of each loaded image, the product of the first two shape entries
        for each image, and the number of files found.
    """
    image_files = []
    for name in os.listdir(file_path):
        # Skip hidden entries such as .DS_Store.
        if not name.startswith('.'):
            image_files.append(os.path.join(file_path, name))

    images_shapes = [np.shape(image.load_img(img_path)) for img_path in image_files]
    images_sizes = [dims[0] * dims[1] for dims in images_shapes]

    print("finished")
    return image_files, images_shapes, images_sizes, len(image_files)


def get_X_y_files(data_path, image_shape, lambda_func=None):
    """Load every image under ``data_path`` into arrays, one sub-folder per class.

    Args:
        data_path: Root folder; each non-hidden sub-folder is treated as a
            class and its name is used as the label.
        image_shape: ``(dim0, dim1, channels)`` of each stored image; the first
            two entries are also passed to ``load_img`` as the resize target.
        lambda_func: Optional preprocessing callable applied to each image
            batch of shape ``(1,) + image_shape`` before it is stored.

    Returns:
        ``(X, encoded_y, files, data_dirs)``: the float32 image array, the
        one-hot labels (columns ordered by ``LabelEncoder``, i.e. sorted label
        names), the array of file paths in the same row order as ``X``, and
        the sorted list of class folder names.

    Raises:
        ValueError: If no image file is found under ``data_path``.
    """
    # Sorted so that sample order and the returned class list do not depend on
    # the filesystem's listing order; plain files next to the class folders are
    # ignored instead of crashing os.listdir further down.
    data_dirs = sorted(
        path for path in os.listdir(data_path)
        if path[0] != '.' and os.path.isdir(os.path.join(data_path, path))
    )

    files = []
    labels = []
    for path in data_dirs:
        class_files, _shapes, _sizes, _count = get_images_and_shapes(os.path.join(data_path, path))
        files.extend(class_files)
        # Keep full label strings in a list: a fixed-width numpy string array
        # of width 1 would silently truncate each name to its first character.
        labels.extend([path] * len(class_files))

    n_samples = len(files)
    if n_samples == 0:
        raise ValueError(f"no image files found under {data_path!r}")

    X = np.empty((n_samples, image_shape[0], image_shape[1], image_shape[2]), dtype=np.float32)
    for row, file in enumerate(files):
        x = image.load_img(file, target_size=(image_shape[0], image_shape[1]))
        x = image.img_to_array(x)
        x = np.expand_dims(x, axis=0)
        if lambda_func:
            x = lambda_func(x)
        X[row] = x

    # A 1-D label array avoids sklearn's column-vector conversion warning.
    encoder = LabelEncoder()
    encoded_y = encoder.fit_transform(np.array(labels))
    encoded_y = np_utils.to_categorical(encoded_y)

    return X, encoded_y, np.array(files), data_dirs


def get_splited_shuffled(X, encoded_y, files, train_fraction=0.9):
    """Shuffle the samples and split them into a training and a test part.

    The same random permutation (drawn from ``np.random``) is applied to all
    three inputs, so row ``i`` of every returned training/test array still
    refers to the same sample.

    Args:
        X: Array of samples, indexed along its first axis.
        encoded_y: 2-D array of labels with one row per sample.
        files: Sequence of file paths, one per sample.
        train_fraction: Share of the samples placed in the training part;
            the count is ``int(n_samples * train_fraction)``.

    Returns:
        ``(X_train, X_test, encoded_y_train, encoded_y_test, files_train,
        files_test)``.

    Raises:
        ValueError: If ``train_fraction`` is outside ``[0, 1]`` or the inputs
            do not all have the same number of rows.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError("train_fraction must be between 0 and 1")

    # Index arrays only work on ndarrays, so accept plain lists of paths too.
    files = np.asarray(files)
    rows = len(files)
    if len(X) != rows or len(encoded_y) != rows:
        raise ValueError("X, encoded_y and files must have the same length")

    row_indices = np.random.permutation(rows)
    split_index = int(rows * train_fraction)
    train_idx, test_idx = row_indices[:split_index], row_indices[split_index:]

    X_train, X_test = X[train_idx, :], X[test_idx, :]
    files_train, files_test = files[train_idx], files[test_idx]
    encoded_y_train, encoded_y_test = encoded_y[train_idx, :], encoded_y[test_idx, :]

    return X_train, X_test, encoded_y_train, encoded_y_test, files_train, files_test



In [4]:


def get_fine_tuning_first_model(MODEL, num_classes=4, dropout_rate=0.5):
    """Build a classifier on top of a pretrained base with the base frozen.

    The base network is instantiated with ImageNet weights and without its
    top layers. A new head (global average pooling, batch normalization,
    dropout, softmax) is attached, every base layer is marked non-trainable,
    and the model is compiled with RMSprop and categorical cross-entropy.

    Args:
        MODEL: Callable accepting ``weights`` and ``include_top`` keyword
            arguments and returning the base model.
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Dropout rate applied before the output layer.

    Returns:
        The compiled model, in which only the new head is trainable.
    """
    print("start")
    base_model = MODEL(weights='imagenet', include_top=False)
    print(base_model.input.shape)
    print(base_model.output.shape)

    top_x = GlobalAveragePooling2D()(base_model.output)
    top_x = BatchNormalization()(top_x)
    top_x = Dropout(dropout_rate)(top_x)
    top_x = Dense(num_classes, activation='softmax')(top_x)
    model = Model(base_model.input, top_x)
    print(len(base_model.layers))

    # Freeze the pretrained layers before compiling so that the first training
    # pass only updates the newly added head.
    for layer in base_model.layers:
        layer.trainable = False

    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [5]:
def step_decay(epoch):
    """Return the learning rate for ``epoch`` under a step-decay schedule.

    The rate starts at 0.5 and is halved each time ``1 + epoch`` crosses a
    multiple of 10.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The learning rate as a float.
    """
    # Imported here because none of the notebook's visible top-level import
    # lines brings `math` into scope.
    import math

    initial_lrate = 0.5
    drop = 0.5
    epochs_drop = 10.0
    lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
    return lrate
# Callback wrapping step_decay.
# NOTE(review): the fit() calls visible in this notebook pass only
# early_stopping in `callbacks`, so this scheduler appears unused - confirm.
lrate = LearningRateScheduler(step_decay)
# Optimizer used by get_fine_tuning_second_model when it recompiles the model.
# NOTE(review): lr=0.1 looks high for fine-tuning pretrained layers - confirm.
sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)

def get_fine_tuning_second_model(model, layer_num, optimizer=None):
    """Unfreeze the upper layers of ``model`` and recompile it for fine-tuning.

    Layers ``model.layers[:layer_num]`` are marked non-trainable and layers
    ``model.layers[layer_num:]`` trainable; the model is then recompiled with
    categorical cross-entropy and an accuracy metric.

    Args:
        model: Model to modify in place.
        layer_num: Index of the first layer that becomes trainable.
        optimizer: Optimizer passed to ``model.compile``. When omitted, the
            notebook-level ``sgd`` instance is used.

    Returns:
        The same ``model`` object, recompiled.
    """
    for layer in model.layers[:layer_num]:
        layer.trainable = False
    for layer in model.layers[layer_num:]:
        layer.trainable = True

    # Fall back to the notebook-level optimizer so existing two-argument calls
    # keep working unchanged.
    if optimizer is None:
        optimizer = sgd

    model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [6]:
# Load the data: image array, one-hot labels, file paths and class folder names
X, y_encoded, image_files, classes = get_X_y_files(data_path, image_shape,lambda_func=preprocess_func)
print("finished")



# Build the training set and the test set
X_train, X_test, encoded_y_train, encoded_y_test, files_train, files_test = get_splited_shuffled(X, y_encoded, image_files)
print("finished")
# The unsplit arrays are not referenced again in the cells below; drop the names.
del X
del y_encoded

finished
finished
finished
finished


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


finished
finished


In [7]:
# Build the model with every base_model layer frozen
model = get_fine_tuning_first_model(MODEL)

model.summary()
print("finished")

start
(?, ?, ?, 3)
(?, ?, ?, 2048)
311
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, None, None, 3 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, None, None, 3 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, None, None, 3 0           batch_normalization_1[0][0]      
______________________________________________________________________

__________________________________________________________________________________________________
conv2d_40 (Conv2D)              (None, None, None, 1 147456      average_pooling2d_4[0][0]        
__________________________________________________________________________________________________
batch_normalization_31 (BatchNo (None, None, None, 1 576         conv2d_31[0][0]                  
__________________________________________________________________________________________________
batch_normalization_34 (BatchNo (None, None, None, 1 576         conv2d_34[0][0]                  
__________________________________________________________________________________________________
batch_normalization_39 (BatchNo (None, None, None, 1 576         conv2d_39[0][0]                  
__________________________________________________________________________________________________
batch_normalization_40 (BatchNo (None, None, None, 1 576         conv2d_40[0][0]                  
__________

In [8]:
print("start")
# First training pass: fit the weights of the newly added layers
# Stop once val_loss has not improved for 5 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')
model.fit(X_train, encoded_y_train, batch_size=128, epochs=50, validation_split=0.2, callbacks=[early_stopping])
print("finished")

start
Train on 7168 samples, validate on 1793 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
finished


In [None]:
print("start")
# Unfreeze the layers from index layer_num onward and train again
model = get_fine_tuning_second_model(model, layer_num)
# Stop once val_loss has not improved for 10 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto')
# Keep the returned History object; the plotting cell reads it.
history = model.fit(X_train, encoded_y_train, batch_size=128, epochs=100, validation_split=0.2, callbacks=[early_stopping])
print("finished")

start
Train on 7168 samples, validate on 1793 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100

In [None]:
print(history.history.keys())

# Older Keras releases record accuracy under 'acc' / 'val_acc'; releases that
# report metrics under the name given to compile() use 'accuracy' /
# 'val_accuracy'. Pick whichever key this run actually produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# One figure per metric: training curve and validation curve against epoch.
for metric_name, train_key in (('accuracy', acc_key), ('loss', 'loss')):
    plt.plot(history.history[train_key])
    plt.plot(history.history['val_' + train_key])
    plt.title('model ' + metric_name)
    plt.ylabel(metric_name)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

print('Predicting on test data')
# One row of class probabilities per test image.
y_prob = model.predict(X_test)

# Pick the most probable class per row. Rounding the probabilities instead
# yields an all-zero row whenever no class exceeds 0.5, and argmax of such a
# row would silently report class 0.
y_pred_unencoded = np.argmax(y_prob, axis=1)
y_test_unencoded = np.argmax(encoded_y_test, axis=1)

# One-hot form of the predictions, kept under the original name for display.
y_pred = np.eye(y_prob.shape[1])[y_pred_unencoded]
print(y_pred[0:10])

print(accuracy_score(y_test_unencoded, y_pred_unencoded))

# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_test_unencoded, y_pred_unencoded))

In [None]:
# Display a single image
def visualize_image(file,image_shape):
    """Print the path of an image file and draw it with matplotlib.

    Args:
        file: Path of the image to show.
        image_shape: Target size passed to ``image.load_img`` for resizing.
    """
    print(f"随机抽取的测试图片路径:{file}")

    # run_line_magic is the supported replacement for the deprecated
    # InteractiveShell.magic() call.
    get_ipython().run_line_magic('matplotlib', 'inline')

    img = image.load_img(file, target_size=image_shape)
    img = image.img_to_array(img)
    # imshow interprets float arrays as 0..1, so convert back to 0..255 bytes.
    img = img.astype(np.uint8)

    plt.imshow(img)

def random_predict_one_picture(classes, files_test, X_test, display_shape=(240,320)):
    """Predict one randomly chosen test sample, show its image and print the result.

    Uses the notebook-level ``model`` for the prediction.

    Args:
        classes: Class names; they are looked up in sorted order.
        files_test: File paths of the test samples, aligned with ``X_test``.
        X_test: Preprocessed test images.
        display_shape: Size passed to ``visualize_image`` when drawing the
            picture.
    """
    # Work on a sorted copy so the caller's list is not reordered in place.
    class_names = sorted(classes)
    m = np.random.randint(len(files_test))
    p = model.predict(X_test[m:m+1,:])
    n = np.argmax(p, axis=1)
    visualize_image(files_test[m], display_shape)
    print(f"预测结果:{class_names[n[0]]}:{p[0][n[0]]}")

In [None]:
# Pick one random image from the test set and predict its class
random_predict_one_picture(classes, files_test, X_test)