In [None]:
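# Use machine learning to determine the maximum similarity between images; above 60% the images are considered similar.
# Note, however, that similarity can only be matched from the largest image down to the smallest. Below 60% the
# similarity is treated as non-existent and scored as 0; above 60% the score is based on the similarity value.
# Returns a list of positions and scores.
# Pay particular attention to the images drawn with plot; this is important.
# Input: two colour images of identical size.
# NOTE(review): this header describes an image-similarity task, while the code below builds and trains a
# VGG16 classifier on COVID-19 CT images -- confirm whether the header is stale.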
import keras
import keras.backend as K
from keras.models import Sequential,Model, load_model
from keras.layers.core import Flatten, Dense
from keras.layers.convolutional import Convolution2D,ZeroPadding2D,Conv2D
from keras.layers.pooling import MaxPooling2D,AveragePooling2D,GlobalAveragePooling2D
from keras.callbacks import ReduceLROnPlateau,ModelCheckpoint
from keras.initializers import glorot_uniform
from keras.layers import AveragePooling2D,MaxPooling2D,Dropout

import cv2
import h5py
import os
import time
import pdb
import random
import math
import numpy as np
import matplotlib.pylab as plt
from datetime import datetime

# Seed both random number generators used in this notebook: NumPy (np.random.shuffle
# in the driver cell) and Python's `random` (random.shuffle inside readPath()).
# Seeding only NumPy left the file order non-reproducible between runs.
np.random.seed(776)
random.seed(776)
# Dataset root directory; readPath() collects images from its sub-directories.
paths = "../../data/Keras/COVID-19/"
# Number of passes over the training set performed by train().
epochs = 100
# Size every image is resized to; must match the input shape hard-coded in VGG16().
height, width = 224,224
# Number of images loaded from disk and fed to the model per step.
batch_size = 50
# Label inserted into the timing summary lines printed by train() and test().
info_string = "now"

In [None]:
def VGG16(nb_classes, num_input_channels=1024):
    """
    Build a VGG16-style convolutional neural network.

    The network is five blocks of zero-padded 3x3 ReLU convolutions, each block
    followed by a 2x2 max-pool, then a 7x7 average pool, dropout, two
    4096-unit dense layers and a softmax classifier.

    args : nb_classes (int) number of output classes (size of the softmax layer)
           num_input_channels (int) accepted for backward compatibility only;
               it is not used -- the input shape is fixed to (224, 224, 3)
    returns : model (keras NN) the un-compiled Sequential model named "VGG16"
    """
    # (number of filters, number of convolution layers) for each block.
    conv_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    # The name is passed to the constructor: assigning `model.name` afterwards
    # fails on Keras versions where `name` is a read-only property.
    model = Sequential(name="VGG16")

    is_first_layer = True
    for filters, conv_count in conv_blocks:
        for _ in range(conv_count):
            if is_first_layer:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1, 1), input_shape=(224, 224, 3)))
                is_first_layer = False
            else:
                model.add(ZeroPadding2D((1, 1)))
            model.add(Conv2D(filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # Five 2x2 poolings reduce 224x224 to 7x7; average that down to 1x1.
    model.add(AveragePooling2D((7, 7)))
    model.add(Flatten())

    # Classifier head.
    model.add(Dropout(0.1))
    model.add(Dense(4096, activation='relu'))
    model.add(Dense(4096, activation='relu'))
    model.add(Dense(nb_classes, activation='softmax'))

    return model

In [None]:
def readPath(paths):
    """
    Collect the image files found one level below a dataset root directory.

    Every sub-directory of `paths` is scanned (non-recursively) for files whose
    name ends in '.jpg' or '.png'; any other file in a sub-directory is reported
    on stdout and skipped. Files sitting directly in `paths` are ignored.

    args : paths (str) dataset root directory, with or without a trailing separator
    returns : imgs_path (list of str) image file paths in shuffled order
    """
    imgs_path = []
    # Directory listings are sorted so that, with a seeded `random`, the
    # shuffled order does not depend on the file system's listing order.
    for entry in sorted(os.listdir(paths)):
        class_dir = os.path.join(paths, entry)
        if not os.path.isdir(class_dir):
            continue
        for filename in sorted(os.listdir(class_dir)):
            if filename.endswith(('.jpg', '.png')):
                imgs_path.append(os.path.join(class_dir, filename))
            else:
                print("erros file1 is %s"%filename)
    # Shuffle so the classes are interleaved: feeding one class after the other
    # weakens what was learned from the classes seen first.
    random.shuffle(imgs_path)
    return imgs_path
def readData(path):
    """
    Load a batch of images and derive their one-hot labels from the file paths.

    A path containing "CT_COVID" is labelled [0, 1]; a path containing
    "CT_NonCOVID" is labelled [1, 0]. Files that match neither pattern, or that
    OpenCV cannot decode, are reported on stdout and skipped entirely so the
    returned images and labels always stay aligned.

    Images are resized to the module-level (height, width) and returned as
    loaded by cv2.imread; no normalisation is applied.

    args : path (iterable of str) image file paths
    returns : (imgs, labs) two numpy arrays with one entry per loaded image
    """
    imgs = []
    labs = []
    for filename in path:
        if "CT_COVID" in filename:
            label = [0, 1]
        elif "CT_NonCOVID" in filename:
            label = [1, 0]
        else:
            # Unknown class: skip the file instead of appending an image
            # that has no matching label.
            print("erros file2 is %s"%filename)
            continue

        img = cv2.imread(filename)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print("erros file2 is %s"%filename)
            continue
        # cv2.resize expects the target size as (width, height).
        img = cv2.resize(img, (width, height))
        # Debug aid: display one specific sample when it is encountered.
        if "2020.01.24.919183-p27-132" in filename:
            plt.imshow(img)

        imgs.append(img)
        labs.append(label)
    return np.array(imgs), np.array(labs)

In [None]:
# Train the model
def train(model,checkpoint,learning_rate_reduction,imgs_path):
    """
    Train `model` on the images listed in `imgs_path`, then save it to disk.

    For each of the module-level `epochs` passes, the paths are consumed in
    consecutive slices of `batch_size` (the last slice may be shorter). Each
    slice is loaded with readData() and fitted with one single-epoch
    model.fit() call. After every pass the mean and standard deviation of the
    per-step fit time are printed. Finally the full model is written to
    './my_model.h5' and its weights to './deep_dream.hdf5'.

    args : model (keras NN) compiled model to train
           checkpoint, learning_rate_reduction (keras callbacks) passed to every fit call
           imgs_path (list of str) training image paths
    returns : None
    raises : ValueError if imgs_path is empty
    """
    if len(imgs_path) == 0:
        raise ValueError("train() needs at least one image path")

    # Ceiling division: the final, shorter batch is trained on as well and a
    # data set smaller than batch_size still yields one step.
    num_batches = (len(imgs_path) + batch_size - 1) // batch_size
    # The first steps are excluded from the per-step log only (GPU memory
    # loading and cold caches make them unrepresentative); the mean and
    # standard deviation below still cover every step.
    num_steps_burn_in = 10

    for epoch in range(epochs):
        total_duration = 0.0          # sum of step durations
        total_duration_squared = 0.0  # sum of squared durations, for the variance
        for i in range(num_batches):
            print('%.3f / %.3f<=====>%.3f / %.3f'%(epoch,epochs,i,num_batches))
            # Slicing past the end of the list simply yields the shorter tail batch.
            train_x,train_y = readData(imgs_path[i*batch_size:(i+1)*batch_size])

            start_time = time.time()
            model.fit(train_x, train_y, epochs=1, batch_size=batch_size,callbacks=[checkpoint,learning_rate_reduction])
            duration = time.time() - start_time

            # Log every tenth step once the burn-in steps are over.
            if i >= num_steps_burn_in and not i % 10:
                print ('%s: step %d, duration = %.3f' %
                       (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration

        mn = total_duration / num_batches  # mean time per step
        # Var[x] = E[x^2] - E[x]^2; clamp at zero because floating-point
        # rounding can make it slightly negative, which would crash math.sqrt.
        vr = max(total_duration_squared / num_batches - mn * mn, 0.0)
        sd = math.sqrt(vr)  # standard deviation
        print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
               (datetime.now(), info_string, num_batches, mn, sd))

    # Persist the trained model and its weights locally.
    model.save(os.path.join('./', 'my_model.h5'))
    model.save_weights(os.path.join('./', 'deep_dream.hdf5'))

# Evaluate (test) the trained model
def test(use_local_model, imgs_path, model=None):
    """
    Evaluate a model on the images listed in `imgs_path`.

    The paths are consumed in consecutive slices of the module-level
    `batch_size` (the last slice may be shorter). Each slice is loaded with
    readData() and passed to model.predict(). A sample counts as correct when
    its rounded prediction vector equals its one-hot label. The accuracy of
    every batch, the overall accuracy, and the mean and standard deviation of
    the per-batch prediction time are printed.

    args : use_local_model (bool) when true, load the model from './my_model.h5'
           imgs_path (list of str) test image paths
           model (keras NN, optional) model to evaluate when use_local_model is false
    returns : None
    raises : ValueError if no model is available or imgs_path is empty
    """
    if use_local_model:
        model = load_model(os.path.join('./', 'my_model.h5'))
    elif model is None:
        raise ValueError("test() needs use_local_model=True or an explicit model")
    if len(imgs_path) == 0:
        raise ValueError("test() needs at least one image path")

    # Ceiling division so the final, shorter batch is evaluated as well.
    num_batches = (len(imgs_path) + batch_size - 1) // batch_size
    # The first steps are excluded from the per-step log only; the timing
    # statistics below still cover every step.
    num_steps_burn_in = 10

    total_duration = 0.0          # sum of step durations
    total_duration_squared = 0.0  # sum of squared durations, for the variance
    total_errors = 0
    total_samples = 0
    for i in range(num_batches):
        # Slicing past the end of the list simply yields the shorter tail batch.
        test_x,test_y = readData(imgs_path[i*batch_size:(i+1)*batch_size])

        start_time = time.time()
        y = model.predict(test_x,batch_size=batch_size,verbose=0)
        duration = time.time() - start_time

        # Log every tenth step once the burn-in steps are over.
        if i >= num_steps_burn_in and not i % 10:
            print ('%s: step %d, duration = %.3f' %
                   (datetime.now(), i - num_steps_burn_in, duration))
        total_duration += duration
        total_duration_squared += duration * duration

        # Count over the samples actually in this batch, not over batch_size.
        n_samples = len(test_y)
        error_sum = sum(1 for j in range(n_samples)
                        if not (np.round(y[j]) == test_y[j]).all())
        print("正确率 acc predict is %.3f"%(1-(error_sum/n_samples)))
        total_errors += error_sum
        total_samples += n_samples

    print("overall acc predict is %.3f"%(1-(total_errors/total_samples)))

    mn = total_duration / num_batches  # mean time per step
    # Var[x] = E[x^2] - E[x]^2; clamp at zero because floating-point
    # rounding can make it slightly negative, which would crash math.sqrt.
    vr = max(total_duration_squared / num_batches - mn * mn, 0.0)
    sd = math.sqrt(vr)  # standard deviation
    print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
           (datetime.now(), info_string, num_batches, mn, sd))

In [None]:
if __name__ == "__main__":
    # Gather every image path and split it 80% / 20% into train and test sets.
    imgs_path = readPath(paths)
    np.random.shuffle(imgs_path)
    split_index = int(len(imgs_path)*0.8)
    imgs_path_train = imgs_path[0 : split_index]
    imgs_path_test  = imgs_path[split_index : ]

    # Two output classes. VGG16() accepts num_input_channels but does not read it.
    nb_classes = 2
    num_input_channels = 1024
    model = VGG16(nb_classes, num_input_channels)
    model.summary()
    model.compile(optimizer="sgd", loss='categorical_crossentropy',metrics=['accuracy'])

    # NOTE(review): both callbacks monitor 'val_acc', but train() calls
    # model.fit without validation data, so that metric is presumably never
    # logged and neither callback has anything to act on -- confirm.
    learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=2, verbose=1, factor=0.1, min_lr=0.001)
    checkpoint = ModelCheckpoint('weights.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max', period=1)

    # When true, test() reloads the model that train() saved to disk.
    use_local_model = True
    train(model,checkpoint,learning_rate_reduction,imgs_path_train)
    test(use_local_model,imgs_path_test)