Functions for batch processing:
- Fetch the folder names in the root directory and automatically process all the slides;
- Split the 512x512 images and labels into 64x64 tiles; the labels are saved only as a table of the number of tumoral pixels per tile;
- Predict the probabilities with our trained model and save them as a table as well;
- Evaluate the predictions and save the ROC curve together with the optimal threshold.

In [1]:
import datetime
# Wall-clock start time of the run; a later cell prints now2 - now1 as the runtime.
now1=datetime.datetime.now()

In [2]:
import os
import cv2
from collections import Counter
import csv
from keras.models import load_model
import numpy as np

Using TensorFlow backend.


In [3]:
# Name of the data split being processed; it is also the last component of
# the root folder below.
process = 'test'
# Folder whose entries are listed to obtain the slide names.
root = 'E:\\deeplearning\\data\\5x\\detector\\' + process

In [5]:
def get_slide_name(rootpath):
    """Return the names of the slide folders located directly in ``rootpath``.

    Only sub-directories are returned; stray files in ``rootpath`` are not
    slides and are ignored. The names are sorted because ``os.listdir``
    gives no ordering guarantee, while the caller skips leading entries by
    position.

    Args:
        rootpath: Directory that contains one sub-folder per slide.

    Returns:
        Alphabetically sorted list of folder names (not full paths).

    Raises:
        OSError: If ``rootpath`` cannot be listed.
    """
    return sorted(
        entry
        for entry in os.listdir(rootpath)
        if os.path.isdir(os.path.join(rootpath, entry))
    )

#slide_names = get_slide_name(root)

In [9]:
def read_image_name(file_dir):
    """Collect the full paths of all ``.tif`` images below ``file_dir``.

    The directory tree is walked recursively and the extension is compared
    case-sensitively, so ``.tiff`` label files are not picked up here.
    The result is sorted: ``os.walk`` yields files in arbitrary order, but
    the image tiles are later matched with the label counts purely by
    position, so the order has to be deterministic.

    Args:
        file_dir: Directory to search.

    Returns:
        Sorted list of full paths; empty if nothing matches.
    """
    image_paths = []
    for folder, _subdirs, filenames in os.walk(file_dir):
        image_paths.extend(
            os.path.join(folder, filename)
            for filename in filenames
            if os.path.splitext(filename)[1] == '.tif'
        )
    image_paths.sort()
    return image_paths

In [10]:
def read_label_name(file_dir):
    """Collect the full paths of all ``.tiff`` label files below ``file_dir``.

    The directory tree is walked recursively and the extension is compared
    case-sensitively, so ``.tif`` image files are not picked up here.
    The result is sorted: ``os.walk`` yields files in arbitrary order, but
    the label counts are later matched with the image tiles purely by
    position, so the order has to be deterministic.

    Args:
        file_dir: Directory to search.

    Returns:
        Sorted list of full paths; empty if nothing matches.
    """
    label_paths = []
    for folder, _subdirs, filenames in os.walk(file_dir):
        label_paths.extend(
            os.path.join(folder, filename)
            for filename in filenames
            if os.path.splitext(filename)[1] == '.tiff'
        )
    label_paths.sort()
    return label_paths

In [11]:
def split64_image(slideName):
    """Cut every image of the current slide into 64x64 RGB tiles.

    The images are taken from the module-level ``path_512`` folder, which
    must be set before the call; ``slideName`` is accepted for symmetry with
    the other pipeline steps but is not used here. Each image is treated as
    an 8x8 grid (512 / 64) and traversed column by column, the same order
    ``split64_label`` uses, so tiles and label counts line up by position.
    The tiles are kept in memory only; nothing is written to disk.

    Args:
        slideName: Name of the slide being processed (unused).

    Returns:
        NumPy array with one tile per entry along the first axis, converted
        from BGR to RGB and divided by 255. Presumably of shape
        (n_tiles, 64, 64, 3) for 512x512 colour input -- confirm.

    Raises:
        IOError: If an image file cannot be read by OpenCV.
    """
    tiles_per_side = int(512/64)
    tiles = []
    for image_path in read_image_name(path_512):
        # Flag -1 loads the file unchanged (no forced colour conversion).
        img = cv2.imread(image_path, -1)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Cannot read image: ' + image_path)
        for col in range(tiles_per_side):
            for row in range(tiles_per_side):
                tile = img[row*64:(row+1)*64, col*64:(col+1)*64, :]
                tiles.append(cv2.cvtColor(tile, cv2.COLOR_BGR2RGB)/255)

    return np.asarray(tiles)

# image=split64_image(slide_name)

In [12]:
def split64_label(slideName):
    """Count the pixels with label value 3 in every 64x64 tile of each label image.

    Label files are taken from the module-level ``path_512`` folder and the
    counts are written, one per row, to the file ``label_tum_npixel`` inside
    the module-level ``path_64`` folder; both globals must be set before the
    call. ``slideName`` is not used here. Each label image is treated as an
    8x8 grid (512 / 64) traversed column by column, the same order
    ``split64_image`` uses.

    Args:
        slideName: Name of the slide being processed (unused).

    Returns:
        List with one integer per tile: the number of pixels equal to 3
        (taken to be the tumor label, per the output file name -- confirm).

    Raises:
        IOError: If a label file cannot be read by OpenCV.
    """
    tiles_per_side = int(512/64)
    roi_sub_npixel = []
    for label_path in read_label_name(path_512):
        roi = cv2.imread(label_path, -1)
        if roi is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Cannot read label image: ' + label_path)
        for col in range(tiles_per_side):
            for row in range(tiles_per_side):
                tile = roi[row*64:(row+1)*64, col*64:(col+1)*64]
                roi_sub_npixel.append(int(np.count_nonzero(tile == 3)))

    with open(path_64+'\\label_tum_npixel', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows([count] for count in roi_sub_npixel)

    return roi_sub_npixel

# roi_sub_npixel = split64_label(slide_name)

In [13]:
def test(slideName):
    """Run the model on the current tiles and save the predictions as CSV.

    Reads the module-level ``model`` and ``image`` and writes the file
    ``vgg_dense_bn_prob`` into the module-level ``path_result`` folder; all
    three must be set before the call. ``slideName`` is not used here.
    The file starts with the header row ``tum,nor`` followed by one row per
    prediction.

    Args:
        slideName: Name of the slide being processed (unused).

    Returns:
        The value returned by ``model.predict(image)``.
    """
    preds = model.predict(image)

    with open(path_result+'\\vgg_dense_bn_prob', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(['tum','nor'])
        # Write each prediction exactly once; an additional per-row loop
        # used to duplicate every row in the output file.
        writer.writerows(preds)

    return preds

# preds = test(slide_name)

In [15]:
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

def evaluate(slideName):
    """Plot the ROC curve of the current predictions and save it as ROC.jpg.

    Reads the module-level ``roi_sub_npixel`` and ``preds`` and saves the
    figure into the module-level ``path_result`` folder; all three must be
    set before the call. A tile counts as positive when more than 2048 of
    its pixels are tumoral, i.e. more than half of a 64x64 tile. The score
    is column 0 of ``preds``, the column written under the ``tum`` header.

    Args:
        slideName: Name of the slide, used in the plot title.

    Returns:
        List of ground-truth labels (1 = positive, 0 = negative), one per tile.
    """
    gt = [1 if n_tumor > 2048 else 0 for n_tumor in roi_sub_npixel]

    # The ROC curve must be computed against the ground truth built above.
    fpr, tpr, thresholds = roc_curve(gt, preds[:,0])
    roc_auc = auc(fpr, tpr)

    # Operating point chosen with Youden's J statistic (maximum of tpr - fpr).
    best = np.argmax(tpr - fpr)

    # The legend reports the AUC and the rates/threshold at the chosen point.
    plt.plot(fpr, tpr, lw=1, label='auc = %0.3f, tpr = %0.3f, tnr = %0.3f, thres = %0.3f' % ( roc_auc, tpr[best], 1-fpr[best], thresholds[best]))
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6))

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate (1 - Specificity)')
    plt.ylabel('True Positive Rate (Sensitivity)')
    # Use the argument rather than a global so the title matches the caller's slide.
    plt.title(slideName + ' ROC curve by Python')
    plt.legend(loc="lower right")
    plt.savefig(path_result+'//ROC.jpg')
    plt.close()

    return gt
 
# gt = evaluate(slide_name)

In [26]:
if __name__ == '__main__':
    # Number of leading slide folders to leave out of this run
    # (presumably handled in an earlier run -- confirm before changing).
    n_skip = 60
    # Slicing does not fail when fewer than n_skip entries exist,
    # unlike repeated pop(0).
    slide_names = get_slide_name(root)[n_skip:]
    print(slide_names)

    model = load_model('E:\\deeplearning\\Hepatocarcinomes\\models\\5x\\vgg16_dense_bn\\best_model', compile = True)

    for slide_name in slide_names:
        # The slide names are listed from `root`, so the per-slide folders
        # are built from `root` as well. The names below are module-level
        # globals read by split64_image / split64_label / test / evaluate.
        slide_dir = os.path.join(root, slide_name)
        path_512 = os.path.join(slide_dir, '512_image_label')
        path_64 = os.path.join(slide_dir, 'split64_image_label')
        path_result = os.path.join(slide_dir, 'results')

        image = split64_image(slide_name)

        roi_sub_npixel = split64_label(slide_name)

        preds = test(slide_name)

        gt = evaluate(slide_name)

        # Drop the large arrays before the next slide is loaded.
        del image
        del preds

['HMNT1677', 'HMNT1679', 'HMNT1688_bis', 'HMNT1714', 'HMNT1715_bis', 'HMNT1725', 'HMNT1729', 'HMNT1745_bis', 'HMNT1747_bis', 'HMNT1748', 'HMNT1749', 'HMNT1750', 'HMNT1751', 'HMNT1752_bis', 'HMNT1759', 'HMNT1760_bis', 'HMNT1762']


In [None]:
# Report the elapsed wall-clock time since now1 was taken in the first cell.
now2=datetime.datetime.now()
print("Runtime?" , now2-now1)

In [25]:
# Inspection cell: list the slide names left after skipping the first 59 entries.
# Slicing does not fail when fewer entries exist, unlike repeated pop(0).
slide_names = get_slide_name(root)[59:]
print(slide_names)

['HMNT1666_bis', 'HMNT1677', 'HMNT1679', 'HMNT1688_bis', 'HMNT1714', 'HMNT1715_bis', 'HMNT1725', 'HMNT1729', 'HMNT1745_bis', 'HMNT1747_bis', 'HMNT1748', 'HMNT1749', 'HMNT1750', 'HMNT1751', 'HMNT1752_bis', 'HMNT1759', 'HMNT1760_bis', 'HMNT1762']


In [None]:
# del(image)
# del(preds)

In [17]:
# Inspection cell: show the slide name currently bound to `slide_name`.
print(slide_name)

HMNT1666_bis


In [None]:
# Inspection cell: show how many slide names are currently in `slide_names`.
print(len(slide_names))