In [0]:
# Use the %pip magic so the package is installed into the environment of the running kernel.
%pip install tifffile



# Class that creates the K-fold cross-validation data

In [0]:
import numpy as np
import skimage.io as io
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.model_selection import KFold
import os

In [0]:
class K_fold_validataion():
    """Write K-fold train/validation splits of the image/label pairs to disk.

    The source pairs are read from ``<root>data1/images/train-volumeNN.jpg`` and
    ``<root>data1/labels/train-labelsNN.jpg``. Each fold is written to
    ``<root>data1/<fold>_fold/`` with ``train`` and ``validation`` sub-folders
    (each holding ``images`` and ``labels``) plus empty ``jpg`` and ``tif`` folders.
    """

    def __init__(self,root='/content/drive/My Drive/',k=5):
        """Create the fold folders unless every one of them already exists.

        Args:
            root: base folder (with trailing separator) that contains ``data1/``.
            k: number of folds to create.
        """
        self.root=root
        # Check every fold folder, not just the first one, so that a run that was
        # interrupted half-way gets completed on the next call. The split is seeded,
        # so regenerating reproduces the same folds.
        fold_dirs=[self.root+'data1/'+str(fold)+'_fold' for fold in range(1,k+1)]
        if all(os.path.exists(fold_dir) for fold_dir in fold_dirs):
            print('K fold validation data has exsisted. No need to create folders')
            return
        self.mkSubFold(k)

    def findAllData(self,num_images=30):
        """Read all image/label pairs as grayscale and stack them along the last axis.

        Args:
            num_images: number of consecutive pairs to read, starting at index 00.

        Returns:
            ``(alltrain, alllabel)``; slice ``[:, :, i]`` of each array is pair ``i``.
        """
        images=[]
        labels=[]
        for i in range(num_images):
            stem=str(i).zfill(2)
            images.append(io.imread(self.root+"data1/images/train-volume"+stem+".jpg",as_gray=True))
            labels.append(io.imread(self.root+"data1/labels/train-labels"+stem+".jpg",as_gray=True))
        # One stack call instead of growing the array with concatenate on every iteration.
        alltrain=np.stack(images,axis=-1)
        alllabel=np.stack(labels,axis=-1)
        print(alltrain.shape)
        print(alllabel.shape)
        return alltrain,alllabel

    def makeDir(self,save_path):
        """Create the folder layout of one fold below ``root + save_path``.

        Existing folders are left untouched, so the call is safe to repeat.
        """
        for sub_folder in ("train/images/","train/labels/",
                           "validation/images/","validation/labels/",
                           "jpg","tif"):
            os.makedirs(self.root+save_path+sub_folder,exist_ok=True)

    def writeData(self,train_img,train_label,val_img,val_label,foldNum):
        """Save the slices of one fold as numbered JPEG files.

        Args:
            train_img, train_label: training images and labels, one pair per last-axis index.
            val_img, val_label: validation images and labels, same layout.
            foldNum: fold number used in the folder name ``<foldNum>_fold``.
        """
        save_path = "data1/"+str(foldNum)+"_fold/"
        self.makeDir(save_path)
        for i in range(train_img.shape[2]):
            name=str(i).zfill(2)+".jpg"
            io.imsave(self.root+save_path+"train/images/"+name,train_img[:,:,i],quality=100)
            io.imsave(self.root+save_path+"train/labels/"+name,train_label[:,:,i],quality=100)
        for i in range(val_img.shape[2]):
            name=str(i).zfill(2)+".jpg"
            io.imsave(self.root+save_path+"validation/images/"+name,val_img[:,:,i],quality=100)
            io.imsave(self.root+save_path+"validation/labels/"+name,val_label[:,:,i],quality=100)

    def mkSubFold(self,k):
        """Split the data into ``k`` shuffled folds and write each one to disk.

        The shuffle uses a fixed ``random_state`` so repeated runs give the same folds.
        """
        alltrain,alllabel = self.findAllData()
        indices = np.arange(alltrain.shape[2])
        kf = KFold(n_splits=k,shuffle=True,random_state=1)
        # Folds are numbered from 1 to match the folder names checked in __init__.
        for foldNum,(trainIdx,valIdx) in enumerate(kf.split(indices),start=1):
            self.writeData(alltrain[:,:,trainIdx],alllabel[:,:,trainIdx],
                           alltrain[:,:,valIdx],alllabel[:,:,valIdx],foldNum)


In [0]:
# Build the fold folders on Drive; the constructor skips the work when they already exist.
k_fold = K_fold_validataion('/content/drive/My Drive/')


K fold validation data has exsisted. No need to create folders


# Training the U-Net

In [0]:
from keras.optimizers import *
from keras.layers import Conv2D,MaxPooling2D,Dense,Dropout,UpSampling2D,concatenate,Input,BatchNormalization,LeakyReLU,Conv2DTranspose
from keras.models import Sequential,Model
from keras.preprocessing.image import ImageDataGenerator,load_img,img_to_array,array_to_img
from keras.callbacks import ModelCheckpoint,EarlyStopping,ReduceLROnPlateau
import os
import glob
import numpy as np
import time
import random
from matplotlib import pyplot as plt
from tifffile import imsave

Using TensorFlow backend.


## The `Unet` class encapsulates data preparation, model building, training and prediction


In [0]:
class Unet():
  
    # initialize parameters
    def __init__(self,img_height=512,img_width=512,data_path='data1',img_type='jpg',test_img_path='test_images',result_img_path='result_images'):
        self.img_height=img_height
        self.img_width=img_width
        self.data_path=data_path
        self.img_type=img_type
        self.npy_path=data_path
        self.test_img_path=test_img_path
        self.result_img_path=result_img_path
        self.tif='tif'
    
    # This method was used for offline augmentation, i.e. generating extra training images on disk.
    # After a lot of experiments, offline augmentation turned out not to be a good way to train the network,
    # so online (on-the-fly) augmentation is used instead.
    def load_data_to_gen_more_img(self):
        """Offline augmentation: write randomly transformed images and labels to disk.

        Reads every ``<data_path>/images/*.<img_type>`` and ``<data_path>/labels/*.<img_type>``
        file, then draws 10 batches of 30 from two identically seeded generators.
        Results are saved to ``<data_path>/trans_images`` and ``<data_path>/trans_labels``.

        Raises:
            ValueError: if the number of image files and label files differ.
        """
        # NOTE(review): the featurewise_* options normally need datagen.fit(...) to be
        # called first; no fit is performed here - confirm this is intended.
        datagen = ImageDataGenerator(
            featurewise_center=True,
            featurewise_std_normalization=True,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True
        )
        # Both patterns include the dot so only files with the exact extension match.
        image_paths=sorted(glob.glob(self.data_path+'/images/*.'+self.img_type))
        label_paths=sorted(glob.glob(self.data_path+'/labels/*.'+self.img_type))
        if len(image_paths)!=len(label_paths):
            raise ValueError('found '+str(len(image_paths))+' images but '
                             +str(len(label_paths))+' labels in '+self.data_path)

        train_images_data_arr=np.array([img_to_array(load_img(path)) for path in image_paths])
        label_images_data_arr=np.array([img_to_array(load_img(path)) for path in label_paths])

        image_out_dir=self.data_path+'/trans_images'
        label_out_dir=self.data_path+'/trans_labels'
        # The generators write into these folders, so make sure they exist.
        for out_dir in (image_out_dir,label_out_dir):
            os.makedirs(out_dir,exist_ok=True)

        # Same seed and no shuffling for both flows so images and labels stay paired.
        gen_data_train = datagen.flow(train_images_data_arr, batch_size=30, shuffle=False, save_to_dir=image_out_dir,
                                               save_prefix='trans-train',save_format='jpg',seed=1)

        gen_data_labels = datagen.flow(label_images_data_arr, batch_size=30, shuffle=False, save_to_dir=label_out_dir,
                                               save_prefix='trans-labels',save_format='jpg',seed=1)
        for i in range(10):
            print('gens '+ str(30*(1+i)) +' images')
            gen_data_train.next()
            gen_data_labels.next()
    
    # This is used for renaming the images in offline training
    def rename_file(self):
        train_path =self.data_path+'/trans_images'
        test_path =self.data_path+'/trans_labels'
        files=sorted(os.listdir(train_path))
        files1=sorted(os.listdir(test_path))
        print('num of trans_images ',len(files))
        print('num of trans_labels ',len(files1))
        if len(files1)!=len(files):
            sys.exit()
        for idx in range(len(files)):
            if (idx+1)%100==0:
                print('rename'+ str(idx+1)+ 'images')
            if files[idx]!='.DS_Store':
                filename=str(idx)
                os.rename(os.path.join(train_path,files[idx]),os.path.join(train_path,filename+"."+self.img_type))
            if files1[idx]!='.DS_Store':
                filename=str(idx)
                os.rename(os.path.join(test_path,files1[idx]),os.path.join(test_path,filename+"."+self.img_type))
    
   # A part of offline training
    def create_train_data(self):
        """Offline pipeline: pack all training images and labels into two ``.npy`` files.

        Augmented files (``trans_images`` / ``trans_labels``) come first, followed by the
        originals (``images`` / ``labels``). Images are scaled to [0, 1]; labels are also
        thresholded at 0.5 to exactly 0 or 1. The results are written to
        ``<npy_path>/image_train.npy`` and ``<npy_path>/image_labels.npy`` and overwrite
        any existing files.
        """
        print('loading data.........................')
        train_image_paths=self._sorted_image_paths('trans_images')+self._sorted_image_paths('images')
        imgdatas_train=self._load_gray_stack(train_image_paths,'images',binarize=False)
        np.save(self.npy_path+'/image_train.npy',imgdatas_train)

        label_image_paths=self._sorted_image_paths('trans_labels')+self._sorted_image_paths('labels')
        imgdatas_labels=self._load_gray_stack(label_image_paths,'labels',binarize=True)
        np.save(self.npy_path+'/image_labels.npy',imgdatas_labels)
        print('finish loading data.............................')

    def _sorted_image_paths(self,sub_folder):
        """Return the sorted ``*.<img_type>`` files of ``<data_path>/<sub_folder>``."""
        return sorted(glob.glob(self.data_path+'/'+sub_folder+'/*.'+self.img_type))

    def _load_gray_stack(self,paths,kind,binarize):
        """Load ``paths`` as grayscale into one float32 array scaled to [0, 1].

        Args:
            paths: image files to load, in output order.
            kind: word used in the progress message ('images' or 'labels').
            binarize: when true, threshold every pixel at 0.5 to exactly 0.0 or 1.0.

        Returns:
            Array of shape ``(len(paths), img_height, img_width, 1)``.
        """
        # Sized from the configured geometry rather than a hard-coded 512x512.
        stack=np.ndarray((len(paths),self.img_height,self.img_width,1),dtype=np.float32)
        for count,path in enumerate(paths):
            img=img_to_array(load_img(path,color_mode='grayscale'))/255
            if binarize:
                img[img > 0.5] = 1.0
                img[img <= 0.5] = 0.0
            stack[count]=img
            if count%100==0:
                print('create data '+str(count)+' '+kind)
        return stack
  
    # A part of offline training
    def load_data_to_train(self):
        img_train=np.load(self.npy_path+'/image_train.npy')
        img_label=np.load(self.npy_path+'/image_labels.npy')
        return img_train,img_label
    
    # Building the network model
    def create_unet(self):
        """Build the (uncompiled) U-Net for single-channel ``img_height`` x ``img_width`` inputs.

        The contracting path uses 64-128-256-512 filters with a 1024-filter bottleneck.
        The expanding path mirrors it with skip connections. The output is one sigmoid
        channel. Layers are created in the same order as before, so weight files saved by
        earlier runs still match.

        Returns:
            A Keras ``Model`` mapping the input image to a per-pixel probability map.
        """
        def conv(filters,kernel_size,tensor):
            # ReLU convolution with 'same' padding, shared by every hidden layer.
            return Conv2D(filters, kernel_size, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(tensor)

        def double_conv(filters,tensor):
            # Two stacked 3x3 convolutions with the same width.
            return conv(filters,3,conv(filters,3,tensor))

        def up_block(filters,tensor,skip):
            # Upsample, halve the channels with a 2x2 conv, merge the skip, then refine.
            up = conv(filters,2,UpSampling2D(size = (2,2))(tensor))
            merged = concatenate([skip,up], axis = 3)
            return double_conv(filters,merged)

        inputs=Input((self.img_height,self.img_width,1))

        # Contracting path
        conv1 = double_conv(64,inputs)
        pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
        conv2 = double_conv(128,pool1)
        pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
        conv3 = double_conv(256,pool2)
        pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
        conv4 = double_conv(512,pool3)
        drop4 = Dropout(0.7)(conv4)
        pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

        # Bottleneck
        conv5 = double_conv(1024,pool4)
        drop5 = Dropout(0.7)(conv5)

        # Expanding path; the last stage has no dropout.
        conv6 = Dropout(0.7)(up_block(512,drop5,drop4))
        conv7 = Dropout(0.7)(up_block(256,conv6,conv3))
        conv8 = Dropout(0.7)(up_block(128,conv7,conv2))
        conv9 = up_block(64,conv8,conv1)
        conv9 = conv(2,3,conv9)
        conv10 = Conv2D(1, 1, activation = 'sigmoid')(conv9)

        # Keras 2 names these arguments inputs/outputs; input/output are the Keras 1 spellings.
        model=Model(inputs=inputs,outputs=conv10)
        return model
    
    # Using offline training method to train the network
    def train(self):
        if os.path.exists(self.data_path+'/unet.hdf5'):
            print('file exists, not need to train....................')
            model=self.create_unet()
            model.load_weights(self.data_path+'/unet.hdf5')
            
            predict_imgs=np.load(self.npy_path+'/predict_imgs.npy')
            result=model.predict(predict_imgs,batch_size=1,verbose=1)
            
            np.save(self.npy_path+'/predict_imgs_result.npy',result)
            
        else:
            print('loading data to train the model.................')
            img_train,img_label=self.load_data_to_train()
            print('img_train',img_train.shape)
            print('img_label',img_label.shape)

            
            check_point=ModelCheckpoint(self.data_path+'/unet.hdf5',monitor='val_loss',verbose=1,save_best_only=True)
            early_stop=EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='auto')
            schedular_unet=ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0)
            model=self.create_unet()
            model.compile(optimizer=Adam(lr=0.0001),loss='binary_crossentropy',metrics=['accuracy'])

            history=model.fit(img_train,img_label,batch_size=2,nb_epoch=10,verbose=1,validation_split=0.2,shuffle=True,callbacks=[schedular_unet,check_point,early_stop])
            self.plt_hist(history)

            with open(self.data_path+'/log_unet_1000.txt','w') as f:
              f.write(str(history.history))
            np.save(self.npy_path+'/mdoel',model)

            predict_imgs=np.load(self.npy_path+'/predict_imgs.npy')

            result=model.predict(predict_imgs,batch_size=1,verbose=1)
            np.save(self.npy_path+'/predict_imgs_result.npy',result)
    
    # Plot the training history image and save it in the specific folder
    def plt_hist(self,history,folder_path):
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.plot(history.history['val_acc'])
        plt.plot(history.history['acc'])
        plt.title("model loss")
        plt.ylabel("loss")
        plt.xlabel("epoch")
        plt.legend(["loss","val_loss","val_acc","acc"],loc="upper left")
        current_time=time.strftime("%Y%m%d%H%M%S", time.localtime())

        plt.savefig(folder_path+'/val_loss_'+current_time+'.jpg')
        plt.show()
    
    # Save the test image to the specified folder
    def save_img(self,folder_path):
        result_npy=np.load(self.npy_path+'/predict_imgs_result.npy')
        for i in range(result_npy.shape[0]):
            img=result_npy[i]
#             print(np.max(img))
#             max_value=np.max(img)
#             img[img/max_value > 0.5] = 255
#             img[img/max_value <= 0.5] = 0
#             img[img>0.5]=1
#             img[img<=0.5]=0
#             img_tif=tifffile.imsave(img)
            img2=array_to_img(img)
            print(i)
            img2.save(folder_path+'/jpg/'+str(i)+'.'+self.img_type)
            imsave(folder_path+'/tif/'+str(i)+'.'+self.tif,img)
    
    # Load the test images and store them in a .npy file
    def predict_img(self):
        if os.path.exists(self.data_path+'/predict_imgs.npy'):
            print('predict data npy has already exists..................')
            return
        selected_paths=sorted(glob.glob(self.data_path+'/'+self.test_img_path+'/*.'+self.img_type))
        predict_images=np.ndarray((len(selected_paths),self.img_height,self.img_width,1),dtype=np.int)
        
        for idx in range(len(selected_paths)):
            img=load_img(selected_paths[idx],color_mode='grayscale')
            img=img_to_array(img)
            predict_images[idx]=img
        np.save(self.npy_path+'/predict_imgs.npy',predict_images)
        print('creating predict images finish........................')
    
    # K-fold validation (5-fold validation)
    def K_fold_val(self):
        model=self.create_unet()
        files=glob.glob(self.data_path+'/*')
        K_fold_paths=[obj for obj in files if obj.endswith('_fold')]
        for folder_path in K_fold_paths:
            train_path=folder_path+'/train'
            val_path=folder_path+'/validation'
            self.train_fit_gen(folder_path,train_path,val_path,model)

        return K_fold_paths
      
    # Using online training to train the model
    def train_fit_gen(self,folder_path,train_path,val_path,model):
        if os.path.exists(folder_path+'/unet_fit_gen.hdf5'):
            print('file exists, not need to train....................')
            model=self.create_unet()
            model.load_weights(folder_path+'/unet_fit_gen.hdf5')
            
            predict_imgs=np.load(self.npy_path+'/predict_imgs.npy')
            result=model.predict(predict_imgs,batch_size=1,verbose=1)
            
            np.save(self.npy_path+'/predict_imgs_result.npy',result)
            self.save_img(folder_path)
            return 
        
        data_gen_args = dict(rotation_range=0.2,
                    width_shift_range=0.1,
                    height_shift_range=0.1,
                    shear_range=0.1,
                    zoom_range=0.1,
                    horizontal_flip=True,
                    fill_mode='nearest')
        
        
        early_stop=EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='auto')
        myGene = self.trainGenerator(2,train_path,'images','labels',data_gen_args)
        schedular_unet=ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0)
        
        model.compile(optimizer=Adam(lr=0.0002),loss='binary_crossentropy',metrics=['accuracy'])
        model_checkpoint = ModelCheckpoint(folder_path+'/unet_fit_gen.hdf5', monitor='val_loss',verbose=1, save_best_only=True,mode='auto')

        myGene_test = self.trainGenerator(2,val_path,'images','labels',data_gen_args,seed=2)

        history=model.fit_generator(generator=myGene,validation_data=myGene_test,validation_steps=10,steps_per_epoch=1000,epochs=10,callbacks=[early_stop,schedular_unet,model_checkpoint],verbose=1,class_weight='auto')
        self.plt_hist(history,folder_path)

        with open(folder_path+'/log_unet.txt','w') as f:
            f.write(str(history.history))
        np.save(folder_path+'/mdoel.h5',model)

#         predict_imgs=np.load(self.npy_path+'/predict_imgs.npy')

#         result=model.predict(predict_imgs,batch_size=1,verbose=1)
        
#         np.save(self.npy_path+'/predict_imgs_result.npy',result)
    
    # Create a image generator to create augmented images
    def trainGenerator(self,batch_size,train_path,image_folder,label_folder,aug_dict,image_color_mode = "grayscale",
                    label_color_mode = "grayscale",target_size = (512,512),seed = 1):
  
        image_datagen = ImageDataGenerator(**aug_dict)
        label_datagen = ImageDataGenerator(**aug_dict)
        image_generator = image_datagen.flow_from_directory(
            train_path,
            classes = [image_folder],
            class_mode = None,
            color_mode = image_color_mode,
            target_size = target_size,
            batch_size = batch_size,
            seed = seed)
        label_generator = label_datagen.flow_from_directory(
            train_path,
            classes = [label_folder],
            class_mode = None,
            color_mode = label_color_mode,
            target_size = target_size,
            batch_size = batch_size,
            seed = seed)
        train_generator = zip(image_generator, label_generator)
        for (img,label) in train_generator:
            if np.max(img)>1:
              img = img / 255
              label = label /255
              label[label > 0.5] = 1
              label[label <= 0.5] = 0
            yield (img,label)
            
    
      
      
# Wrap the dataset stored on Google Drive.
net = Unet(
    img_height=512,
    img_width=512,
    data_path='/content/drive/My Drive/data1',
    img_type='jpg',
    test_img_path='test_images',
    result_img_path='result_images',
)

# Steps of the offline pipeline and one-off helpers, kept for reference (not executed):
# net.load_data_to_gen_more_img()
# net.rename_file()
# net.create_train_data()
# net.predict_img()
# net.train_fit_gen()
# net.train()
# net.save_img()

# Train (or reuse) every fold; the returned list of fold folders is the cell output.
net.K_fold_val()



file exists, not need to train....................
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
file exists, not need to train....................
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
file exists, not need to train....................
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
file exists, not need to train....................
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
file exists, not need to train....................
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29


['/content/drive/My Drive/data1/1_fold',
 '/content/drive/My Drive/data1/4_fold',
 '/content/drive/My Drive/data1/3_fold',
 '/content/drive/My Drive/data1/2_fold',
 '/content/drive/My Drive/data1/5_fold']