In [3]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
# The google.colab package is provided by Google Colab runtimes.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# ---- Experiment configuration ----
# Label (as it appears in the file names) treated as the positive class in the
# one-vs-rest binary setup.
select_label = '0'
# Raw string: '\S' and '\c' are invalid escape sequences in a normal literal
# (deprecated, and a SyntaxWarning on recent Python versions). The value is unchanged.
# NOTE(review): this is a Windows-style local path while other cells mount Google
# Drive and unzip in Colab — confirm which environment this notebook targets.
path = r'D:\SRHproject\concat_images\concat_images'
image_size = 1024      # images are resized to image_size x image_size
n_classes = 2          # binary (selected label vs. the rest)
# n_classes = 8        # alternative: one class per entry in class_names
learning_rate = 0.001
epochs = 20

In [3]:
# Maps each category name to its integer class id (0-7); ids follow the
# position in the sequence below. Trailing spaces in some names are kept
# exactly as they were defined.
class_names = {
    category: class_id
    for class_id, category in enumerate((
        'adenocarcinoma',
        'hematopoietic ',
        'squamous cell carcinoma ',
        'melanoma',
        'sarcoma ',
        'neuroendocrine carcinoma ',
        'urothelial carcinoma ',
        'non-tumor',
    ))
}

In [None]:
import keras
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os, cv2, shutil
import warnings
import pandas as pd
import albumentations as album
from collections import Counter
from matplotlib import pyplot as plt
import keras.backend as K
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.applications.mobilenet import MobileNet

from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input
warnings.filterwarnings('ignore')

In [5]:
# Extract the image archive from the mounted Drive only when `path` does not exist yet.
# NOTE(review): unzip without -d extracts into the current working directory, while
# `path` is configured as a Windows-style location — confirm the archive really
# unpacks to `path`, otherwise this runs on every execution.
if not(os.path.exists(path)):
    !unzip -q /content/drive/MyDrive/d1/concat_images.zip

In [4]:
# Sort the listing: os.listdir() returns entries in arbitrary order, which would
# make the seeded patient split below differ between machines or runs.
files = sorted(os.listdir(path))

# Labels whose patients are not collected for the split.
# NOTE(review): presumably excluded because they have very few images
# (8 and 2 in the printed counts) — confirm.
excluded_labels = {'4', '6'}

patients = []           # unique patient ids, in first-seen order
labels = []             # label field of every file (used only for the summary)
seen_patients = set()   # O(1) membership test mirroring `patients`

for file_name in files:
    # File names are '_'-separated: field 0 is the patient id, field 4 the label.
    fields = file_name.split('_')
    patient_id, label = fields[0], fields[4]
    labels.append(label)
    if patient_id not in seen_patients and label not in excluded_labels:
        seen_patients.add(patient_id)
        patients.append(patient_id)

print(pd.DataFrame({'labels': labels}).value_counts())

# Split by patient (not by file) so all images of one patient land on the same side.
train_patients, valid_patients = train_test_split(patients, test_size=0.2, random_state=42)

labels
0         340
2          96
3          60
7          52
1          50
5          50
4           8
6           2
dtype: int64


In [5]:
def prepare_data(path, splited_data, select_label):
    """Build ``[file_path, target]`` pairs for the given patients.

    Args:
        path: Directory containing the image files. File names are
            '_'-separated; field 0 is the patient id and field 4 the label.
        splited_data: Iterable of patient ids to include.
        select_label: Label string treated as the positive class.

    Returns:
        A list of ``[file_path, target]`` entries, grouped by patient in the
        order given by ``splited_data``. ``target`` is 1 when the file's label
        equals ``select_label`` and 0 otherwise.
    """
    # Use the listing of `path` itself (previously a global `files` was read here,
    # so the `path` argument was ignored). Sorted for a deterministic order.
    all_files = sorted(os.listdir(path))

    # Index the files by patient once instead of rescanning every file per patient.
    files_by_patient = {}
    for file_name in all_files:
        files_by_patient.setdefault(file_name.split('_')[0], []).append(file_name)

    data = []
    for patient_id in splited_data:
        for file_name in files_by_patient.get(patient_id, ()):
            label = file_name.split('_')[4]
            data.append([os.path.join(path, file_name), int(label == select_label)])
    return data

# Build the [file_path, target] lists for the training and validation patients.
train_data, valid_data = (
    prepare_data(path, patient_subset, select_label)
    for patient_subset in (train_patients, valid_patients)
)

In [6]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that yields batches of grayscale images with one-hot labels.

    Each entry of ``list_IDs`` is a ``[file_path, class_index]`` pair. Images are
    read as single-channel, resized to ``image_size`` x ``image_size`` and scaled
    by 1/255.
    """

    def __init__(self, list_IDs, batch_size=16, n_classes=8, image_size = 1024, shuffle=True):
        """Store the configuration and build the initial sample order.

        Args:
            list_IDs: Sequence of ``[file_path, class_index]`` pairs.
            batch_size: Number of samples per batch (the last batch may be smaller).
            n_classes: Width of the one-hot label vectors.
            image_size: Side length the images are resized to.
            shuffle: Whether to reshuffle the sample order in ``on_epoch_end``.
        """
        # Lets the Keras base class initialise its own state where it defines any.
        super().__init__()
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.n_classes = n_classes
        self.image_size = image_size
        self.dim = (image_size, image_size, 1)
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch.

        Rounded up so that trailing samples which do not fill a whole batch are
        still served (as a smaller final batch) instead of being silently dropped.
        """
        return int(np.ceil(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(x, y)`` tuple of arrays."""
        # Positions (into list_IDs) of the samples belonging to this batch.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        data = [self.list_IDs[k][0] for k in indexes]
        labels = [self.list_IDs[k][1] for k in indexes]
        x, y = self.__data_generation(data, labels)

        return x, y

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, data, labels):
        """Load the given image files and one-hot encode their labels.

        Args:
            data: List of image file paths for one batch.
            labels: Class index for each path in ``data``.

        Returns:
            ``(x, y)`` where ``x`` has shape ``(len(data), image_size, image_size, 1)``
            and ``y`` has shape ``(len(data), n_classes)``.

        Raises:
            FileNotFoundError: If an image cannot be read by OpenCV.
        """
        # Size the arrays by the actual number of samples so a short final batch
        # is not padded with all-zero images and all-zero label rows.
        n_samples = len(data)
        x = np.zeros((n_samples, *self.dim), dtype=np.float32)
        y = np.zeros((n_samples, self.n_classes), dtype=np.float32)

        for i, data_name in enumerate(data):
            image = cv2.imread(data_name, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals failure by returning None rather than raising.
                raise FileNotFoundError('Could not read image: {}'.format(data_name))
            image = cv2.resize(image, (self.image_size, self.image_size)) / 255.
            x[i, :, :, 0] = image
            y[i, labels[i]] = 1
        return x, y

# Training samples are shuffled by the generator; validation keeps a fixed order.
train_generator = DataGenerator(
    list_IDs=train_data,
    batch_size=2,
    n_classes=n_classes,
    image_size=image_size,
    shuffle=True,
)
valid_generator = DataGenerator(
    list_IDs=valid_data,
    batch_size=2,
    n_classes=n_classes,
    image_size=image_size,
    shuffle=False,
)

In [7]:
def Global_attention_block(inputs):
    """Apply channel attention followed by spatial attention to a feature map.

    Channel gate: average-pool over the full spatial extent, then two 1x1
    convolutions (ReLU, then sigmoid) whose output multiplies ``inputs``.
    Spatial gate: sigmoid of the per-pixel mean over channels of that result,
    multiplied back onto it.

    Args:
        inputs: Feature-map tensor indexed as (batch, height, width, channels).

    Returns:
        The tensor after both attention gates have been applied.
    """
    static_shape = K.int_shape(inputs)
    height, width, channels = static_shape[1], static_shape[2], static_shape[3]

    # Channel attention: one gate value per channel.
    channel_gate = AveragePooling2D(pool_size=(height, width))(inputs)
    channel_gate = Conv2D(channels, 1, padding='same')(channel_gate)
    channel_gate = Activation('relu')(channel_gate)
    channel_gate = Conv2D(channels, 1, padding='same')(channel_gate)
    channel_gate = Activation('sigmoid')(channel_gate)
    channel_attended = Multiply()([channel_gate, inputs])

    # Spatial attention: one gate value per spatial position.
    spatial_gate = Lambda(lambda t: K.mean(t, axis=-1, keepdims=True))(channel_attended)
    spatial_gate = Activation('sigmoid')(spatial_gate)

    return Multiply()([spatial_gate, channel_attended])

def Category_attention_block(inputs,classes,k):
    """Re-weight a feature map with a class-driven single-channel attention map.

    A 1x1 convolution produces ``k * classes`` channels (BatchNorm + ReLU).
    Per-class scores come from global max pooling averaged over the ``k``
    channels of each class; per-class maps come from averaging the same
    ``k`` channels at every position. The score-weighted maps are averaged
    over classes into one attention map that multiplies ``inputs``.

    Args:
        inputs: Feature-map tensor indexed as (batch, height, width, channels).
        classes: Number of classes.
        k: Number of feature channels allotted to each class.

    Returns:
        ``inputs`` multiplied by the attention map.
    """
    static_shape = K.int_shape(inputs)
    height, width = static_shape[1], static_shape[2]

    class_features = Conv2D(k*classes, 1, padding='same')(inputs)
    class_features = BatchNormalization()(class_features)
    class_features = Activation('relu')(class_features)

    # Per-class score: strongest response of each channel, averaged over its k channels.
    pooled = GlobalMaxPool2D()(class_features)
    pooled = Reshape((classes, k))(pooled)
    class_scores = Lambda(lambda t: K.mean(t, axis=-1, keepdims=False))(pooled)

    # Per-class map: average of the k channels of each class at every position.
    grouped = Reshape((height, width, classes, k))(class_features)
    class_maps = Lambda(lambda t: K.mean(t, axis=-1, keepdims=False))(grouped)

    weighted_maps = Multiply()([class_scores, class_maps])
    attention_map = Lambda(lambda t: K.mean(t, axis=-1, keepdims=True))(weighted_maps)

    return Multiply()([inputs, attention_map])

# Backbone trained from scratch (weights=None) on single-channel inputs.
# base_model= MobileNet(weights=None, include_top=False, input_shape=(image_size,image_size,1))
# NOTE(review): the Keras EfficientNet docs state the model expects pixel values in
# [0, 255] and rescales internally, while DataGenerator already divides by 255 —
# confirm the inputs are not being scaled twice.
base_model = EfficientNetB0(weights=None, include_top=False, input_shape=(image_size, image_size, 1))

# Every backbone layer is fine-tuned.
for layer in base_model.layers:
    layer.trainable = True
all_layers = [layer.output for layer in base_model.layers]

k = 3  # feature channels per class inside the category attention block
base_model_out = all_layers[-1] #-3

GAB_out = Global_attention_block(base_model_out)
# Use the configured n_classes (instead of a hard-coded 2) so the attention block
# and the classification head always match the width of the generators' one-hot labels.
CAB_out = Category_attention_block(GAB_out, n_classes, k)
CAB_out = GlobalAveragePooling2D()(CAB_out)
CAB_out = Dropout(0.5)(CAB_out)
out = Dense(n_classes, activation='softmax')(CAB_out)
# Create Model
model = Model(base_model.input, out)
optimizer = Adam(learning_rate=learning_rate)
# model.compile(optimizer= optimizer, loss= 'categorical_crossentropy', metrics=['acc', AUC(), Recall(), Precision()])
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['acc'])

In [8]:
print('Train Result for Label {}'.format(select_label))

# Halve the learning rate when validation accuracy has not improved for 3 epochs.
lr_decay = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=3,
    verbose=1,
)
# Write a checkpoint only when validation loss improves.
save_model = ModelCheckpoint(
    'model_{}_weights.h5'.format(select_label),
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
# earlystop = EarlyStopping(monitor='val_loss', patience=24, verbose=0)
history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=epochs,
    workers=2,
    callbacks=[lr_decay, save_model],
)

Train Result for Label 0
Epoch 1/5
Epoch 1: val_loss improved from inf to 0.68969, saving model to model_0_weights.h5
Epoch 2/5
Epoch 2: val_loss did not improve from 0.68969
Epoch 3/5
Epoch 3: val_loss did not improve from 0.68969
Epoch 4/5
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 4: val_loss did not improve from 0.68969
Epoch 5/5
Epoch 5: val_loss did not improve from 0.68969


In [14]:
def plot_history(history, metric, select_label):
    """Save a train-vs-validation curve of one metric to a PNG file.

    Args:
        history: Object whose ``history`` dict holds per-epoch values under
            ``metric`` and ``'val_' + metric``.
        metric: Name of the metric to plot.
        select_label: Label identifier used in the output file name.

    The figure is written to ``'label <select_label> <metric>.png'`` and then closed.
    """
    train_values = history.history[metric]
    axes = plt.gca()
    axes.plot(train_values)
    axes.plot(history.history['val_'+metric])

    axes.set_title('model {}'.format(metric))
    axes.set_ylabel(metric)
    axes.set_xlabel('epoch')
    axes.legend(['train', 'val'], loc='upper left')

    axes.figure.savefig('label {} {}.png'.format(select_label, metric))
    plt.close()

# Save one curve image per tracked metric.
for metric_name in ('loss', 'acc'):
    plot_history(history, metric_name, select_label)