In [None]:
from argparse import ArgumentParser
from generators_binary import PatchGenerator, PatchSequence
from keras.callbacks import Callback
from keras.models import load_model
from keras.optimizers import Adam, RMSprop
from keras.utils import multi_gpu_model
from networks_binary import (create_initial_model,
                      create_second_model,
                      create_squeezenet3d_model,
                      create_squeezenet3d_model2
                     )
from skimage.transform import rotate
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import KFold
from sklearn.utils.class_weight import compute_class_weight

# machine learning / deep learning
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# TensorFlow session options: grow GPU memory on demand instead of reserving
# it all up front, and log which device each op is placed on.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
# NOTE(review): `set_session` is imported above but never called in the
# visible code, so this session is not registered with Keras here. The
# training functions also call K.clear_session() at the start of every fold,
# which presumably discards any session registered at import time - confirm
# whether these options are meant to apply during training.
sess = tf.Session(config=config)

import keras.backend as K
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sys, os, warnings

# Root directory of the dataset; `trainingSet.csv` is read from here and the
# same path is handed to the patch generators as `input_dir`.
DATADIR = '/projects/0/ismi2018/FINALPROJECTS/BREAST_3D_ULTRASOUND/shareWithStudents'

# Network name -> model factory. `create_model` calls the selected factory
# with a single `drop_rate` keyword argument.
NETWORKS = {
    'initial': create_initial_model,
    'second': create_second_model,
    'squeezenet3d': create_squeezenet3d_model,
    'squeezenet3d2': create_squeezenet3d_model2
}

# Loss passed to `compile()` for every model built by `create_model`.
LOSS_FUNCTION = 'categorical_crossentropy'

# Optimizer name -> Keras optimizer class. `create_optimizer` instantiates the
# selected class with `lr` and `decay` keyword arguments.
OPTIMIZERS = {
    'adam': Adam,
    'rmsprop': RMSprop,
}


class MultiGPUCheckpoint(Callback):
    """Save `self.model.layers[-2]` whenever the validation loss improves.

    The callback is attached to the model that is being fitted and saves the
    second-to-last layer of that model rather than the model itself.
    NOTE(review): this assumes the fitted model is a `multi_gpu_model`
    wrapper whose second-to-last layer is the underlying single-GPU model -
    confirm for multi-output networks.

    Args:
        filename: Path the checkpoint is written to (overwritten on each
            improvement).
        verbose: When greater than 0, print a message for every save.

    Attributes:
        val_loss: Validation loss recorded at the end of every epoch for
            which one was available, in epoch order.
    """

    def __init__(self, filename, verbose=0):
        super().__init__()
        self.filename = filename
        self.verbose = verbose
        self.val_loss = []
        # Best finite validation loss seen so far. Kept separately from the
        # history so a NaN entry cannot poison later comparisons.
        self._best = float('inf')

    def on_epoch_end(self, epoch, logs=None):
        """Checkpoint on the first epoch and on every new best `val_loss`."""
        current = (logs or {}).get('val_loss')
        if current is None:
            # Without validation data there is nothing to compare; skip the
            # epoch instead of crashing the whole training run.
            warnings.warn(
                'MultiGPUCheckpoint: no val_loss in logs, skipping checkpoint',
                RuntimeWarning)
            return

        is_first = not self.val_loss
        improved = current < self._best  # always False for NaN
        if is_first or improved:
            if self.verbose > 0:
                print('Saving to {}'.format(self.filename))
            self.model.layers[-2].save(self.filename)
        if improved:
            self._best = current
        self.val_loss.append(current)


class Accuracies(Callback):
    """Record per-class accuracy on the validation sequence after each epoch.

    Args:
        valid_seq: Validation sequence. It is passed to `predict_generator`
            and must provide `get_all_labels()`.
        step: Selects the label remapping. With 0 the raw labels are mapped
            2 -> 1, 20 -> 1, 21 -> 0; with any value above 0 the entries
            equal to 21 are dropped and the rest mapped 2 -> 1, 20 -> 0.

    Attributes:
        label_accuracies: One array per epoch holding, for each row of the
            confusion matrix, the diagonal count divided by the row total.
    """

    def __init__(self, valid_seq, step=0):
        super().__init__()
        self.valid_seq = valid_seq
        self.label_accuracies = []
        self.step = step

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the validation sequence and store per-class accuracy."""
        y_pred = self.model.predict_generator(self.valid_seq,
                                              workers=4,
                                              use_multiprocessing=True)
        y_pred = np.argmax(y_pred, axis=1)

        # Work on a copy: the remapping below assigns in place, and the array
        # returned by get_all_labels() may be the sequence's own label store.
        y_true = np.array(self.valid_seq.get_all_labels())

        if self.step > 0:
            y_true = y_true[y_true != 21]
            y_true[y_true == 2] = 1
            y_true[y_true == 20] = 0
        else:
            y_true[y_true == 2] = 1
            y_true[y_true == 20] = 1
            y_true[y_true == 21] = 0

        cm = confusion_matrix(y_true, y_pred)
        # Row-normalised diagonal; a row without any true sample yields NaN.
        ps = cm.diagonal() / cm.sum(axis=1)
        self.label_accuracies.append(ps)


def create_model(network, optimizer, drop_rate, multi_gpu):
    """Build and compile one of the registered networks.

    Args:
        network: Key into `NETWORKS` selecting the model factory.
        optimizer: Optimizer instance handed to `compile()`.
        drop_rate: Forwarded to the factory as `drop_rate`.
        multi_gpu: When truthy, the model is wrapped with `multi_gpu_model`
            and the wrapper is compiled; otherwise the model itself is.

    Returns:
        A `(model, wrapper)` tuple. `wrapper` is None when `multi_gpu` is
        falsy. Only one of the two is compiled.
    """
    base_model = NETWORKS[network](drop_rate=drop_rate)
    replica_model = multi_gpu_model(base_model) if multi_gpu else None

    # Exactly one model gets compiled: the wrapper if there is one, else the
    # plain model.
    compile_target = base_model if replica_model is None else replica_model
    compile_target.compile(optimizer=optimizer,
                           loss=LOSS_FUNCTION,
                           metrics=['accuracy'])
    return base_model, replica_model


def create_optimizer(name, lr, decay):
    """Instantiate the optimizer registered under `name` in `OPTIMIZERS`.

    Args:
        name: Key into `OPTIMIZERS`.
        lr: Passed to the optimizer class as `lr`.
        decay: Passed to the optimizer class as `decay`.

    Returns:
        The new optimizer instance.
    """
    optimizer_cls = OPTIMIZERS[name]
    return optimizer_cls(lr=lr, decay=decay)


def make_augmentation_func(aug, aug_hflip, aug_vflip, aug_rotate,
                           aug_brightness):
    """Build a random augmentation function, or None when `aug` is falsy.

    Each transform is applied when a fresh `np.random.random()` draw is
    greater than `aug` and the corresponding flag is truthy.
    NOTE(review): a larger `aug` therefore means fewer augmentations -
    confirm this is the intended direction.

    Args:
        aug: Threshold the random draws are compared against.
        aug_hflip: Enables flipping along axis 1.
        aug_vflip: Enables flipping along axis 0.
        aug_rotate: Enables rotation and is also used as the upper bound of
            the uniformly drawn rotation angle (passed to skimage `rotate`).
        aug_brightness: Enables a uniform intensity shift bounded by
            `min(1 - img.max(), img.min())`.

    Returns:
        None when `aug` is falsy. Otherwise a function that augments an
        image and returns its `[15:55, 15:55, ...]` crop.
    """
    if not aug:
        return None

    def _fires(enabled):
        # Draw first, then look at the flag, so one random number is
        # consumed per transform whether or not it is enabled.
        return np.random.random() > aug and enabled

    def augf(img):
        if _fires(aug_hflip):
            img = np.fliplr(img)
        if _fires(aug_vflip):
            img = np.flipud(img)
        if _fires(aug_rotate):
            # Drop singleton axes for the rotation, then restore a trailing
            # axis afterwards.
            rotated = rotate(np.squeeze(img),
                             np.random.uniform(0, aug_rotate))
            img = rotated[..., np.newaxis]
        if _fires(aug_brightness):
            headroom = min(1. - img.max(), img.min())
            img = img + np.random.uniform(-headroom, headroom)
        return img[15:55, 15:55, ...]

    return augf


def make_generators(csv, train_patients, validation_patients, batch_size, step,
                    augf):
    """Create the training generator and the validation sequence.

    Rows of `csv` are assigned to a split by matching their `patientID`
    against the given patient lists.

    Args:
        csv: DataFrame with a `patientID` column.
        train_patients: Patient ids whose rows go to the training generator.
        validation_patients: Patient ids whose rows go to the validation
            sequence.
        batch_size: Forwarded to both generators.
        step: Forwarded to both generators.
        augf: Augmentation function for the training generator only.

    Returns:
        A `(PatchGenerator, PatchSequence)` tuple.
    """
    patient_ids = csv['patientID']
    train_rows = csv.loc[patient_ids.isin(train_patients), :]
    valid_rows = csv.loc[patient_ids.isin(validation_patients), :]

    # Options common to both generators.
    shared = dict(input_dir=DATADIR, batch_size=batch_size, step=step)

    train_gen = PatchGenerator(dataframe=train_rows,
                               augmentation_fn=augf,
                               **shared)
    valid_seq = PatchSequence(dataframe=valid_rows, **shared)

    return train_gen, valid_seq


def _binarize_labels(labels, step):
    """Return a remapped copy of the raw labels for the given training step.

    Step 0 maps 2 -> 1, 20 -> 1 and 21 -> 0. Any step above 0 drops the
    entries equal to 21 and maps 2 -> 1 and 20 -> 0. The input is never
    modified.
    """
    labels = np.array(labels)
    if step > 0:
        labels = labels[labels != 21]
        labels[labels == 2] = 1
        labels[labels == 20] = 0
    else:
        labels[labels == 2] = 1
        labels[labels == 20] = 1
        labels[labels == 21] = 0
    return labels


def _train_step(step, results_dir, file_tag, score_tag, step_label):
    """Run 5-fold patient-level cross-validation for one training step.

    For every fold this trains a multi-GPU 'squeezenet3d' model, checkpoints
    the best epoch, reloads that checkpoint for a confusion matrix on the
    validation split, and saves three plots next to the checkpoint. The mean
    of the per-fold best `val_acc` is written to `<score_tag>_score.txt`.

    Args:
        step: Value forwarded to the generators and the `Accuracies`
            callback; also selects the label remapping.
        results_dir: Directory for checkpoints and plots (created if absent).
        file_tag: Prefix of the per-fold checkpoint file names.
        score_tag: Prefix of the score file name.
        step_label: Human-readable step name used in the score file text.
    """
    csv = pd.read_csv(os.path.join(DATADIR, 'trainingSet.csv'), dtype=str)

    # Split on patients rather than patches so that no patient contributes
    # to both the training and the validation side of a fold.
    unique_patients = csv.patientID.unique()
    kf = KFold(5, shuffle=True, random_state=42)

    # NOTE(review): `aug_rotate=True` is also used by make_augmentation_func
    # as the upper bound of the rotation angle, i.e. np.random.uniform(0, 1),
    # so rotations stay below one unit of angle - confirm this is intended.
    augf = make_augmentation_func(0.5, True, True, True, True)

    # Saving a checkpoint or a figure into a missing directory fails.
    os.makedirs(results_dir, exist_ok=True)

    best_val_accs = []
    for fold, (train_idxs, val_idxs) in enumerate(kf.split(unique_patients),
                                                  start=1):
        K.clear_session()
        print('Fold {}'.format(fold))

        train_gen, valid_seq = make_generators(csv,
                                               unique_patients[train_idxs],
                                               unique_patients[val_idxs],
                                               6,
                                               step,
                                               augf)

        optimizer = create_optimizer('adam', 1e-5, 1e-6)
        orig_net, parallel_net = create_model('squeezenet3d', optimizer,
                                              0.5,
                                              True)

        save_filename = '{}/{}_fold_{}.h5'.format(results_dir, file_tag, fold)

        # The model is always built with multi_gpu=True above, so the
        # multi-GPU aware checkpoint is the only applicable one.
        cp = MultiGPUCheckpoint(save_filename, verbose=1)
        ps = Accuracies(valid_seq, step)

        train_model = parallel_net if parallel_net is not None else orig_net

        results = train_model.fit_generator(train_gen,
                                            steps_per_epoch=len(train_gen),
                                            validation_data=valid_seq,
                                            epochs=250,
                                            workers=4,
                                            use_multiprocessing=True,
                                            callbacks=[cp, ps],
                                            verbose=1)

        # Training curves.
        h = results.history
        curve_names = ['loss', 'acc', 'val_loss', 'val_acc']
        fig = plt.figure()
        for curve in curve_names:
            plt.plot(h[curve])
        plt.legend(curve_names)
        plt.savefig('{}.traininglog.png'.format(save_filename))
        plt.close(fig)  # figures otherwise accumulate across folds

        # Confusion matrix of the best checkpoint on the validation split.
        y_true = _binarize_labels(valid_seq.get_all_labels(), step)
        best_net = load_model(save_filename)
        y_pred = best_net.predict_generator(valid_seq,
                                            workers=4,
                                            use_multiprocessing=True)
        y_pred = np.argmax(y_pred, axis=1)
        cm = confusion_matrix(y_true, y_pred)

        fig = plt.figure()
        sns.heatmap(cm, annot=True)
        plt.savefig('{}.confusionmatrix.png'.format(save_filename))
        plt.close(fig)

        # Per-class accuracy over the epochs, one line per class.
        precs = np.array(ps.label_accuracies)
        fig = plt.figure()
        for label in range(precs.shape[1]):
            plt.plot(precs[:, label])
        plt.legend([str(label) for label in range(precs.shape[1])])
        plt.savefig('{}.accuracies.png'.format(save_filename))
        plt.close(fig)

        best_val_accs.append(max(h['val_acc']))

    with open('{}_score.txt'.format(score_tag), 'w') as f:
        print('Mean accuracy {} step: {:.4f}\n'.format(
            step_label, np.mean(best_val_accs)), file=f)


def train_model_1st():
    """Train the first step (generator step 0) with 5-fold cross-validation.

    Checkpoints and plots go to `./results_1st`; the mean best validation
    accuracy is written to `2step-1_score.txt`.
    """
    _train_step(step=0,
                results_dir='./results_1st',
                file_tag='1st-step',
                score_tag='2step-1',
                step_label='1st')


def train_model_2nd():
    """Train the second step (generator step 1) with 5-fold cross-validation.

    Checkpoints and plots go to `./results_2nd`; the mean best validation
    accuracy is written to `2step-2_score.txt`.
    """
    _train_step(step=1,
                results_dir='./results_2nd',
                file_tag='2nd-step',
                score_tag='2step-2',
                step_label='2nd')

Using TensorFlow backend.


In [None]:
# Run the first-step cross-validated training; the second step is left
# commented out in this cell.
train_model_1st()
#train_model_2nd()

Fold 1
PatchGenerator detected: 239 patch samples.
- Cysts: 71 cases
- Tumors: 130 cases
- Fibroadenoma: 38 cases
PatchSequence detected: 59 patch samples.
- Cysts: 18 cases
- Tumors: 36 cases
- Fibroadenoma: 5 cases




Epoch 1/250
Epoch 2/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 3/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 4/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 5/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 6/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 7/250
Epoch 8/250
Epoch 9/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 10/250
Epoch 11/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 12/250
Epoch 13/250
Epoch 14/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 15/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 16/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 17/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 18/250
Epoch 19/250
Epoch 20/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 21/250
Epoch 22/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 23/250
Epoch 24/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 25/250
Epoch 26/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 27/250
Saving to ./r

Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 58/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78/250
Epoch 79/250
Epoch 80/250
Epoch 81/250
Epoch 82/250
Epoch 83/250
Epoch 84/250
Epoch 85/250
Epoch 86/250
Epoch 87/250
Epoch 88/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 89/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 90/250
Epoch 91/250
Epoch 92/250
Epoch 93/250
Epoch 94/250
Epoch 95/250
Epoch 96/250
Epoch 97/250
Epoch 98/250
Epoch 99/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 100/250
Epoch 101/250
Epoch 102/250
Epoch 103/250
Epoch 104/250
Epoch 105/250
Epoch 106/250
Epoch 107/250
Epoch 108/250
Epoch 109/250
Epoc

Epoch 171/250
Epoch 172/250
Epoch 173/250
Epoch 174/250
Epoch 175/250
Epoch 176/250
Epoch 177/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 178/250
Epoch 179/250
Epoch 180/250
Epoch 181/250
Epoch 182/250
Epoch 183/250
Epoch 184/250
Epoch 185/250
Epoch 186/250
Epoch 187/250
Epoch 188/250
Epoch 189/250
Epoch 190/250
Epoch 191/250
Epoch 192/250
Epoch 193/250
Epoch 194/250
Epoch 195/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 196/250
Epoch 197/250
Epoch 198/250
Epoch 199/250
Saving to ./results_1st/1st-step_fold_1.h5
Epoch 200/250
Epoch 201/250
Epoch 202/250
Epoch 203/250
Epoch 204/250
Epoch 205/250
Epoch 206/250
Epoch 207/250
Epoch 208/250
Epoch 209/250
Epoch 210/250
Epoch 211/250
Epoch 212/250
Epoch 213/250
Epoch 214/250
Epoch 215/250
Epoch 216/250
Epoch 217/250
Epoch 218/250
Epoch 219/250
Epoch 220/250
Epoch 221/250
Epoch 222/250
Epoch 223/250
Epoch 224/250
Epoch 225/250
Epoch 226/250
Epoch 227/250
Epoch 228/250
Epoch 229/250
Epoch 230/250
Saving to ./results_1st/1st-ste



Fold 2
PatchGenerator detected: 241 patch samples.
- Cysts: 73 cases
- Tumors: 136 cases
- Fibroadenoma: 32 cases
PatchSequence detected: 57 patch samples.
- Cysts: 16 cases
- Tumors: 30 cases
- Fibroadenoma: 11 cases
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 11/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 12/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 13/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 14/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 15/250
Epoch 16/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 17/250
Epoch 18/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 19/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 20/250
Epoch 21/250
Epoch 22/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 23/250
Epoch 24/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/2

Epoch 116/250
Epoch 117/250
Epoch 118/250
Epoch 119/250
Epoch 120/250
Epoch 121/250
Epoch 122/250
Epoch 123/250
Epoch 124/250
Epoch 125/250
Epoch 126/250
Epoch 127/250
Epoch 128/250
Epoch 129/250
Epoch 130/250
Epoch 131/250
Epoch 132/250
Epoch 133/250
Epoch 134/250
Epoch 135/250
Epoch 136/250
Epoch 137/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 138/250
Epoch 139/250
Epoch 140/250
Epoch 141/250
Epoch 142/250
Epoch 143/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 144/250
Epoch 145/250
Epoch 146/250
Epoch 147/250
Epoch 148/250
Epoch 149/250
Epoch 150/250
Epoch 151/250
Epoch 152/250
Epoch 153/250
Epoch 154/250
Epoch 155/250
Epoch 156/250
Epoch 157/250
Epoch 158/250
Epoch 159/250
Epoch 160/250
Epoch 161/250
Epoch 162/250
Epoch 163/250
Epoch 164/250
Epoch 165/250
Epoch 166/250
Epoch 167/250
Epoch 168/250
Epoch 169/250
Epoch 170/250
Epoch 171/250
Epoch 172/250
Epoch 173/250
Epoch 174/250


Epoch 175/250
Epoch 176/250
Epoch 177/250
Epoch 178/250
Epoch 179/250
Epoch 180/250
Epoch 181/250
Epoch 182/250
Epoch 183/250
Epoch 184/250
Epoch 185/250
Epoch 186/250
Epoch 187/250
Epoch 188/250
Epoch 189/250
Epoch 190/250
Epoch 191/250
Epoch 192/250
Epoch 193/250
Epoch 194/250
Epoch 195/250
Epoch 196/250
Epoch 197/250
Epoch 198/250
Epoch 199/250
Epoch 200/250
Epoch 201/250
Epoch 202/250
Epoch 203/250
Epoch 204/250
Epoch 205/250
Epoch 206/250
Epoch 207/250
Epoch 208/250
Epoch 209/250
Epoch 210/250
Epoch 211/250
Epoch 212/250
Epoch 213/250
Epoch 214/250
Epoch 215/250
Epoch 216/250
Epoch 217/250
Epoch 218/250
Epoch 219/250
Epoch 220/250
Epoch 221/250
Epoch 222/250
Epoch 223/250
Epoch 224/250
Epoch 225/250
Epoch 226/250
Epoch 227/250
Epoch 228/250
Epoch 229/250
Epoch 230/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 231/250
Epoch 232/250
Epoch 233/250
Epoch 234/250


Epoch 235/250
Epoch 236/250
Epoch 237/250
Epoch 238/250
Epoch 239/250
Epoch 240/250
Epoch 241/250
Epoch 242/250
Epoch 243/250
Epoch 244/250
Epoch 245/250
Epoch 246/250
Epoch 247/250
Epoch 248/250
Saving to ./results_1st/1st-step_fold_2.h5
Epoch 249/250
Epoch 250/250
Fold 3
PatchGenerator detected: 229 patch samples.
- Cysts: 67 cases
- Tumors: 126 cases
- Fibroadenoma: 36 cases
PatchSequence detected: 69 patch samples.
- Cysts: 22 cases
- Tumors: 40 cases
- Fibroadenoma: 7 cases
Epoch 1/250
Epoch 2/250
Epoch 3/250
Saving to ./results_1st/1st-step_fold_3.h5
Epoch 4/250
Saving to ./results_1st/1st-step_fold_3.h5
Epoch 5/250
Saving to ./results_1st/1st-step_fold_3.h5
Epoch 6/250
Saving to ./results_1st/1st-step_fold_3.h5
Epoch 7/250
Epoch 8/250
Saving to ./results_1st/1st-step_fold_3.h5
Epoch 9/250
Saving to ./results_1st/1st-step_fold_3.h5
Epoch 10/250
Saving to ./results_1st/1st-step_fold_3.h5
Epoch 11/250
Saving to ./results_1st/1st-step_fold_3.h5
Epoch 12/250
Saving to ./results_1st/1

Epoch 99/250
Epoch 100/250
Epoch 101/250
Epoch 102/250
Epoch 103/250
Epoch 104/250
Epoch 105/250
Epoch 106/250
Epoch 107/250
Epoch 108/250
Epoch 109/250
Epoch 110/250
Epoch 111/250
Epoch 112/250
Epoch 113/250
Epoch 114/250
Epoch 115/250
Epoch 116/250
Epoch 117/250
Epoch 118/250
Epoch 119/250
Epoch 120/250
Epoch 121/250
Epoch 122/250
Epoch 123/250
Epoch 124/250
Epoch 125/250
Epoch 126/250
Epoch 127/250
Epoch 128/250
Epoch 129/250
Epoch 130/250
Epoch 131/250
Epoch 132/250
Epoch 133/250
Epoch 134/250
Epoch 135/250

In [None]:
# Run the second-step cross-validated training.
train_model_2nd()