In [1]:
from keras.models import *
from keras.callbacks import *
from keras import backend as K
from data import *
import cv2
import argparse
import h5py
import matplotlib.pyplot as plt
from time import sleep
import numpy
from sklearn.metrics import roc_curve, auc
from scipy import interp
from itertools import cycle
from termcolor import colored
import sys
# Print arrays in full, never summarised with "...". NaN is not accepted as a
# threshold by recent numpy releases (ValueError); sys.maxsize is the
# documented way to request untruncated output and works on old releases too.
numpy.set_printoptions(threshold=sys.maxsize)
from modelNIHDenseNet import *
from pprint import pprint as pp
from keras.preprocessing.image import ImageDataGenerator
#from sklearn.utils import class_weight as cw
import tensorflow as tf
import random
import time
import json
import pickle

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------

# Root directory of the dataset (image folders, train.csv, cached .h5 file).
# Use 'data' instead for a dataset stored next to this script.
FLAG_DATASET_PATH = '/mnt/DataStorage/HumanProtein/'

# Expose only GPU 0 to TensorFlow.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Let TensorFlow grow its GPU memory on demand instead of reserving all of it.
# The session is created once and registered with Keras so that Keras runs
# the model in this session rather than in one it creates itself.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
K.set_session(sess)

# Layer name per backbone; not referenced in the visible part of this file.
# NOTE(review): 'densenet' maps to the same layer name as 'nasnet_mobile' --
# confirm that this is intended and not a copy/paste leftover.
CAM_LAYER_NAME = {
    'vgg16': 'block5_conv3',
    'resnet50': 'activation_49',
    'nasnet_mobile': 'normal_concat_12',
    'densenet': 'normal_concat_12',
}
FLAG_MODEL_CHOSEN = 'densenet'

FLAG_TRAIN_BATCH_SIZE = 9
FLAG_TRAIN_TOTAL_EPOCH = 200
FLAG_CLASS_WEIGHT_SMOOTHING = 0.4
# NOTE(review): "FLAS" looks like a typo for "FLAG"; the name is kept because
# other code in this file refers to it.
FLAS_HP_IMAGE_SIZE = 512

# Index into FLAG_DATA_MODE (0 = normal, 1 = binary). The original notes say
# that only the normal mode is currently supported.
FLAG_DATA_MODE_INDEX = 0
FLAG_DATA_MODE = ['FLAG_NORMAL_MODE', 'FLAG_BINARY_MODE']

# Previously saved model; only referenced by the commented-out load_model
# line in train().
LAST_MODEL_NAME = 'weights.epoch6.sub1.01-1.31.h5'
LAST_MODEL_PATH = os.path.join('model', FLAG_MODEL_CHOSEN, LAST_MODEL_NAME)

# Select the image folder and the tag used in the cached dataset file name.
FLAG_TEST_DATA_MODE = False
if not FLAG_TEST_DATA_MODE:
    FLAG_DATASET_IMAGE = os.path.join(FLAG_DATASET_PATH, 'images')
    Data_MODE = 'FULL'
else:
    FLAG_DATASET_IMAGE = os.path.join(FLAG_DATASET_PATH, 'images_test')
    Data_MODE = 'TEST'

LABEL_PATH = os.path.join(FLAG_DATASET_PATH, 'train.csv')

# Number of label columns (classes) per sample.
TAGS = 28

# Seed numpy's global random generator.
np.random.seed(2013)


def float_formatter(x):
    """Format a float with five decimal places for numpy array printing."""
    return "%.5f" % x


np.set_printoptions(formatter={'float_kind': float_formatter})
    
def train():
    """Train the model on the cached HDF5 dataset and plot the history.

    The first 80% of the samples are augmented on the fly and used for
    training; the remaining 20% are used unchanged for validation. The best
    model according to ``val_f1`` is checkpointed under
    ``model/<FLAG_MODEL_CHOSEN>/`` and training stops early after 15 epochs
    without improvement of ``val_f1``.

    NOTE(review): monitoring ``val_f1`` presumes that the model returned by
    ``get_model`` is compiled with a metric named ``f1`` -- confirm in
    modelNIHDenseNet.
    """
    model = get_model(TAGS, FLAS_HP_IMAGE_SIZE)
    # To resume from a saved model instead:
    #model = load_model(LAST_MODEL_PATH, custom_objects={"tf": tf})

    data_name = 'data_' + Data_MODE + '_0_' + FLAG_DATA_MODE[FLAG_DATA_MODE_INDEX] + '.h5'
    print('train dataset for images ', data_name)
    model_h5_path = os.path.join(FLAG_DATASET_PATH, data_name)

    # The file is only read here, so open it read-only, and make sure it is
    # closed even when reading or weight computation fails.
    data_model = tables.open_file(model_h5_path, mode='r')
    try:
        print('data size = ', len(data_model.root.data))

        # NOTE(review): the class weights are computed and printed but never
        # handed to fit_generator below -- confirm whether that is intended.
        class_weight = create_class_weights(data_model.root.tag, FLAG_CLASS_WEIGHT_SMOOTHING)
        pp(class_weight)
        print('class_weight = ', class_weight)

        # 80/20 split by position in the file.
        train_size = int(len(data_model.root.tag) * 0.8)

        x = data_model.root.data[:train_size]
        y = data_model.root.tag[:train_size]
        validation_x = data_model.root.data[train_size:]
        validation_y = data_model.root.tag[train_size:]
    finally:
        data_model.close()

    data_gen_args = dict(zoom_range=[0.8, 1.2], horizontal_flip=True, vertical_flip=True, rotation_range=360)

    # No feature-wise centering/normalisation or ZCA whitening is enabled, so
    # ImageDataGenerator.fit() has no statistics to compute and would only
    # copy the whole training array; it is therefore not called.
    image_datagen = ImageDataGenerator(**data_gen_args)

    seed = random.randint(0, 9999)
    image_generator = image_datagen.flow(x, y, seed=seed, batch_size=FLAG_TRAIN_BATCH_SIZE)

    print("Training..")
    model_weight_name = 'weights.epoch.sub.{epoch:02d}-{val_f1:.2f}.h5'
    checkpoint_dir = os.path.join('model', FLAG_MODEL_CHOSEN)
    # Create the checkpoint directory up front so that the first save at the
    # end of an epoch cannot fail on a missing folder.
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint_path = os.path.join(checkpoint_dir, model_weight_name)
    print('checkpoint_path = ', checkpoint_path)
    cp = ModelCheckpoint(checkpoint_path, monitor='val_f1', verbose=0,
                         save_best_only=True, save_weights_only=False, mode='max')
    es = EarlyStopping(monitor='val_f1', min_delta=0, patience=15, verbose=0, mode='max')

    # Whole number of batches per epoch, rounded up so the last partial batch
    # is included (true division would produce a float in Python 3).
    steps_per_epoch = (len(x) + FLAG_TRAIN_BATCH_SIZE - 1) // FLAG_TRAIN_BATCH_SIZE

    history = model.fit_generator(
        image_generator,
        validation_data=(validation_x, validation_y),
        steps_per_epoch=steps_per_epoch,
        epochs=FLAG_TRAIN_TOTAL_EPOCH,
        verbose=1,
        callbacks=[cp, es])

    plotHis(history.history)

def create_class_weights(y, smooth_factor=0.2):
    """Derive a per-class weight from label frequencies.

    Args:
        y: iterable of label rows; each row is added element-wise to a
            vector of length ``TAGS``.
        smooth_factor: when greater than zero, ``smooth_factor`` times the
            largest class count is added to every class count before the
            weights are computed.

    Returns:
        dict mapping class index (0 .. TAGS-1) to
        ``largest_count / class_count`` as a Python float.
    """
    # Column-wise sum of all label rows.
    totals = np.zeros(TAGS)
    for row in y:
        totals += row

    per_class = {idx: totals[idx] for idx in range(TAGS)}

    # Optional smoothing: lift every count by a share of the largest one.
    if smooth_factor > 0:
        bump = max(per_class.values()) * smooth_factor
        per_class = {idx: n + bump for idx, n in per_class.items()}

    top = max(per_class.values())
    return {idx: float(top / n) for idx, n in per_class.items()}

def plotHis(history):
    """Show the training curves stored in a Keras history dict.

    Two figures are shown one after the other: first the ``f1`` metric, then
    the ``loss``. Each plots the training series (``history[name]``) together
    with the validation series (``history['val_' + name]``).
    """
    def _show_curve(metric):
        # One figure per metric: training series first, validation second.
        plt.plot(history[metric])
        plt.plot(history['val_' + metric])
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

    for metric in ('f1', 'loss'):
        _show_curve(metric)

# Script entry: build the cached dataset if it is missing, train, and report
# the elapsed wall-clock time.
start_time = time.time()

data_name = 'data_' + Data_MODE + '_0_' + FLAG_DATA_MODE[FLAG_DATA_MODE_INDEX] + '.h5'
model_h5_path = os.path.join(FLAG_DATASET_PATH, data_name)
if not os.path.exists(model_h5_path):
    print('make dataset for images ', data_name)
    # Build into a temporary file and move it into place only on success.
    # If the build is interrupted, nothing is left at the final path, so the
    # existence check above cannot mistake a partial file for a finished
    # dataset on the next run.
    partial_h5_path = model_h5_path + '.partial'
    if os.path.exists(partial_h5_path):
        # Leftover from an earlier build that was killed; start clean.
        os.remove(partial_h5_path)
    try:
        load_hp_data_hd5_tf(FLAG_DATASET_IMAGE+'_0', FLAS_HP_IMAGE_SIZE, LABEL_PATH, TAGS, partial_h5_path)
        os.replace(partial_h5_path, model_h5_path)
    finally:
        # After a successful rename the temporary path no longer exists; on
        # failure (including KeyboardInterrupt) the partial file is removed.
        if os.path.exists(partial_h5_path):
            os.remove(partial_h5_path)

train()

print("--- %s seconds ---" % (time.time() - start_time))

Using TensorFlow backend.
  0%|          | 0/31072 [00:00<?, ?it/s]

make dataset for images  data_FULL_0_FLAG_NORMAL_MODE.h5


 79%|███████▊  | 24458/31072 [19:51<05:22, 20.53it/s]

KeyboardInterrupt: 