In [2]:
import os
import random
import sys
import warnings

import imageio
import numpy as np
import pandas as pd
import tensorflow as tf
from imgaug import augmenters as iaa
from keras.applications import InceptionV3
from keras.layers import GlobalAveragePooling2D, Input
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from skimage.transform import resize
from sklearn.metrics import auc
from sklearn.metrics import roc_curve

# --- Hyper-parameters ---------------------------------------------------------
# Spatial size every image is resized to; also used for the InceptionV3 input.
target_size = (299, 299)
# Images per model.predict() call during feature extraction, and the
# mini-batch size passed to model2.fit() / model2.evaluate() further down.
batch_size = 256
# NOTE(review): not referenced by the visible cells (fit() below passes epochs=300).
epochs = 1000
# Drop probability of the Dropout layer in the dense classifier (model2).
dropout_rate = 0.5
# Learning rate handed to the RMSprop optimizer of model2.
learning_rate = 0.0001

# --- Paths ----------------------------------------------------------------------
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
work_dir = '/home/jarvis/workspace/ai_osteoporosis_hip'
share_dir = '/sdb1/share/ai_osteoporosis_hip'
crop_dir = os.path.join(share_dir, 'crop')          # image files read by read_image()
xls_dir = os.path.join(share_dir, 'excel_files')
xls_final = os.path.join(xls_dir, 'final.xlsx')     # spreadsheet loaded by generate_df()

# --- Feature extractor ----------------------------------------------------------
# InceptionV3 with ImageNet weights and without its classification head.
conv_base = InceptionV3(weights='imagenet',
                  include_top=False,
                  input_shape=(target_size[0], target_size[1], 3))

# Wrap the convolutional base and append global average pooling, so that
# `model` maps an image batch to one 2048-wide feature vector per image
# (see the model.summary() output below).
i = conv_base.input
x = conv_base(i)
x = GlobalAveragePooling2D()(x)
model = Model(inputs=i, outputs=x)

In [3]:
# Print the layer-by-layer summary of the feature extractor built above.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 299, 299, 3)       0         
_________________________________________________________________
inception_v3 (Model)         (None, 8, 8, 2048)        21802784  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 2048)              0         
Total params: 21,802,784
Trainable params: 21,768,352
Non-trainable params: 34,432
_________________________________________________________________


In [70]:
import random
import matplotlib.pyplot as plt

# Default figure size for matplotlib plots.
plt.rcParams['figure.figsize'] = (5, 5) 

# Augmentation parameters
# imgaug pipeline that read_image() applies when is_train is True.
# The augmenters run in the order listed.
seq = iaa.Sequential([
    iaa.Fliplr(0.5), # horizontally flip 50% of the images
#    iaa.Flipud(0.5),
    iaa.Crop(percent=(0, 0.1)),   # crop a random 0-10% from the image sides
    iaa.Multiply((0.9, 1.1)),     # scale pixel values by a random factor in [0.9, 1.1]
    iaa.Affine(
        translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)}, #{"x": (-0.2, 0.2), "y": (-0.2, 0.2)}, 
        rotate=(-40, 40),
        shear=(-20, 20),
        order=[0, 1], # use nearest neighbour or bilinear interpolation (fast)
        cval=0,  # NOTE(review): presumably ignored while mode='edge' - confirm against the imgaug docs
        mode='edge'#['constant', 'edge']
    ),
    # Exactly one of the following is applied per image.
    iaa.OneOf([
        iaa.GaussianBlur((0, 1.0)),
#        iaa.AverageBlur(k=(2, 7)),
        iaa.Sharpen(alpha=(0, 0.1), lightness=(0.75, 1.5))
    ]),
])


def generate_df(xls_file):
    """Load the study spreadsheet and add model-ready columns.

    The numeric columns ``age``, ``height``, ``weight`` and ``bmi`` are
    standardised in place as ``(value - mean) / std``, where mean and std are
    computed on the rows with ``split == 1`` only and then applied to every
    row. A ``sex_code`` column is added with ``'M' -> 1`` and ``'F' -> 0``.

    Args:
        xls_file: Path of the Excel file. It must provide the columns
            ``split``, ``age``, ``height``, ``weight``, ``bmi`` and ``sex``;
            ``patient_id`` is read as a string.

    Returns:
        The loaded DataFrame with the standardised columns and ``sex_code``.

    Raises:
        KeyError: If ``sex`` holds a value other than ``'M'`` or ``'F'``.
    """
    frame = pd.read_excel(xls_file, dtype={'patient_id': str})

    # Statistics come from the split == 1 rows only.
    reference_rows = frame[frame['split'] == 1]

    for col in ('age', 'height', 'weight', 'bmi'):
        center = reference_rows[col].mean()
        spread = reference_rows[col].std()
        frame[col] = (frame[col] - center) / spread

    # Plain dict indexing (not a dict-based .map) so that an unexpected value
    # raises KeyError instead of silently becoming NaN.
    codes = {'M': 1, 'F': 0}
    frame['sex_code'] = frame['sex'].map(codes.__getitem__)

    return frame

def resample(df):
    """Oversample positive training rows and shuffle the result.

    Rows with ``split`` 2 or 3 are kept once, as are rows whose ``class2_1``
    equals 0. Every other row (a non-zero label outside splits 2 and 3) is
    emitted three times. The collected rows are shuffled with
    ``random.shuffle`` before the new frame is built.

    Args:
        df: DataFrame providing ``age``, ``sex_code``, ``bmi``, ``class2_1``,
            ``split`` and ``file_name``.

    Returns:
        A new DataFrame restricted to those six columns, in shuffled order.
    """
    kept_columns = ['age', 'sex_code', 'bmi', 'class2_1', 'split', 'file_name']
    records = []

    for _, row in df.iterrows():
        record = tuple(row[name] for name in kept_columns)
        oversample = row['split'] not in [2, 3] and row['class2_1'] != 0
        records.extend([record] * (3 if oversample else 1))

    random.shuffle(records)
    return pd.DataFrame(records, columns=kept_columns)

def read_image(file_path, target_size, is_train):
    """Read one image file and turn it into a 3-channel float array.

    The pixel values are divided by the image maximum, the result is resized
    to ``target_size`` and stacked three times along the last axis. When
    ``is_train`` is true, the module-level imgaug pipeline ``seq`` is applied.

    Args:
        file_path: Path of the image file.
        target_size: Output shape handed to ``skimage.transform.resize``.
        is_train: Whether to apply the random augmentation pipeline.

    Returns:
        The stacked (and possibly augmented) image array.
    """
    # presumably a single-channel image - the stack below triples the last axis
    img_gray = np.array(imageio.imread(file_path))
    max_val = np.amax(img_gray)
    if max_val != 0:
        img_gray = img_gray / max_val
    else:
        # An all-zero image would turn into 0/0 = NaN everywhere and feed NaNs
        # into the network; keep it as a plain float image of zeros instead.
        img_gray = img_gray.astype(np.float64)
    img_resized = resize(img_gray, target_size)
    img = np.dstack((img_resized, img_resized, img_resized))

    if is_train:
        img = seq.augment_image(img)
    return img


def extract_features(df, dir_name, target_size, batch_size, set_name, augment_rounds=5):
    """Run one data split through the global feature extractor ``model``.

    For ``set_name == 'train'`` the whole split is processed
    ``augment_rounds`` times with random augmentation, and the passes are
    stored one after another. The other splits are processed once without
    augmentation.

    Args:
        df: DataFrame with ``split``, ``file_name`` and ``class2_1`` columns.
        dir_name: Directory that contains the image files.
        target_size: Image size passed to ``read_image``; also the height and
            width of the batches sent to ``model.predict``.
        batch_size: Maximum number of images per ``model.predict`` call.
        set_name: ``'train'``, ``'validation'`` or ``'test'``
            (``split`` values 1, 2 and 3).
        augment_rounds: Number of augmented passes over the training split.
            The default of 5 keeps the previous output shape.

    Returns:
        ``(features, labels)`` with shapes ``(count * rounds, 2048)`` and
        ``(count * rounds,)``, where ``rounds`` is ``augment_rounds`` for the
        training split and 1 otherwise. An empty split yields empty arrays.

    Raises:
        KeyError: If ``set_name`` is not one of the three known names.
    """
    split_dict = {'train': 1, 'validation': 2, 'test': 3}
    split = split_dict[set_name]
    data = df[df['split'] == split]
    count = len(data)

    is_train = set_name == 'train'
    rounds = augment_rounds if is_train else 1

    print('sample count=%d' % count)

    # Pull the two needed columns out once instead of a per-image iloc lookup.
    file_names = data['file_name'].tolist()
    class_labels = data['class2_1'].values

    # 2048 is the output width of the pooled InceptionV3 extractor.
    features = np.zeros(shape=(count * rounds, 2048))
    labels = np.zeros(shape=(count * rounds))

    for round_idx in range(rounds):
        offset = count * round_idx
        # Stepping by batch_size yields exactly the batches that exist. The
        # former range(div + 1) loop added a phantom full batch whenever count
        # was a multiple of batch_size (or zero) and then indexed past the end.
        for start in range(0, count, batch_size):
            stop = min(start + batch_size, count)
            img_batch = np.zeros(shape=(stop - start, target_size[0], target_size[1], 3))

            for slot, row in enumerate(range(start, stop)):
                file_path = os.path.join(dir_name, file_names[row])
                img_batch[slot] = read_image(file_path, target_size, is_train=is_train)
                print('Processing image %d...\r' % (row + 1), end='')
                sys.stdout.flush()

            features[offset + start:offset + stop] = model.predict(img_batch)
            labels[offset + start:offset + stop] = class_labels[start:stop]
            if is_train:
                print('Filling batch %d~%d...' % (offset + start, offset + stop))
    return features, labels

def extract_train_features(df, dir_name, target_size, batch_size, augment_rounds=5):
    """Extract randomly augmented features for the training split (``split == 1``).

    The split is passed through ``read_image(..., is_train=True)`` and the
    global ``model`` ``augment_rounds`` times, and the passes are stored one
    after another.

    Previously the result was allocated for five passes while only one was
    computed, so four fifths of the returned rows were all-zero feature
    vectors labelled 0. The output shape is unchanged, but every row is now
    filled. Pass ``augment_rounds=1`` for a single pass.

    Args:
        df: DataFrame with ``split``, ``file_name`` and ``class2_1`` columns.
        dir_name: Directory that contains the image files.
        target_size: Image size passed to ``read_image``; also the height and
            width of the batches sent to ``model.predict``.
        batch_size: Maximum number of images per ``model.predict`` call.
        augment_rounds: Number of augmented passes over the split (default 5).

    Returns:
        ``(features, labels)`` with shapes ``(count * augment_rounds, 2048)``
        and ``(count * augment_rounds,)``.
    """
    data = df[df['split'] == 1]
    count = len(data)

    print('sample count=%d' % count)

    file_names = data['file_name'].tolist()
    class_labels = data['class2_1'].values

    features = np.zeros(shape=(count * augment_rounds, 2048))
    labels = np.zeros(shape=(count * augment_rounds))

    for round_idx in range(augment_rounds):
        offset = count * round_idx
        # range(0, count, batch_size) never produces an empty or phantom batch,
        # unlike the former range(div + 1) loop when count % batch_size == 0.
        for start in range(0, count, batch_size):
            stop = min(start + batch_size, count)
            img_batch = np.zeros(shape=(stop - start, target_size[0], target_size[1], 3))

            for slot, row in enumerate(range(start, stop)):
                file_path = os.path.join(dir_name, file_names[row])
                img_batch[slot] = read_image(file_path, target_size, is_train=True)
                print('Processing image %d...\r' % (row + 1), end='')
                sys.stdout.flush()

            features[offset + start:offset + stop] = model.predict(img_batch)
            labels[offset + start:offset + stop] = class_labels[start:stop]
    return features, labels

In [5]:
# Load and standardise the clinical spreadsheet, then build the shuffled,
# oversampled frame that the feature-extraction cells below read from.
df_final = generate_df(xls_final)
df_resample = resample(df_final)

In [54]:
# Feature vectors and labels for each of the three splits; the training split
# is extracted with random augmentation inside extract_features().
train_features, train_labels = extract_features(df_resample, crop_dir, target_size, batch_size, 'train')
validation_features, validation_labels = extract_features(df_resample, crop_dir, target_size, batch_size, 'validation')
test_features, test_labels = extract_features(df_resample, crop_dir, target_size, batch_size, 'test')

sample count=13052
Processing image 4...

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


Filling batch 0~256....
Filling batch 256~512...
Filling batch 512~768...
Filling batch 768~1024...
Filling batch 1024~1280...
Filling batch 1280~1536...
Filling batch 1536~1792...
Filling batch 1792~2048...
Filling batch 2048~2304...
Filling batch 2304~2560...
Filling batch 2560~2816...
Filling batch 2816~3072...
Filling batch 3072~3328...
Filling batch 3328~3584...
Filling batch 3584~3840...
Filling batch 3840~4096...
Filling batch 4096~4352...
Filling batch 4352~4608...
Filling batch 4608~4864...
Filling batch 4864~5120...
Filling batch 5120~5376...
Filling batch 5376~5632...
Filling batch 5632~5888...
Filling batch 5888~6144...
Filling batch 6144~6400...
Filling batch 6400~6656...
Filling batch 6656~6912...
Filling batch 6912~7168...
Filling batch 7168~7424...
Filling batch 7424~7680...
Filling batch 7680~7936...
Filling batch 7936~8192...
Filling batch 8192~8448...
Filling batch 8448~8704...
Filling batch 8704~8960...
Filling batch 8960~9216...
Filling batch 9216~9472...
Filling b

In [19]:
import keras
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras import backend as K

class CustomModelCheckpoint(ModelCheckpoint):
    """Save the model every `period` training batches.

    Adapted from `keras.callbacks.ModelCheckpoint`, with the saving logic
    moved from `on_epoch_end` to `on_batch_end`.

    `filepath` can contain named formatting options, which will be filled
    with the value of `epoch` and keys in `logs` (passed in `on_batch_end`).
    Note that `epoch` is filled with the 1-based index of the batch that
    Keras passes to `on_batch_end`, not with the epoch number.

    NOTE(review): that batch index presumably restarts in every epoch, so
    checkpoints of later epochs would overwrite earlier files of the same
    name - confirm this is intended.

    # Arguments
        filepath: string, path to save the model file.
        monitor: quantity to monitor. It must be a key of the batch-level
            `logs` when `save_best_only=True`.
        verbose: verbosity mode, 0 or 1.
        save_best_only: if `save_best_only=True`,
            the latest best model according to
            the quantity monitored will not be overwritten.
        mode: one of {auto, min, max}.
            If `save_best_only=True`, the decision
            to overwrite the current save file is made
            based on either the maximization or the
            minimization of the monitored quantity.
            In `auto` mode, the direction is inferred from the name of the
            monitored quantity (maximize if it contains `acc` or starts with
            `fmeasure`, minimize otherwise).
        save_weights_only: if True, then only the model's weights will be
            saved (`model.save_weights(filepath)`), else the full model
            is saved (`model.save(filepath)`).
        period: Interval (number of batches) between checkpoints.
    """

    def __init__(self, filepath, monitor='val_loss', verbose=0,
                 save_best_only=False, save_weights_only=False,
                 mode='auto', period=1):
        # Deliberately skips ModelCheckpoint.__init__ and runs the initialiser
        # of the next class in the MRO; every attribute this class needs is
        # set explicitly below.
        super(ModelCheckpoint, self).__init__()
        self.monitor = monitor
        self.verbose = verbose
        self.filepath = filepath
        self.save_best_only = save_best_only
        self.save_weights_only = save_weights_only
        self.period = period
        self.batches_since_last_save = 0

        if mode not in ['auto', 'min', 'max']:
            warnings.warn('ModelCheckpoint mode %s is unknown, '
                          'fallback to auto mode.' % (mode),
                          RuntimeWarning)
            mode = 'auto'

        # np.inf instead of np.Inf: the capitalised alias no longer exists in
        # recent NumPy releases, while np.inf works everywhere.
        if mode == 'min':
            self.monitor_op = np.less
            self.best = np.inf
        elif mode == 'max':
            self.monitor_op = np.greater
            self.best = -np.inf
        else:
            if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
                self.monitor_op = np.greater
                self.best = -np.inf
            else:
                self.monitor_op = np.less
                self.best = np.inf

    def on_epoch_end(self, epoch, logs=None):
        """Do nothing at epoch end; checkpoints are written per batch.

        The inherited ModelCheckpoint.on_epoch_end relies on attributes that
        only ModelCheckpoint.__init__ creates (which this class skips), so
        falling through to it would fail at the end of the first epoch.
        """
        return None

    def on_batch_end(self, batch, logs=None):
        """Save the model if `period` batches have passed since the last save."""
        logs = logs or {}
        self.batches_since_last_save += 1
        if self.batches_since_last_save >= self.period:
            self.batches_since_last_save = 0
            filepath = self.filepath.format(epoch=batch + 1, **logs)
            if self.save_best_only:
                current = logs.get(self.monitor)
                if current is None:
                    warnings.warn('Can save best model only with %s available, '
                                  'skipping.' % (self.monitor), RuntimeWarning)
                else:
                    if self.monitor_op(current, self.best):
                        if self.verbose > 0:
                            print('\nBatch %05d: %s improved from %0.5f to %0.5f,'
                                  ' saving model to %s'
                                  % (batch + 1, self.monitor, self.best,
                                     current, filepath))
                        self.best = current
                        if self.save_weights_only:
                            self.model.save_weights(filepath, overwrite=True)
                        else:
                            self.model.save(filepath, overwrite=True)
                    else:
                        if self.verbose > 0:
                            print('\nBatch %05d: %s did not improve from %0.5f' %
                                  (batch + 1, self.monitor, self.best))
            else:
                if self.verbose > 0:
                    print('\nBatch %05d: saving model to %s' % (batch + 1, filepath))
                if self.save_weights_only:
                    self.model.save_weights(filepath, overwrite=True)
                else:
                    self.model.save(filepath, overwrite=True)
                    
                    
class CustomTensorBoard(TensorBoard):
    """TensorBoard callback that writes its summaries after every batch.

    Adapted from `keras.callbacks.TensorBoard`, with the logging logic moved
    to `on_batch_end`. The batch index passed by Keras is used as the
    TensorBoard step.

    NOTE(review): that index presumably restarts in every epoch, so points
    of different epochs would share step values - confirm this is acceptable.

    [TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard)
    is a visualization tool provided with TensorFlow.
    This callback writes a log for TensorBoard, which allows
    you to visualize dynamic graphs of your training and test
    metrics, as well as activation histograms for the different
    layers in your model.
    If you have installed TensorFlow with pip, you should be able
    to launch TensorBoard from the command line:
    ```sh
    tensorboard --logdir=/full_path_to_your_logs
    ```
    When using a backend other than TensorFlow, TensorBoard will still work
    (if you have TensorFlow installed), but the only feature available will
    be the display of the losses and metrics plots.
    # Arguments
        log_dir: the path of the directory where to save the log
            files to be parsed by TensorBoard.
        histogram_freq: frequency (in batches, see `on_batch_end`) at which
            to compute activation and weight histograms for the layers of
            the model. If set to 0, histograms won't be computed. Validation
            data (or split) must be specified for histogram visualizations.
        write_graph: whether to visualize the graph in TensorBoard.
            The log file can become quite large when
            write_graph is set to True.
        write_grads: whether to visualize gradient histograms in TensorBoard.
            `histogram_freq` must be greater than 0.
        batch_size: size of batch of inputs to feed to the network
            for histograms computation.
        write_images: whether to write model weights to visualize as
            image in TensorBoard.
        embeddings_freq: frequency at which selected embedding
            layers will be saved. If set to 0, embeddings won't be computed.
            Data to be visualized in TensorBoard's Embedding tab must be passed
            as `embeddings_data`.
        embeddings_layer_names: a list of names of layers to keep eye on. If
            None or empty list all the embedding layer will be watched.
        embeddings_metadata: a dictionary which maps layer name to a file name
            in which metadata for this embedding layer is saved. See the
            [details](https://www.tensorflow.org/how_tos/embedding_viz/#metadata_optional)
            about metadata files format. In case if the same metadata file is
            used for all embedding layers, string can be passed.
        embeddings_data: data to be embedded at layers specified in
            `embeddings_layer_names`. Numpy array (if the model has a single
            input) or list of Numpy arrays (if the model has multiple inputs).
            Learn [more about embeddings](https://www.tensorflow.org/programmers_guide/embedding)
    """

    def __init__(self, log_dir='./logs',
                 histogram_freq=0,
                 batch_size=32,
                 write_graph=True,
                 write_grads=False,
                 write_images=False,
                 embeddings_freq=0,
                 embeddings_layer_names=None,
                 embeddings_metadata=None,
                 embeddings_data=None):
        # Deliberately skips TensorBoard.__init__ and runs the initialiser of
        # the next class in the MRO; the attributes are set explicitly below.
        super(TensorBoard, self).__init__()
        global tf, projector
        try:
            import tensorflow as tf
            from tensorflow.contrib.tensorboard.plugins import projector
        except ImportError:
            raise ImportError('You need the TensorFlow module installed to use TensorBoard.')

        self.log_dir = log_dir
        self.histogram_freq = histogram_freq
        self.merged = None
        self.write_graph = write_graph
        self.write_grads = write_grads
        self.write_images = write_images
        self.embeddings_freq = embeddings_freq
        self.embeddings_layer_names = embeddings_layer_names
        self.embeddings_metadata = embeddings_metadata or {}
        self.batch_size = batch_size
        self.embeddings_data = embeddings_data

    def set_model(self, model):
        """Attach the model and create the summary ops, writer and embedding ops."""
        self.model = model
        if K.backend() == 'tensorflow':
            self.sess = K.get_session()
        if self.histogram_freq and self.merged is None:
            for layer in self.model.layers:

                for weight in layer.weights:
                    mapped_weight_name = weight.name.replace(':', '_')
                    tf.summary.histogram(mapped_weight_name, weight)
                    if self.write_grads:
                        grads = model.optimizer.get_gradients(model.total_loss,
                                                              weight)

                        def is_indexed_slices(grad):
                            return type(grad).__name__ == 'IndexedSlices'
                        grads = [
                            grad.values if is_indexed_slices(grad) else grad
                            for grad in grads]
                        tf.summary.histogram('{}_grad'.format(mapped_weight_name), grads)

                if hasattr(layer, 'output'):
                    if isinstance(layer.output, list):
                        for i, output in enumerate(layer.output):
                            tf.summary.histogram('{}_out_{}'.format(layer.name, i), output)
                    else:
                        tf.summary.histogram('{}_out'.format(layer.name),
                                             layer.output)
        self.merged = tf.summary.merge_all()

        if self.write_graph:
            self.writer = tf.summary.FileWriter(self.log_dir,
                                                self.sess.graph)
        else:
            self.writer = tf.summary.FileWriter(self.log_dir)

        if self.embeddings_freq and self.embeddings_data is not None:
            # `standardize_input_data` is not imported anywhere else in this
            # notebook; without this import the branch fails with NameError.
            # Imported locally so that an unexpected Keras layout only affects
            # this optional embeddings path.
            # NOTE(review): module path as used by Keras 2.2.x callbacks -
            # confirm for the installed version.
            from keras.engine.training_utils import standardize_input_data
            self.embeddings_data = standardize_input_data(self.embeddings_data, model.input_names)

            embeddings_layer_names = self.embeddings_layer_names

            if not embeddings_layer_names:
                embeddings_layer_names = [layer.name for layer in self.model.layers
                                          if type(layer).__name__ == 'Embedding']
            self.assign_embeddings = []
            embeddings_vars = {}

            self.batch_id = batch_id = tf.placeholder(tf.int32)
            self.step = step = tf.placeholder(tf.int32)

            for layer in self.model.layers:
                if layer.name in embeddings_layer_names:
                    embedding_input = self.model.get_layer(layer.name).output
                    embedding_size = np.prod(embedding_input.shape[1:])
                    embedding_input = tf.reshape(embedding_input,
                                                 (step, int(embedding_size)))
                    shape = (self.embeddings_data[0].shape[0], int(embedding_size))
                    embedding = tf.Variable(tf.zeros(shape),
                                            name=layer.name + '_embedding')
                    embeddings_vars[layer.name] = embedding
                    batch = tf.assign(embedding[batch_id:batch_id + step],
                                      embedding_input)
                    self.assign_embeddings.append(batch)

            self.saver = tf.train.Saver(list(embeddings_vars.values()))

            embeddings_metadata = {}

            if not isinstance(self.embeddings_metadata, str):
                embeddings_metadata = self.embeddings_metadata
            else:
                embeddings_metadata = {layer_name: self.embeddings_metadata
                                       for layer_name in embeddings_vars.keys()}

            config = projector.ProjectorConfig()

            for layer_name, tensor in embeddings_vars.items():
                embedding = config.embeddings.add()
                embedding.tensor_name = tensor.name

                if layer_name in embeddings_metadata:
                    embedding.metadata_path = embeddings_metadata[layer_name]

            projector.visualize_embeddings(self.writer, config)

    def on_batch_end(self, batch, logs=None):
        """Write histogram summaries (if enabled) and scalar logs for this batch."""
        # The body was written for epochs; the batch index takes the epoch's
        # role both as histogram_freq counter and as TensorBoard step.
        epoch = batch
        logs = logs or {}

        if not self.validation_data and self.histogram_freq:
            raise ValueError("If printing histograms, validation_data must be "
                             "provided, and cannot be a generator.")
        if self.embeddings_data is None and self.embeddings_freq:
            raise ValueError("To visualize embeddings, embeddings_data must "
                             "be provided.")
        if self.validation_data and self.histogram_freq:
            if epoch % self.histogram_freq == 0:

                val_data = self.validation_data
                tensors = (self.model.inputs +
                           self.model.targets +
                           self.model.sample_weights)

                if self.model.uses_learning_phase:
                    tensors += [K.learning_phase()]

                assert len(val_data) == len(tensors)
                val_size = val_data[0].shape[0]
                i = 0
                while i < val_size:
                    step = min(self.batch_size, val_size - i)
                    if self.model.uses_learning_phase:
                        # do not slice the learning phase
                        batch_val = [x[i:i + step] for x in val_data[:-1]]
                        batch_val.append(val_data[-1])
                    else:
                        batch_val = [x[i:i + step] for x in val_data]
                    assert len(batch_val) == len(tensors)
                    feed_dict = dict(zip(tensors, batch_val))
                    result = self.sess.run([self.merged], feed_dict=feed_dict)
                    summary_str = result[0]
                    self.writer.add_summary(summary_str, epoch)
                    i += self.batch_size

        # One scalar summary per logged quantity, skipping the bookkeeping keys.
        for name, value in logs.items():
            if name in ['batch', 'size']:
                continue
            summary = tf.Summary()
            summary_value = summary.value.add()
            if isinstance(value, np.ndarray):
                summary_value.simple_value = value.item()
            else:
                summary_value.simple_value = value
            summary_value.tag = name
            self.writer.add_summary(summary, epoch)
        self.writer.flush()

    def on_train_end(self, _):
        """Close the summary writer."""
        self.writer.close()

In [74]:
# Write 100 independently augmented copies of the training features to disk.
tmp_dir = os.path.join(share_dir, 'tmp/numpy')
# np.save() does not create missing directories; without this, a missing
# folder only surfaces after the first (expensive) extraction pass.
os.makedirs(tmp_dir, exist_ok=True)
# NOTE(review): the loop rebinds the global train_features / train_labels, so
# later cells see the arrays of the last iteration - confirm this is intended.
for i in range(100):
    feature_file = '%s/train_features_%03d.npy' % (tmp_dir, i)
    label_file = '%s/train_labels_%03d.npy' % (tmp_dir, i)
    print(feature_file, label_file)
    train_features, train_labels = extract_train_features(df_resample, crop_dir, target_size, batch_size)
    np.save(feature_file, train_features)
    np.save(label_file, train_labels)
    

/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_000.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_000.npy
sample count=13052
Processing image 4...

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_001.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_001.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_002.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_002.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_003.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_003.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_004.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_004.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_005.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_005.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_006.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_006.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_007.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_label

/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_057.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_057.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_058.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_058.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_059.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_059.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_060.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_060.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_061.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_061.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_062.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_labels_062.npy
sample count=13052
/sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_features_063.npy /sdb1/share/ai_osteoporosis_hip/tmp/numpy/train_label

In [58]:
from keras import models
from keras import layers
from keras import optimizers

# Small dense classifier trained on the pre-computed 2048-wide feature vectors.
model2 = models.Sequential()
model2.add(layers.Dense(1024, activation='relu', input_dim=2048))
model2.add(layers.Dropout(dropout_rate))
model2.add(layers.Dense(1, activation='sigmoid'))

model2.compile(optimizer=optimizers.RMSprop(learning_rate),
              loss='binary_crossentropy',
              metrics=['acc'])

checkpoint_dir = os.path.join(work_dir, 'checkpoint/test')
log_dir = os.path.join(work_dir, 'log/test')
# makedirs also creates the intermediate 'checkpoint' folder, which os.mkdir
# cannot do, and exist_ok avoids the separate existence check.
os.makedirs(checkpoint_dir, exist_ok=True)
model_path = os.path.join(checkpoint_dir, '{epoch:02d}-{val_acc:.4f}.hdf5')
# A checkpoint is written every 5 epochs (save_best_only is left at its default).
checkpoint = ModelCheckpoint(filepath=model_path, monitor='val_acc', period=5)
tensorboard = TensorBoard(log_dir=log_dir)

history = model2.fit(train_features, train_labels,
                    epochs=300,
                    batch_size=batch_size,
                    callbacks=[checkpoint, tensorboard],
                    validation_data=(validation_features, validation_labels))

Train on 65260 samples, validate on 2330 samples
Epoch 202/300
Epoch 203/300
Epoch 204/300
Epoch 205/300
Epoch 206/300
Epoch 207/300

KeyboardInterrupt: 

In [50]:
# Rows of the resampled frame whose class2_1 label is 1.
data = df_resample[df_resample['class2_1']==1]

In [63]:
# Number of validation samples.
len(validation_labels)

2330

In [52]:
# Number of rows in `data`.
len(data)

6047

In [56]:
from sklearn.metrics import auc

# Score the held-out test features with the dense classifier.
y_pred = model2.predict(test_features)
scores = model2.evaluate(test_features, test_labels, batch_size)

print("%s: %.2f%%" %(model2.metrics_names[1], scores[1]*100))

fpr, tpr, thresholds = roc_curve(test_labels, y_pred)
# Stored under its own name: assigning the result to `auc` would replace the
# imported function with a float and break the next call to auc().
test_auc = auc(fpr, tpr)
print('auc=%0.3f' % test_auc)

acc: 79.17%
auc=0.873


In [57]:
# Print every test label next to its predicted probability (in percent), then
# the number of samples labelled 1 and the total number of samples.
porosis = 0
total = 0
for total, (label, pred) in enumerate(zip(test_labels, y_pred), start=1):
    porosis += int(label == 1)
    print('%d, %0.1f%%' % (label, pred*100))
print(porosis, total)

0, 0.0%
0, 67.3%
0, 0.2%
1, 0.1%
0, 51.9%
0, 0.5%
0, 0.0%
0, 0.1%
0, 0.0%
0, 0.4%
0, 53.0%
0, 0.0%
0, 0.0%
0, 0.6%
1, 79.2%
0, 21.8%
0, 0.1%
0, 0.1%
1, 98.8%
0, 0.0%
0, 3.4%
0, 9.8%
0, 0.0%
0, 88.4%
1, 69.2%
0, 6.5%
0, 0.0%
0, 0.0%
1, 18.5%
0, 0.2%
0, 51.2%
0, 0.0%
0, 57.4%
0, 8.4%
0, 0.0%
0, 97.1%
0, 6.7%
0, 0.6%
0, 0.0%
0, 13.6%
0, 98.1%
0, 67.4%
0, 0.0%
0, 86.2%
0, 0.1%
0, 0.0%
0, 0.0%
1, 95.3%
0, 0.0%
0, 0.3%
0, 0.0%
1, 16.9%
0, 0.3%
0, 2.9%
1, 99.9%
0, 54.5%
0, 59.7%
1, 99.7%
0, 0.2%
1, 10.9%
0, 5.3%
0, 0.0%
0, 0.0%
0, 0.2%
1, 64.8%
0, 0.0%
0, 0.3%
0, 0.0%
0, 49.6%
0, 11.0%
0, 0.0%
0, 2.8%
0, 98.3%
0, 8.5%
0, 0.0%
0, 3.2%
0, 98.6%
0, 3.6%
0, 24.6%
1, 89.3%
0, 0.0%
0, 21.2%
0, 10.5%
0, 2.4%
0, 2.1%
0, 1.9%
0, 9.2%
0, 73.6%
0, 77.4%
0, 87.1%
0, 2.2%
0, 0.1%
0, 0.0%
0, 1.6%
0, 0.9%
0, 3.3%
0, 83.2%
0, 0.0%
0, 75.7%
0, 2.1%
0, 0.0%
0, 0.0%
0, 55.7%
0, 1.6%
1, 61.4%
0, 0.0%
0, 4.2%
0, 0.4%
0, 28.4%
0, 23.2%
0, 0.0%
0, 0.0%
0, 91.2%
0, 0.1%
0, 5.5%
0, 99.9%
0, 1.2%
0, 0.0%
1, 93.0%
0, 5

In [64]:
# Shape of the training feature matrix: (samples, feature width).
train_features.shape

(65260, 2048)

In [65]:
# Persist the training features as a single .npy file.
# Note that this rebinds tmp_dir, which an earlier cell set to '<share_dir>/tmp/numpy'.
tmp_dir = os.path.join(share_dir, 'tmp')
outfile = os.path.join(tmp_dir, 'train_features.npy')
np.save(outfile, train_features)

In [66]:
# Read the file back to check the save/load round trip.
train_features2 = np.load(outfile)

In [68]:
# Shape of the array that was loaded back from disk.
train_features2.shape

(65260, 2048)