In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import gzip

import numpy as np
from IPython import display

import time

from keras import backend as K
from keras.callbacks import Callback
from keras.models import Model, load_model, model_from_json
from keras.optimizers import SGD, Adam
from keras.layers import Dense, Dropout, GlobalAveragePooling2D # Conv2D, Input, Flatten, MaxPooling2D, UpSampling2D, concatenate, Cropping2D, Reshape, BatchNormalization
from keras.utils import HDF5Matrix
from keras.applications.vgg16 import VGG16

from keras.models import load_model
import matplotlib.pyplot as plt
%matplotlib inline


import tensorflow as tf

Using TensorFlow backend.


In [3]:
from keras.applications.xception import Xception
from keras.applications.vgg16 import VGG16
from keras.applications.inception_resnet_v2 import InceptionResNetV2

In [4]:
from utils.load_data import load_data
from utils.preprocess import DataGenerator
from utils.comparams import calculate_auc, auc

W0516 18:54:13.875253 140072729698432 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/PatchCamelyon/utils/comparams.py:1: The name tf.local_variables_initializer is deprecated. Please use tf.compat.v1.local_variables_initializer instead.



In [5]:
def show(image, now=True, fig_size=(5, 5)):
    """Plot an image in grayscale after min-max scaling it to [0, 1].

    Args:
        image: array-like image data; it is converted to a float32 array
            before scaling.
        now: when truthy, ``plt.show()`` is called right away; otherwise the
            figure is left open so the caller can keep drawing on it.
        fig_size: ``(width, height)`` written to the global
            ``plt.rcParams['figure.figsize']``; pass ``None`` to leave the
            current setting untouched.

    A constant image (max == min) is drawn as all zeros instead of dividing
    by zero and plotting NaNs.
    """
    image = np.asarray(image).astype(np.float32)
    lo, hi = image.min(), image.max()
    if fig_size is not None:
        plt.rcParams['figure.figsize'] = (fig_size[0], fig_size[1])

    # Shift to zero first; only rescale when there is an actual value range.
    scaled = image - lo
    span = hi - lo
    if span > 0:
        scaled = scaled / span

    plt.imshow(scaled, cmap='gray')
    plt.axis('off')
    if now:
        plt.show()

### Data Generator

In [10]:
# Root data directory handed to load_data.
# Alternative location kept for reference: data_dir = './data/macenko'
data_dir = './data'

In [11]:
# Load the 'test' split: x_test is used below as model input, y_test_true as ground truth.
x_test, y_test_true = load_data(data_dir, purpose='test')

In [12]:
# One integer id per test sample: 0 .. len(x_test) - 1.
test_id = np.arange(len(x_test))

# Split name -> sample ids.
partition = {'test': test_id}

# Stringified sample id -> first element of that sample's flattened label.
test_labels = dict(
    (str(i), y_test_true[i].flatten()[0])
    for i in test_id
)

In [13]:
# Sanity check: number of labelled test samples.
len(test_labels)

32768

## VGG16 Predictions

##### Calculate AUC 

In [14]:
# Ground-truth labels flattened to a 1-D array.
true_labels = np.array(y_test_true).flatten()

In [15]:
# Keep element 1 of every prediction row as the score to evaluate.
# NOTE(review): `preds` is not assigned by any cell above this one in the
# visible notebook, so this only works with leftover kernel state — TODO add
# the model-loading / prediction cells for this section.
pred_labels = np.array([p[1] for p in preds])

In [27]:
# Report the AUC of the scores against the ground truth (the recorded output shows an sklearn and a tf value).
calculate_auc(true_labels, pred_labels)

sklearn auc: 0.9129782979784034
tf auc: [0.9129074, 0.9129074]


## VGG19 Predictions

In [39]:
# Ground-truth labels as a flat 1-D array.
true_labels = np.array(y_test_true).reshape(-1)

# Column 1 of every prediction row is the score that gets evaluated.
pred_labels = np.array(preds)[:, 1]

calculate_auc(true_labels, pred_labels)

sklearn auc: 0.9086748213509703
tf auc: [0.90866363, 0.90866363]


## VGG19 Predictions with lr 0.001

In [50]:
# Ground-truth labels as a flat 1-D array.
true_labels = np.array(y_test_true).reshape(-1)

# Column 1 of every prediction row is the score that gets evaluated.
pred_labels = np.array(preds)[:, 1]

calculate_auc(true_labels, pred_labels)

sklearn auc: 0.9062592216085712
tf auc: [0.90621924, 0.90621924]


# VGG19 with learning rate 0.0001

In [64]:
# Ground-truth labels as a flat 1-D array.
true_labels = np.array(y_test_true).reshape(-1)

# Column 1 of every prediction row is the score that gets evaluated.
pred_labels = np.array(preds)[:, 1]

calculate_auc(true_labels, pred_labels)

sklearn auc: 0.8990378027888102
tf auc: [0.89904654, 0.89904654]


## Inception 


In [14]:
# Where the trained weights are read from, and where predictions are exported to.
file_dir = './Model/lor_acc'
pred_file_dir = './Preds'
model_name = 'InceptionResNetV2_best_acc_model'
network_filepath = os.path.join(file_dir, '{}.h5'.format(model_name))  # author's note: _limitless

# Number of samples per generator batch.
batch_size = 128

# Keyword arguments forwarded to DataGenerator.
# Shuffling is off — presumably so predictions stay aligned with y_test_true; confirm in DataGenerator.
params = dict(
    dim=(224, 224),
    batch_size=batch_size,
    n_classes=2,
    shuffle=False,
)

# Generator over the test split.
test_generator = DataGenerator(partition['test'], x_test, test_labels, **params)

from tensorflow import metrics, local_variables_initializer
from keras.backend import get_session
from sklearn.metrics import roc_auc_score as skroc
import tensorflow as tf

def auc(y_true, y_pred):
    """Keras-style metric built on ``metrics.auc``.

    Builds the metric ops for ``y_true`` / ``y_pred``, runs
    ``local_variables_initializer()`` in the session returned by
    ``get_session()``, and returns element 1 of the tuple produced by
    ``metrics.auc``.
    """
    metric_ops = metrics.auc(y_true, y_pred)
    # The metric's local variables must be initialised before the op is evaluated.
    session = get_session()
    session.run(local_variables_initializer())
    return metric_ops[1]


# Name -> callable mapping for custom objects (used with load_model's custom_objects).
dependencies = dict(auc=auc)

In [15]:
# Rebuild the architecture so the saved weights can be loaded into it:
# an InceptionResNetV2 backbone without its top, followed by a small 2-class head.
incres = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(224,224,3))

x = incres.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
#x = Dropout(0.5)(x)
y = Dense(2, activation='softmax')(x) # 2-way softmax head; the evaluation cells read column 1 of its output as the score

model = Model(inputs=incres.input, outputs=y)

# Freeze every backbone layer; only the head added above stays trainable.
for layer in incres.layers:
    layer.trainable = False

W0516 18:54:52.948099 140072729698432 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/PatchCamelyon/.env/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0516 18:54:52.962135 140072729698432 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/PatchCamelyon/.env/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0516 18:54:52.964756 140072729698432 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/PatchCamelyon/.env/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0516 18:54:52.982017 140072729698432 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/PatchCamelyon/.env/lib/python3.6/site-packages/keras/backend/tensorflow_b

In [16]:
# Load the trained weights stored at network_filepath into the model.
model.load_weights(network_filepath)

In [17]:
# Run inference over every batch produced by test_generator.
preds = model.predict_generator(test_generator)

In [18]:
# whole data normal data predictions inception resnet
true_labels = np.array(y_test_true).flatten()[:len(preds)]

pred_labels = np.array([p[1] for p in preds])

calculate_auc(true_labels, pred_labels) # without beta lr=0.0001

W0516 18:57:07.364081 140072729698432 deprecation.py:323] From /home/aorus/workspaces/simge/PatchCamelyon/.env/lib/python3.6/site-packages/tensorflow/python/ops/metrics_impl.py:808: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


sklearn auc: 0.48340424219819855
tf auc: [0.46961868, 0.46961868]


In [24]:
# Create the output directory if it does not exist yet.
os.makedirs(pred_file_dir, exist_ok=True)

# Full export: case index plus both model outputs per row.
with open(os.path.join(pred_file_dir, 'preds_inception-resnet.csv'), 'w') as f:
    # The header needs its own newline; without it the first data row is
    # appended to the header line.
    f.write("case, prediction1, prediction2\n")
    for i, p in enumerate(preds):
        f.write(','.join((str(i), str(p[0]), str(p[1]))) + '\n')


# Reduced export: case index and the column-1 score only.
with open(os.path.join(pred_file_dir, 'preds_inception-resnet1_sub.csv'), 'w') as f:
    f.write("case, prediction\n")
    for i, p in enumerate(preds):
        f.write(','.join((str(i), str(p[1]))) + '\n')

## VGG16 Whole data lorenzo

In [23]:
# Where the trained weights are read from, and where predictions are exported to.
file_dir = './Model/lor_acc'
pred_file_dir = './Preds'
model_name = 'vgg16_best_acc_model'
network_filepath = os.path.join(file_dir, '{}.h5'.format(model_name))  # author's note: _limitless

# Number of samples per generator batch.
batch_size = 128

# Keyword arguments forwarded to DataGenerator.
# Shuffling is off — presumably so predictions stay aligned with y_test_true; confirm in DataGenerator.
params = dict(
    dim=(224, 224),
    batch_size=batch_size,
    n_classes=2,
    shuffle=False,
)

# Generator over the test split.
test_generator = DataGenerator(partition['test'], x_test, test_labels, **params)

from tensorflow import metrics, local_variables_initializer
from keras.backend import get_session
from sklearn.metrics import roc_auc_score as skroc
import tensorflow as tf

def auc(y_true, y_pred):
    """Keras-style metric built on ``metrics.auc``.

    Builds the metric ops for ``y_true`` / ``y_pred``, runs
    ``local_variables_initializer()`` in the session returned by
    ``get_session()``, and returns element 1 of the tuple produced by
    ``metrics.auc``.
    """
    metric_ops = metrics.auc(y_true, y_pred)
    # The metric's local variables must be initialised before the op is evaluated.
    session = get_session()
    session.run(local_variables_initializer())
    return metric_ops[1]


# Name -> callable mapping for custom objects (used with load_model's custom_objects).
dependencies = dict(auc=auc)

In [24]:
# Rebuild the architecture so the saved weights can be loaded into it:
# a VGG16 backbone without its top, followed by a small 2-class head.
vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(224,224,3))

x = vgg16.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
#x = Dropout(0.5)(x)
y = Dense(2, activation='softmax')(x) # 2-way softmax head; the evaluation cells read column 1 of its output as the score

model = Model(inputs=vgg16.input, outputs=y)

# Freeze every backbone layer; only the head added above stays trainable.
for layer in vgg16.layers:
    layer.trainable = False

In [25]:
# Load the trained weights stored at network_filepath into the model.
model.load_weights(network_filepath)

In [26]:
# Run inference over every batch produced by test_generator.
preds = model.predict_generator(test_generator)

In [27]:
# whole data normal data predictions vgg16
true_labels = np.array(y_test_true).flatten()[:len(preds)]

pred_labels = np.array([p[1] for p in preds])

calculate_auc(true_labels, pred_labels) # without beta lr=0.0001

sklearn auc: 0.8803514321789897
tf auc: [0.88006735, 0.88006735]


In [22]:
# Create the output directory if it does not exist yet.
os.makedirs(pred_file_dir, exist_ok=True)

# Full export: case index plus both model outputs per row.
with open(os.path.join(pred_file_dir, 'vgg16-whole_data_lore.csv'), 'w') as f:
    # The header needs its own newline; without it the first data row is
    # appended to the header line.
    f.write("case, prediction1, prediction2\n")
    for i, p in enumerate(preds):
        f.write(','.join((str(i), str(p[0]), str(p[1]))) + '\n')


# Reduced export: case index and the column-1 score only.
with open(os.path.join(pred_file_dir, 'vgg16-whole_data_lore_sub.csv'), 'w') as f:
    f.write("case, prediction\n")
    for i, p in enumerate(preds):
        f.write(','.join((str(i), str(p[1]))) + '\n')

## Xception Whole data lorenzo

In [28]:
# directory to save the best model
file_dir = './Model/lor_acc'
pred_file_dir = './Preds'
model_name = 'Xception_best_acc_model'
network_filepath = os.path.join(file_dir, model_name + '.h5') #_limitless

# Define batch size.
batch_size = 128

# Parameters for generators
params = {
    'dim': (224, 224),
    'batch_size': batch_size,
    'n_classes': 2,
    'shuffle': False
}

# Generators
test_generator = DataGenerator(partition['test'], x_test, test_labels, **params)

from tensorflow import metrics, local_variables_initializer
from keras.backend import get_session
from sklearn.metrics import roc_auc_score as skroc
import tensorflow as tf

def auc(y_true, y_pred):
    """Keras-style metric built on ``metrics.auc``.

    Builds the metric ops for ``y_true`` / ``y_pred``, runs
    ``local_variables_initializer()`` in the session returned by
    ``get_session()``, and returns element 1 of the tuple produced by
    ``metrics.auc``.
    """
    metric_ops = metrics.auc(y_true, y_pred)
    # The metric's local variables must be initialised before the op is evaluated.
    session = get_session()
    session.run(local_variables_initializer())
    return metric_ops[1]


# Name -> callable mapping for custom objects (used with load_model's custom_objects).
dependencies = dict(auc=auc)

In [29]:
# Rebuild the architecture so the saved weights can be loaded into it:
# an Xception backbone without its top, followed by a small 2-class head.
inc = Xception(weights='imagenet', include_top=False, input_shape=(224,224,3))

x = inc.output
x = GlobalAveragePooling2D()(x)

x = Dense(256, activation='relu')(x)
#x = Dropout(0.5)(x)

y = Dense(2, activation='softmax')(x) # 2-way softmax head; the evaluation cells read column 1 of its output as the score

model = Model(inputs=inc.input, outputs=y)

# Freeze every backbone layer; only the head added above stays trainable.
for layer in inc.layers:
    layer.trainable = False

In [30]:
# Load the trained weights stored at network_filepath into the model.
model.load_weights(network_filepath)

In [31]:
# Run inference over every batch produced by test_generator.
preds = model.predict_generator(test_generator)

In [32]:
# whole data normal data predictions xception
true_labels = np.array(y_test_true).flatten()[:len(preds)]

pred_labels = np.array([p[1] for p in preds])

calculate_auc(true_labels, pred_labels) # without beta lr=0.0001

sklearn auc: 0.5802331657388251
tf auc: [0.5800938, 0.5800938]


In [45]:
# Create the output directory if it does not exist yet.
os.makedirs(pred_file_dir, exist_ok=True)

# Full export: case index plus both model outputs per row.
with open(os.path.join(pred_file_dir, 'xception-whole_data_lore.csv'), 'w') as f:
    # The header needs its own newline; without it the first data row is
    # appended to the header line.
    f.write("case, prediction1, prediction2\n")
    for i, p in enumerate(preds):
        f.write(','.join((str(i), str(p[0]), str(p[1]))) + '\n')


# Reduced export: case index and the column-1 score only.
with open(os.path.join(pred_file_dir, 'xception-whole_data_lore_sub.csv'), 'w') as f:
    f.write("case, prediction\n")
    for i, p in enumerate(preds):
        f.write(','.join((str(i), str(p[1]))) + '\n')

# VGG19 Changeable

In [10]:
# Where the saved model is read from, and where predictions are exported to.
file_dir = './Model'
pred_file_dir = './Preds'

# Model to evaluate; swap the name to score another checkpoint,
# e.g. 'stain_norm_VGG19_model_10K'.
# NOTE(review): the section heading says VGG19 but this name is a VGG16 checkpoint — confirm.
model_name = 'vgg16_model_50K'

network_filepath = os.path.join(file_dir, '{}.h5'.format(model_name))  # author's note: _limitless

In [11]:
# Number of samples per generator batch.
batch_size = 128

# Keyword arguments forwarded to DataGenerator.
# Shuffling is off — presumably so predictions stay aligned with y_test_true; confirm in DataGenerator.
params = dict(
    dim=(224, 224),
    batch_size=batch_size,
    n_classes=2,
    shuffle=False,
)

# Generator over the test split.
test_generator = DataGenerator(partition['test'], x_test, test_labels, **params)

In [12]:
from tensorflow import metrics, local_variables_initializer
from keras.backend import get_session
from sklearn.metrics import roc_auc_score as skroc
import tensorflow as tf

def auc(y_true, y_pred):
    """Keras-style metric built on ``metrics.auc``.

    Builds the metric ops for ``y_true`` / ``y_pred``, runs
    ``local_variables_initializer()`` in the session returned by
    ``get_session()``, and returns element 1 of the tuple produced by
    ``metrics.auc``.
    """
    metric_ops = metrics.auc(y_true, y_pred)
    # The metric's local variables must be initialised before the op is evaluated.
    session = get_session()
    session.run(local_variables_initializer())
    return metric_ops[1]

In [13]:
# Name -> callable mapping passed to load_model as custom_objects.
dependencies = dict(auc=auc)

In [14]:
# Load the complete saved model from network_filepath; `dependencies` supplies the
# custom `auc` callable — presumably the model was compiled with that metric, verify.
model = load_model(network_filepath, custom_objects=dependencies)
# Disabled alternative: rebuild the model from JSON and load the weights separately.
# model = model_from_json(open(network_filepath).read())
# model.load_weights(os.path.join(os.path.dirname(network_filepath)))

W0516 15:33:25.314671 140371061563520 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/PatchCamelyon/.env/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0516 15:33:25.328767 140371061563520 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/PatchCamelyon/.env/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0516 15:33:25.344115 140371061563520 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/PatchCamelyon/.env/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0516 15:33:25.530491 140371061563520 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/PatchCamelyon/.env/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:174: The

In [15]:
# Run inference over every batch produced by test_generator.
preds = model.predict_generator(test_generator)

In [17]:
# Sanity check: number of prediction rows (compare with the number of test labels).
len(preds)

32768

In [17]:
# 50K normal data predictions
true_labels = np.array(y_test_true).flatten()[:len(preds)]

pred_labels = np.array([p[1] for p in preds])

calculate_auc(true_labels, pred_labels) # without beta lr=0.0001

sklearn auc: 0.8356441909319363
tf auc: [0.8354149, 0.8354149]


In [16]:
# 100K normal data predictions
true_labels = np.array(y_test_true).flatten()[:len(preds)]

pred_labels = np.array([p[1] for p in preds])

calculate_auc(true_labels, pred_labels) # without beta lr=0.0001

sklearn auc: 0.8442031326366719
tf auc: [0.8434149, 0.8434149]


In [17]:
# Stain Norm 10000 predictions
true_labels = np.array(y_test_true).flatten()[:len(preds)]

pred_labels = np.array([p[1] for p in preds])

calculate_auc(true_labels, pred_labels) # without beta lr=0.0001

sklearn auc: 0.9037854756619346
tf auc: [0.90377045, 0.90377045]


In [20]:
# Ground-truth labels as a flat 1-D array.
true_labels = np.array(y_test_true).reshape(-1)

# Column 1 of every prediction row is the score that gets evaluated.
pred_labels = np.array(preds)[:, 1]

calculate_auc(true_labels, pred_labels)  # without beta lr=0.0001 learning rate reduction changed

sklearn auc: 0.9129782979784034
tf auc: [0.9129074, 0.9129074]


In [13]:
# Ground-truth labels as a flat 1-D array.
true_labels = np.array(y_test_true).reshape(-1)

# Column 1 of every prediction row is the score that gets evaluated.
pred_labels = np.array(preds)[:, 1]

calculate_auc(true_labels, pred_labels)  # 0.00007

sklearn auc: 0.8974235205864627
tf auc: [0.8974233, 0.8974233]
