In [22]:
EXPERIMENT_TYPE = 'shallowCNN_pcam'

In [23]:
import keras
import sys
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Activation, Flatten, Dropout
from keras.optimizers import SGD, Adadelta
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint
from keras import metrics
from sklearn.metrics import roc_curve, auc
import tensorflow as tf
## Loading OS libraries to configure server preferences
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
warnings.filterwarnings("ignore")
import setproctitle
SERVER_NAME = 'ultrafast'
import time
import sys
import shutil
## Adding PROCESS_UC1 utilities
#sys.path.append('/home/mara/multitask_adversarial/lib/TASK_2_UC1/')
import math
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import pandas as pd
from keras.utils import HDF5Matrix
from keras.utils.data_utils import get_file
from keras import backend as K

In [40]:
tf.__version__

'1.8.0'

In [41]:
keras.__version__

'2.1.6'

In [25]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(1)
keras.backend.set_session(tf.Session(config=config))
init = tf.global_variables_initializer()
verbose=1 
COLOR = True
global new_folder
folder_name=EXPERIMENT_TYPE
new_folder = '/home/mara/multitask_adversarial/results/'+folder_name #new_folder
if not os.path.exists(new_folder):
    os.mkdir(new_folder)
global error_log
error_log=open(new_folder+'/ERR.log', 'w')
BATCH_SIZE = 32

# SAVE FOLD
f=open(new_folder+"/seed.txt","w")
seed=0
print(seed)
f.write(str(seed))
f.close()

# SET PROCESS TITLE
setproctitle.setproctitle('{}'.format(EXPERIMENT_TYPE))

# SET SEED
np.random.seed(seed)
tf.set_random_seed(seed)


# STAIN NORMALIZATION
def get_normalizer(patch, save_folder=''):
    normalizer = ReinhardNormalizer()
    normalizer.fit(patch)
    np.save('{}/normalizer'.format(save_folder),normalizer)
    np.save('{}/normalizing_patch'.format(save_folder), patch)
    print('Normalisers saved to disk.')
    return normalizer

def normalize_patch(patch, normalizer):
    return np.float64(normalizer.transform(np.uint8(patch)))

0


In [26]:
import h5py as hd
y_train = hd.File('../../data/pcam/camelyonpatch_level_2_split_train_y.h5', 'r')['y']
x_train = hd.File('../../data/pcam/camelyonpatch_level_2_split_train_x.h5', 'r')['x']
y_val = hd.File('../../data/pcam/camelyonpatch_level_2_split_valid_y.h5', 'r')['y']
x_val=hd.File('../../data/pcam/camelyonpatch_level_2_split_valid_x.h5', 'r')['x']

In [27]:
#logging.info("building model")
input_shape=(96,96,3)
nb_epochs = 15 
batch_size = 32 
nb_dense_layers = 256 
verbose = 2 

model = Sequential()

# 1st conv => relu => pool
model.add(Conv2D(32, kernel_size=(5,5), padding="same", input_shape=input_shape))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# 2nd conv => relu => pool
model.add(Conv2D(64, kernel_size=(5,5), padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# flatten => relu layers
model.add(Flatten())
model.add(Dense(nb_dense_layers))
model.add(Activation("relu"))

# final binary layer 
model.add(Dense(1, activation="sigmoid"))

# compile and display model
model.compile(loss=keras.losses.binary_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])
model.summary()
print("nb layers: "+str(len(model.layers)))

# use checkpointing to save best weights
checkpoint = ModelCheckpoint("{}/pcam_weights.hd5".format(new_folder), monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks = [checkpoint]


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 96, 96, 32)        2432      
_________________________________________________________________
activation_4 (Activation)    (None, 96, 96, 32)        0         
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 48, 48, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 48, 48, 64)        51264     
_________________________________________________________________
activation_5 (Activation)    (None, 48, 48, 64)        0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 24, 24, 64)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 36864)             0         
__________

In [28]:
starting_time = time.time()

In [29]:
global normalizer
normalization_reference_patch = x_train[0]
normalizer = get_normalizer(normalization_reference_patch, save_folder=new_folder)


Using brightness standardization
Normalisers saved to disk.


In [30]:
def get_batch_data(patch_list, labels, batch_size=32):
    num_samples=len(patch_list)
    while True:
        for offset in range(0,num_samples, batch_size):
            batch_x = []
            batch_y = []
            batch_samples=patch_list[offset:offset+batch_size]
            for patch_id in range(len(batch_samples)):
                #print(len(batch_samples))
                patch=patch_list[patch_id]
                patch=normalize_patch(patch, normalizer)
                #print('normalized_patch')
                label=labels[patch_id]
                batch_x.append(patch)
                batch_y.append(label)
            #print(len(batch_x))
            std_=np.std(np.asarray(batch_x))
            mean_=np.mean(np.asarray(batch_x))
            batch_x=np.asarray(batch_x, dtype=np.float32)-mean_
            batch_x/=std_
            batch_y=np.asarray(batch_y, dtype=np.float32)
            yield batch_x, batch_y

In [31]:
y_train=y_train[:].ravel()

In [32]:
y_val=y_val[:].ravel()

In [33]:
x_train=np.asarray(x_train[:])

In [34]:
x_val=np.asarray(x_val[:])

In [35]:
len(x_train)//32

8192

In [36]:
train_generator = get_batch_data(x_train, y_train)

In [37]:
val_generator = get_batch_data(x_val, y_val)

In [38]:
history = model.fit_generator(train_generator,
                    steps_per_epoch=len(y_train)//32,
                    epochs=30,
                    verbose=True,
                    validation_data=val_generator,
                    validation_steps=len(y_val)//32)
end_time = time.time()
total_training_time = end_time - starting_time 

Epoch 1/30
  51/8192 [..............................] - ETA: 5:57 - loss: 3.4315 - acc: 0.7788

KeyboardInterrupt: 

In [18]:
model.save_weights('{}/weights.h5'.format(new_folder))

In [19]:
model.load_weights('{}/weights.h5'.format(new_folder))

In [21]:
model.evaluate_generator(val_generator, steps=len(y_val)//32)

[2.490997791290283, 0.84375]

In [42]:
model.evaluate_generator(train_generator, steps=len(y_train)//32)

[3.487396717071533, 0.78125]