In [None]:
## Loading OS libraries to configure server preferences
import os
# Silence TensorFlow's native (C++) log output. This is set ahead of the
# wildcard imports below, which presumably pull in TensorFlow (TODO confirm).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
import setproctitle
# Experiment identifiers; EXPERIMENT_TYPE is later used as the process title.
SERVER_NAME = 'ultrafast'
EXPERIMENT_TYPE='test_baseline'
import time
import sys
import shutil
## Adding PROCESS_UC1 utilities
# Project helper modules are resolved through sys.path entries that are
# relative to the notebook's working directory.
sys.path.append('../../lib/TASK_2_UC1/')
# NOTE(review): tf, keras, np, hd, get_keys, get_class and ReinhardNormalizer
# are used further down but never imported by name; presumably they come from
# the wildcard imports below -- confirm and import them explicitly.
from models import *
from util import otsu_thresholding
from extract_xml import *
from functions import *                   
sys.path.append('../../lib/')
from mlta import *
import math
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc

# TensorFlow 1.x-style session setup; the session is registered as the one
# used by the Keras backend.
config = tf.ConfigProto()
# Grow GPU memory on demand rather than reserving it all up front.
config.gpu_options.allow_growth = True
# Expose only GPU '0' to this process.
config.gpu_options.visible_device_list = '0'
keras.backend.set_session(tf.Session(config=config))

# NOTE(review): `verbose` is not referenced in the visible cells.
verbose=1 

# Read-only handles to the patch databases. `hd` is presumably h5py (it is
# only available through a wildcard import -- TODO confirm). Every patch file
# is opened with libver='latest' and swmr=True.
cam16 = hd.File('/home/mara/adversarialMICCAI/data/ultrafast/cam16_500/patches.hdf5',  'r', libver='latest', swmr=True)
all500 = hd.File('/home/mara/adversarialMICCAI/data/ultrafast/all500/patches.hdf5',  'r', libver='latest', swmr=True)
extra17 = hd.File('/home/mara/adversarialMICCAI/data/ultrafast/extra17/patches.hdf5',  'r', libver='latest', swmr=True)
tumor_extra17=hd.File('/home/mara/adversarialMICCAI/data/ultrafast/1129-1155/patches.hdf5', 'r', libver='latest', swmr=True)
test2 = hd.File('/mnt/nas2/results/IntermediateResults/Camelyon/ultrafast/test_data2/patches.hdf5', 'r', libver='latest', swmr=True)
pannuke= hd.File('/mnt/nas2/results/IntermediateResults/Camelyon/pannuke/patches_fix.hdf5', 'r', libver='latest', swmr=True)

# Lookup from database name (the first value returned by get_keys) to its open
# file. NOTE: the `global` statements in this cell sit at module scope, where
# they have no effect -- the names are module-level globals either way.
global data
data={'cam16':cam16,'all500':all500,'extra17':extra17, 'tumor_extra17':tumor_extra17, 'test_data2': test2, 'pannuke':pannuke}
global concept_db
# Concept-measure store read by get_concept_measure.
concept_db = hd.File('/mnt/nas2/results/IntermediateResults/Mara/MICCAI2020/MELBA_normalized_concepts_fix.hd', 'r')
# Note: nuclei_concepts not supported yet
global nuclei_concepts
# Nuclei-level concept store read by get_segmented_concept_measure.
nuclei_concepts=hd.File('/mnt/nas2/results/IntermediateResults/Mara/MICCAI2020/normalized_nuclei_concepts_db_new_try_def.hdf5','r')

# SYSTEM CONFIGS
# NOTE(review): CONFIG_FILE and COLOR are not referenced in the visible cells.
CONFIG_FILE = 'doc/config.cfg'
COLOR = True
# Batch size handed to get_batch_data when the training generator is built.
BATCH_SIZE = 32

# Seed applied to both NumPy and TensorFlow below; echoed for the record
# (Python 2 print statement).
seed=1
print seed

# SET PROCESS TITLE
# The process is named after the experiment type.
setproctitle.setproctitle('{}'.format(EXPERIMENT_TYPE))

# SET SEED
np.random.seed(seed)
tf.set_random_seed(seed)

# DATA SPLIT CSVs
# Each CSV is read completely into a list of raw lines (line terminators are
# kept, exactly as file.readlines() returns them). The `with` blocks guarantee
# that every handle is closed even if reading fails; the *_csv names remain
# bound (to the closed file objects) once each block exits.
# TODO(review): the encoding of these .csv files is unconfirmed.
with open('/mnt/nas2/results/IntermediateResults/Camelyon/train_shuffle.csv', 'r') as train_csv:
    train_list = train_csv.readlines()
with open('/mnt/nas2/results/IntermediateResults/Camelyon/val_shuffle.csv', 'r') as val_csv:
    val_list = val_csv.readlines()
with open('/mnt/nas2/results/IntermediateResults/Camelyon/test_shuffle.csv', 'r') as test_csv:
    test_list = test_csv.readlines()
with open('/mnt/nas2/results/IntermediateResults/Camelyon/test2_shuffle.csv', 'r') as test2_csv:
    test2_list = test2_csv.readlines()

# Patch list that feeds the training generator and the stain normalizer.
# Sources used previously for `data_list`:
#   /mnt/nas2/results/IntermediateResults/Camelyon/data_shuffle.csv
#   ./data/train.csv
with open('/mnt/nas2/results/IntermediateResults/Camelyon/pannuke/pannuke_train_shuffled.csv', 'r') as data_csv:
    data_list = data_csv.readlines()

# STAIN NORMALIZATION
def get_normalizer(patch, save_folder=''):
    """Fit a Reinhard stain normalizer on ``patch`` and persist both to disk.

    Args:
        patch: reference image the normalizer is fitted on.
        save_folder: directory that receives ``normalizer.npy`` and
            ``normalizing_patch.npy``. The empty default means the current
            working directory.

    Returns:
        The fitted ``ReinhardNormalizer`` instance.

    Note:
        The normalizer is an arbitrary Python object, so ``np.save`` stores it
        pickled; reading it back requires ``np.load(..., allow_pickle=True)``.
    """
    normalizer = ReinhardNormalizer()
    normalizer.fit(patch)
    # os.path.join keeps an empty save_folder relative to the working
    # directory. The former '{}/normalizer'.format('') resolved to
    # '/normalizer', i.e. a write into the filesystem root.
    np.save(os.path.join(save_folder, 'normalizer'), normalizer)
    np.save(os.path.join(save_folder, 'normalizing_patch'), patch)
    print('Normalisers saved to disk.')
    return normalizer

def normalize_patch(patch, normalizer):
    """Stain-normalize ``patch`` with ``normalizer`` and return it as float64.

    The patch is cast to uint8 before ``normalizer.transform`` is applied, and
    the transformed result is cast to float64.
    """
    patch_uint8 = np.uint8(patch)
    transformed = normalizer.transform(patch_uint8)
    return np.float64(transformed)

In [None]:
# LOAD DATA NORMALIZER
# The first line of data_list selects the reference patch the stain normalizer
# is fitted on. get_normalizer also saves the fitted normalizer and the patch
# under './'.
# NOTE: `global` at module scope has no effect; `normalizer` is a module-level
# global either way and is read by get_batch_data.
global normalizer
db_name, entry_path, patch_no = get_keys(data_list[0])
normalization_reference_patch = data[db_name][entry_path][patch_no]
normalizer = get_normalizer(normalization_reference_patch, save_folder='./')
# Retrieve Concept Measures
def get_concept_measure(db_name, entry_path, patch_no, measure_type=''):
    """Look up a pre-computed concept measure for one patch in ``concept_db``.

    The measures stored in the file should have been scaled beforehand to
    zero mean and unit standard deviation.

    Key layout:
        * ``db_name == 'pannuke'``: ``entry_path + '  /' + measure_type``
          (two spaces before the slash; ``patch_no`` is not part of the key).
          NOTE(review): the two spaces presumably mirror how the keys were
          written into the file -- confirm against the concept database.
        * any other database:
          ``db_name/entry_path/patch_no/measure_type``.

    Args:
        db_name: name of the patch database the patch belongs to.
        entry_path: path of the entry inside that database.
        patch_no: index of the patch inside the entry.
        measure_type: name of the measure to read, e.g. ``'norm_contrast'``.

    Returns:
        The first element stored under the key. If the lookup fails for any
        reason, the failure is printed and the fallback value ``1.`` is
        returned (best-effort behaviour kept from the original).
    """
    is_pannuke = db_name == 'pannuke'
    try:
        if is_pannuke:
            key = entry_path + '  /' + measure_type
        else:
            key = db_name + '/' + entry_path + '/' + str(patch_no) + '/' + measure_type
        return concept_db[key][0]
    except Exception:
        # `except Exception` (not a bare `except`) so that KeyboardInterrupt
        # and SystemExit still stop a long-running generator loop.
        print("[ERR]: {}, {}, {}, {}".format(db_name, entry_path, patch_no, measure_type))
        if is_pannuke:
            # Also echo the exact key that was attempted.
            print(entry_path + '  /' + measure_type)
        return 1.
def get_segmented_concept_measure(db_name, entry_path, patch_no, measure_type=''):
    """Look up a nuclei-segmentation concept measure in ``nuclei_concepts``.

    The measures stored in the file should have been scaled beforehand to
    zero mean and unit standard deviation.

    Args:
        db_name: name of the patch database the patch belongs to.
        entry_path: path of the entry inside that database.
        patch_no: index of the patch inside the entry.
        measure_type: name of the measure to read (``'area'`` and ``'count'``
            are the values used elsewhere in this notebook).

    Returns:
        The first element stored under
        ``db_name/entry_path/patch_no/measure_type``. If the lookup fails, the
        failure is printed and the fallback value ``1.`` is returned.
    """
    try:
        # The original looked the value up but never returned it, so every
        # successful lookup produced None.
        return nuclei_concepts[db_name + '/' + entry_path + '/' + str(patch_no) + '/' + measure_type][0]
    except Exception:
        # `except Exception` keeps the best-effort fallback while letting
        # KeyboardInterrupt / SystemExit propagate.
        print("[ERROR] Issue retreiving concept measure for {}, {}, {}, {}".format(db_name, entry_path, patch_no, measure_type))
        return 1.

# BATCH GENERATORS
def get_batch_data(patch_list, batch_size=32, measure_type='norm_contrast'):
    """Endlessly yield mini-batches of patches, labels and concept measures.

    For every line of ``patch_list`` the patch is fetched from ``data`` using
    the keys returned by ``get_keys``, stain-normalized with the module-level
    ``normalizer``, passed through the Keras InceptionV3 ``preprocess_input``,
    and paired with its label from ``get_class`` and its concept measure from
    ``get_concept_measure``.

    Other concept signals that were tried in this spot: a constant 1.0,
    Gaussian noise, the domain label from ``get_domain`` (one-hot, 6 classes),
    and the nuclei ``'area'`` / ``'count'`` measures from
    ``get_segmented_concept_measure``.

    Args:
        patch_list: sequence of CSV lines, each identifying one patch.
        batch_size: maximum number of patches per batch. The final batch of
            each pass holds the remainder when ``len(patch_list)`` is not a
            multiple of ``batch_size``.
        measure_type: measure name forwarded to ``get_concept_measure``.

    Yields:
        ``([batch_x, batch_y, batch_cm], None)`` with all three arrays as
        float32. The target slot is ``None``.
        NOTE(review): presumably the consuming model takes labels and concept
        values as inputs and computes its loss internally -- confirm.

    Raises:
        ValueError: on first iteration when ``patch_list`` is empty. The
            original looped forever without yielding in that case.
    """
    num_samples = len(patch_list)
    if num_samples == 0:
        raise ValueError('get_batch_data: patch_list is empty, nothing to yield')
    while True:
        for offset in range(0, num_samples, batch_size):
            batch_x = []
            batch_y = []
            batch_concept = []
            # The original re-sliced this window with
            # [:(num_samples // batch_size) * batch_size]. That is a no-op when
            # num_samples >= batch_size and empties the batch otherwise, so it
            # has been dropped.
            for line in patch_list[offset:offset + batch_size]:
                db_name, entry_path, patch_no = get_keys(line)
                patch = data[db_name][entry_path][patch_no]
                patch = normalize_patch(patch, normalizer)
                patch = keras.applications.inception_v3.preprocess_input(patch)
                batch_x.append(patch)
                batch_y.append(get_class(line, entry_path))
                batch_concept.append(
                    get_concept_measure(db_name, entry_path, patch_no, measure_type=measure_type)
                )
            batch_x = np.asarray(batch_x, dtype=np.float32)
            batch_y = np.asarray(batch_y, dtype=np.float32)
            batch_cm = np.asarray(batch_concept, dtype=np.float32)
            yield [batch_x, batch_y, batch_cm], None
            

In [None]:
# Build the endless batch generator over the lines of data_list.
train_generator=get_batch_data(data_list, batch_size=BATCH_SIZE)

In [None]:
# Peek at the first ten raw lines of the training list.
data_list[:10]

In [None]:
# Draw one batch to sanity-check the generator: x = patches, y = labels,
# cm = concept measures; the (None) target slot is discarded.
# The next() builtin works on Python 2.6+ and Python 3, whereas the
# generator's .next() method exists on Python 2 only.
[x,y,cm],_=next(train_generator)

In [None]:
# Display the 'extra17' entry of the concept database.
concept_db['extra17']

In [None]:
# Show the concept measures of the batch drawn above.
cm