In [None]:
# Python imports.
import os
import shutil
import pickle
import numpy as np

# Keras imports.
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
import keras.backend as K

# Custom imports.
from configuration import HierarchicalConfig

# Instantiate the project configuration; `conf` is the dictionary that the
# rest of the notebook reads its paths and model settings from.
c = HierarchicalConfig()
conf = c.config_dict

# Create the cache directory when it is not there yet (get_predictions moves
# images into it).
cache_dir_present = os.path.exists(conf['CACHE_DIR'])
if not cache_dir_present:
    os.makedirs(conf['CACHE_DIR'])

# Report how many entries the 'test' sub-directory of the test image
# directory currently holds.
test_entries = os.listdir(conf['TEST_IMG_DIR'] + 'test')
num_images = len(test_entries)
print(num_images)

In [None]:
def preprocess_nv_non_nv(im):
    """Return `im` with the configured NV_NON_NV model mean subtracted."""
    model_mean = conf['MODEL_MEANS']['NV_NON_NV']
    return im - model_mean

def preprocess_df_vasc(im):
    """Return `im` with the configured DF_VASC_OTHERS model mean subtracted."""
    model_mean = conf['MODEL_MEANS']['DF_VASC_OTHERS']
    return im - model_mean

def preprocess_bkl_mel_others(im):
    """Return `im` with the configured MEL_BKL model mean subtracted."""
    model_mean = conf['MODEL_MEANS']['MEL_BKL']
    return im - model_mean

def preprocess_akiec_others(im):
    """Return `im` with the configured AKIEC_OTHERS model mean subtracted."""
    model_mean = conf['MODEL_MEANS']['AKIEC_OTHERS']
    return im - model_mean

def preprocess_bcc_others(im):
    """Return `im` with the configured BCC_OTHERS model mean subtracted."""
    model_mean = conf['MODEL_MEANS']['BCC_OTHERS']
    return im - model_mean

def f1(y_true, y_pred):
    """F1 score of the rounded predictions against the labels.

    Both inputs are clipped to [0, 1] and rounded before counting, and
    K.epsilon() is added to every denominator to avoid division by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged = K.sum(K.round(K.clip(y_pred, 0, 1)))
    actual = K.sum(K.round(K.clip(y_true, 0, 1)))

    prec = hits / (flagged + K.epsilon())
    rec = hits / (actual + K.epsilon())

    numerator = 2*(prec * rec)
    return numerator / (prec + rec + K.epsilon())

def recall(y_true, y_pred):
    """Rounded true positives divided by rounded actual positives.

    K.epsilon() is added to the denominator to avoid division by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual = K.sum(K.round(K.clip(y_true, 0, 1)))
    denominator = actual + K.epsilon()
    return hits / denominator

def precision(y_true, y_pred):
    """Rounded true positives divided by rounded predicted positives.

    K.epsilon() is added to the denominator to avoid division by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged = K.sum(K.round(K.clip(y_pred, 0, 1)))
    denominator = flagged + K.epsilon()
    return hits / denominator

In [None]:
def _image_id(filename):
    """Return the image id for a generator-relative filename.

    Drops everything up to and including the first '/' and removes a trailing
    '.jpg' extension, e.g. 'test/ISIC_0034524.jpg' -> 'ISIC_0034524'.
    """
    name = filename[filename.index('/') + 1:]
    # str.strip('.jpg') removes any of the characters '.', 'j', 'p', 'g' from
    # BOTH ends (e.g. 'pig.jpg' -> 'i'), so cut the exact suffix instead.
    if name.endswith('.jpg'):
        name = name[:-len('.jpg')]
    return name


def _move_to_cache(filename):
    """Move one generator-relative file from TEST_IMG_DIR into CACHE_DIR."""
    stripped_im = filename[filename.index('/') + 1:]
    shutil.move(src=os.path.join(conf['TEST_IMG_DIR'], filename),
                dst=os.path.join(conf['CACHE_DIR'], stripped_im))


def get_predictions(tag, verbose=0):
    """Run the model registered under `tag` on the images in TEST_IMG_DIR.

    Parameters
    ----------
    tag : str
        Key into conf['MODEL_NAMES'] / conf['MODEL_TAGS'] selecting the model.
    verbose : int
        Passed to `predict_generator`; values > 0 also print summary counts.

    Returns
    -------
    dict
        Class name -> {image id -> score}, where the image id is the file
        name without its leading directory and '.jpg' extension.

        * Two-class tags: images scoring >= 0.5 are stored under the last
          class of conf['MODEL_TAGS'][tag] and moved to CACHE_DIR. For
          'MEL_BKL' nothing is moved and images scoring < 0.5 are stored
          under the first class with score `1 - p`.
        * Tags with more than two classes: the last output column is treated
          as 'others'. Every image whose 'others' score is not > 0.5 is
          stored under each remaining class with that class's score and then
          moved to CACHE_DIR.

    Raises
    ------
    AttributeError
        If `tag` is not a key of conf['MODEL_NAMES'].
    """
    # Validate first so an unknown tag fails before any generator/model work.
    if tag not in conf['MODEL_NAMES']:
        raise AttributeError('{} not in model name repo'.format(tag))

    preprocessors = {
        'NV_NON_NV': preprocess_nv_non_nv,
        'DF_VASC_OTHERS': preprocess_df_vasc,
        'MEL_BKL': preprocess_bkl_mel_others,
        'AKIEC_OTHERS': preprocess_akiec_others,
    }
    # Any other registered tag intentionally falls back to the BCC preprocessing.
    gen = ImageDataGenerator(
        preprocessing_function=preprocessors.get(tag, preprocess_bcc_others),
        rescale=1./255)

    print('Loading {}'.format(tag))
    custom_obj = {
        'f1': f1,
        'precision': precision,
        'recall': recall
    }
    best_model = load_model(conf['MODEL_NAMES'][tag], compile=True, custom_objects=custom_obj)
    print('{} Model loaded.'.format(tag))

    # Only the NV model uses its own input size.
    target_size = conf['NV_IMG_DIMS'] if tag == 'NV_NON_NV' else conf['OTHER_IMG_DIMS']
    images = gen.flow_from_directory(conf['TEST_IMG_DIR'],
                                     target_size=target_size,
                                     shuffle=False,  # keep predictions aligned with images.filenames
                                     batch_size=conf['BATCH_SIZE'],
                                     class_mode=None)

    predictions = best_model.predict_generator(images, verbose=verbose)
    filenames = images.filenames
    classes = conf['MODEL_TAGS'][tag]

    prediction_dict = dict()

    if len(classes) == 2:
        # Binary: `predictions[ix][0]` below assumes one score column per image.
        positive_indices = np.where(predictions >= 0.5)[0]
        negative_indices = np.where(predictions < 0.5)[0]

        assert len(positive_indices) + len(negative_indices) == images.samples, "Length mismatch for {}".format(tag)

        if verbose > 0:
            print('Predicted {} positive examples.'.format(len(positive_indices)))
            print('Predicted {} negative examples.'.format(len(negative_indices)))

        # Class -> (image id -> prediction); the positive class is the last tag entry.
        prediction_dict[classes[-1]] = {
            _image_id(filenames[ix]): predictions[ix][0] for ix in positive_indices
        }

        if tag == 'MEL_BKL':
            # Negatives are reported under the first class with the
            # complementary score; no files are moved for this tag.
            prediction_dict[classes[0]] = {
                _image_id(filenames[ix]): 1 - predictions[ix][0] for ix in negative_indices
            }
        else:
            # Move the positive images out of the test directory (best effort).
            moved = 0
            for ix in positive_indices:
                im = filenames[ix]
                try:
                    _move_to_cache(im)
                    moved += 1
                except IOError:
                    print('Error while moving file {}'.format(im))
            if verbose > 0:
                print('Moved {} positive images to cache directory.'.format(moved))

    elif len(classes) > 2:
        # Multi-class. Predictions will be (m*n).
        num_classes = predictions.shape[1]

        assert len(classes) == num_classes, "Mismatch in feature dimension for {}.".format(tag)
        assert len(filenames) == predictions.shape[0], "Mismatch in image dimension for {}".format(tag)

        # Split the prediction matrix into {classes}:others
        preds_for_others = predictions[:, -1]
        class_preds = predictions[:, :-1]

        # Boolean mask instead of a filename list: O(1) per-row membership.
        is_other = preds_for_others > 0.5
        print('Identified {} images as \'others\''.format(int(np.count_nonzero(is_other))))

        kept_rows = np.where(~is_other)[0]
        for col_ix in range(class_preds.shape[1]):
            # Keyed by image id, consistent with the binary branches.
            prediction_dict[classes[col_ix]] = {
                _image_id(filenames[row_ix]): class_preds[row_ix, col_ix] for row_ix in kept_rows
            }

        for row_ix in kept_rows:
            _move_to_cache(filenames[row_ix])

    return prediction_dict

In [None]:
# Stage NV_NON_NV: predict, then persist the result to its configured pickle file.
preds_1 = get_predictions(tag='NV_NON_NV', verbose=1)
nv_non_nv_pickle = conf['PICKLE_FILES']['NV_NON_NV']
with open(nv_non_nv_pickle, 'wb') as pickle_file:
    pickle.dump(preds_1, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
print('Pickled NV_NON_NV')


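# \*Please restart the kernel here, then re-run the imports/configuration, preprocessing/metrics and get_predictions cells before continuing\*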

In [None]:
# Stage DF_VASC_OTHERS: predict, then persist the result to its configured pickle file.
preds_2 = get_predictions(tag='DF_VASC_OTHERS', verbose=1)
df_vasc_pickle = conf['PICKLE_FILES']['DF_VASC_OTHERS']
with open(df_vasc_pickle, 'wb') as pickle_file:
    pickle.dump(preds_2, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
print('Pickled DF_VASC_OTHERS')


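# \*Please restart the kernel here, then re-run the imports/configuration, preprocessing/metrics and get_predictions cells before continuing\*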

In [None]:
# Stage BCC_OTHERS: predict, then persist the result to its configured pickle file.
preds_3 = get_predictions(tag='BCC_OTHERS', verbose=1)
bcc_pickle = conf['PICKLE_FILES']['BCC_OTHERS']
with open(bcc_pickle, 'wb') as pickle_file:
    pickle.dump(preds_3, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
print('Pickled BCC_OTHERS')

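# \*Please restart the kernel here, then re-run the imports/configuration, preprocessing/metrics and get_predictions cells before continuing\*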

In [None]:
# Stage AKIEC_OTHERS: predict, then persist the result to its configured pickle file.
preds_4 = get_predictions(tag='AKIEC_OTHERS', verbose=1)
akiec_pickle = conf['PICKLE_FILES']['AKIEC_OTHERS']
with open(akiec_pickle, 'wb') as pickle_file:
    pickle.dump(preds_4, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
print('Pickled AKIEC_OTHERS')

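# \*Please restart the kernel here, then re-run the imports/configuration, preprocessing/metrics and get_predictions cells before continuing\*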

In [None]:
# Stage MEL_BKL: predict, then persist the result to its configured pickle file.
preds_5 = get_predictions(tag='MEL_BKL', verbose=1)
mel_bkl_pickle = conf['PICKLE_FILES']['MEL_BKL']
with open(mel_bkl_pickle, 'wb') as pickle_file:
    pickle.dump(preds_5, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
print('Pickled MEL_BKL')

# \*Please restart the kernel here, then re-run the imports/configuration cell before continuing\*

In [None]:
import pandas as pd

# Load the per-stage prediction dictionaries written by the cells above.
# NOTE: pickle.load can execute arbitrary code; only load files produced by
# this notebook.
with open(conf['PICKLE_FILES']['NV_NON_NV'], 'rb') as handle:
    preds_1 = pickle.load(handle)
print(len(preds_1['NV']))
with open(conf['PICKLE_FILES']['DF_VASC_OTHERS'], 'rb') as handle:
    preds_2 = pickle.load(handle)

with open(conf['PICKLE_FILES']['BCC_OTHERS'], 'rb') as handle:
    preds_3 = pickle.load(handle)

with open(conf['PICKLE_FILES']['AKIEC_OTHERS'], 'rb') as handle:
    preds_4 = pickle.load(handle)

with open(conf['PICKLE_FILES']['MEL_BKL'], 'rb') as handle:
    preds_5 = pickle.load(handle)

class_list = conf['CLASSES']
# Despite its name, this maps class name -> column index of prediction_matrix.
ix_to_column = {col: i for i, col in enumerate(class_list)}

# (stage predictions, classes written on each row). The images of a group are
# the keys of its first class; DF and VASC are read for the same image, so
# they share one row. Every other class gets its own rows.
row_groups = [
    (preds_1, ['NV']),
    (preds_2, ['DF', 'VASC']),
    (preds_3, ['BCC']),
    (preds_4, ['AKIEC']),
    (preds_5, ['MEL']),
    (preds_5, ['BKL']),
]

# Size the matrix from the loaded predictions instead of a hard-coded row
# count, so the cell works for any number of test images.
num_rows = sum(len(stage_preds[group_classes[0]]) for stage_preds, group_classes in row_groups)
prediction_matrix = np.zeros((num_rows, len(class_list)))

idx = 0
filenames = []
for stage_preds, group_classes in row_groups:
    for img in stage_preds[group_classes[0]].keys():
        for class_name in group_classes:
            prediction_matrix[idx, ix_to_column[class_name]] = stage_preds[class_name][img]
        filenames.append(img)
        idx += 1

print(idx)

# One row per image: the 'image' column first, then one score column per
# class in class_list order (classes not predicted for an image stay at 0).
df = pd.DataFrame(prediction_matrix, columns=list(class_list))
df.insert(0, 'image', filenames)

df = df.sort_values('image')

In [None]:
def parse(x):
    """Normalise an image name to a bare image id.

    Names containing a '/' have everything up to and including the first '/'
    dropped and a trailing '.jpg' extension removed, e.g.
    'test/ISIC_0034524.jpg' -> 'ISIC_0034524'. Names without a '/' are
    returned unchanged.
    """
    if '/' in x:
        x = x[x.index('/') + 1:]
        # str.strip('.jpg') would remove any of '.', 'j', 'p', 'g' from both
        # ends (e.g. 'pig.jpg' -> 'i'); remove only the exact suffix.
        if x.endswith('.jpg'):
            x = x[:-len('.jpg')]
    return x

In [None]:
# Normalise every entry of the 'image' column with parse().
df['image'] = df['image'].apply(parse)

In [None]:
# Order the rows by image name and write the table to CSV without the index.
df = df.sort_values(by='image')
df.to_csv('test_predictions_hierarchical.csv', index=False)