In [None]:
import numpy  as np 
import pandas as pd 
import os
import cv2 
import gc
import psutil
import matplotlib.pyplot as plt
from sklearn.metrics import cohen_kappa_score, confusion_matrix

import keras
from keras.preprocessing import image
from keras.applications  import DenseNet121
from keras.activations   import softmax, relu
from keras.optimizers    import Adam
from keras.callbacks     import Callback, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models        import Sequential, Model
from keras.layers        import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Input
from keras.layers        import Dropout, Flatten, Dense, BatchNormalization

# Global constants shared by the cells below
IMG_DIM       = 256   # images are resized to IMG_DIM x IMG_DIM before entering the network
BATCH_SIZE    = 32
CHANNELS      = 3     # colour channels per image
NUM_CLASSES   = 5     # number of outputs of the network

# Show what is available in the working directory and in the input folders
for _listed_dir in (".",
                    "../",
                    "../input/",
                    "../input/aptos2019-blindness-detection",
                    "../input/densenetmulti"):
    print(os.listdir(_listed_dir))

# Root folder of the competition data (csv files and image folders)
INPUT_FOLDER = '../input/aptos2019-blindness-detection/'

# Preprocessing Functions
We need to crop and preprocess all the test images into the working directory

In [None]:

def crop(gray, img, percent_smaller):
    """Crop `img` to the bright region of `gray`, then shrink the box slightly.

    The bounding box is located by looking only at the middle column of `gray`
    (for the top/bottom edges) and at its middle row (for the left/right
    edges): the first and last pixels brighter than a fixed threshold mark the
    edges.

    Args:
        gray: 2-D array, indexed as gray[row, col], used to find the bounds.
        img: array to crop; it is sliced with the row/column bounds found in
            `gray`.
        percent_smaller: fraction of the detected height/width that is trimmed
            from each side of the box (e.g. 0.02).

    Returns:
        img[top:bottom, left:right], or `img` unchanged when no pixel above
        the threshold lies on the middle row/column, or when the detected
        region is less than 100 pixels high or wide.
    """
    thresh = 8  # pixels at or below this value count as background

    # positions of the above-threshold pixels along the middle column / middle row
    bright_rows = np.flatnonzero(gray[:, int(gray.shape[1] / 2)] > thresh)
    bright_cols = np.flatnonzero(gray[int(gray.shape[0] / 2)] > thresh)

    # A completely dark middle row or column has no edges to find; scanning for
    # them pixel by pixel would run off the end of the array.
    if bright_rows.size == 0 or bright_cols.size == 0:
        print("Error: crop: no pixel above the threshold on the middle row/column")
        return img

    top, bottom = int(bright_rows[0]), int(bright_rows[-1])
    left, right = int(bright_cols[0]), int(bright_cols[-1])

    height = bottom - top
    width  = right - left

    # refuse to crop down to a tiny region; report the bounds that were detected
    if height < 100 or width < 100:
        print("Error: crop: bottom:", bottom, "top:", top)
        print("Error: crop: right:", right, "left:", left)
        return img

    # pull every edge inwards by the requested fraction of the box size
    bottom -= int(percent_smaller * height)
    top    += int(percent_smaller * height)
    right  -= int(percent_smaller * width)
    left   += int(percent_smaller * width)

    return img[top:bottom, left:right]


def bensYCC(bgr, weight=4, gamma=20):
    """Apply the "Ben's" blur-subtraction enhancement to the brightness channel only.

    The image is converted from BGR to YCrCb, the Y channel is replaced by
    weight * Y - weight * blur(Y) + 128, and the result is converted back.

    Args:
        bgr: image in OpenCV BGR channel order.
        weight: multiplier applied to the Y channel and (negated) to its blur.
        gamma: passed to cv2.GaussianBlur as the sigma argument.

    Returns:
        The enhanced image in BGR channel order.
    """
    # split off the brightness (y) so the two chroma channels stay untouched
    luma, cr, cb = cv2.split(cv2.cvtColor(bgr, cv2.COLOR_BGR2YCrCb))

    # subtract the blurred brightness from the brightness, re-centred on 128
    blurred_luma = cv2.GaussianBlur(luma, (0, 0), gamma)
    enhanced_luma = cv2.addWeighted(luma, weight, blurred_luma, -weight, 128)

    # put the channels back together and return to BGR
    return cv2.cvtColor(cv2.merge((enhanced_luma, cr, cb)), cv2.COLOR_YCrCb2BGR)


# def claheYCC(bgr, clipLimit=5, grid=8):
    
#     # convert to y, cr, cb so that we can modify the image based on just the y (brightness)
#     ycc = cv2.cvtColor(bgr, cv2.COLOR_BGR2YCrCb)
#     y, cr, cb = cv2.split(ycc)

#     # perform the clahe algorithm on the y component
#     clahe = cv2.createCLAHE(clipLimit=clipLimit, tileGridSize=(grid, grid))
#     y = clahe.apply(y)
#     y = adjust_gamma(y, 1+np.log(110)-np.log(np.median(y)))

#     # merge the ycc back together, and recolor it
#     ycc_modified = cv2.merge((y, cr, cb))
#     img = cv2.cvtColor(ycc_modified, cv2.COLOR_YCrCb2BGR)
    
#     return img 

def bensSimple(bgr, weight=4, gamma=20):
    """Apply the blur-subtraction enhancement directly to all channels of `bgr`.

    Computes weight * bgr - weight * blur(bgr) + 128, where `gamma` is passed
    to cv2.GaussianBlur as the sigma argument.
    """
    blurred = cv2.GaussianBlur(bgr, (0, 0), gamma)
    return cv2.addWeighted(bgr, weight, blurred, -weight, 128)


# def reflectAndSquareUp(img):
    
#     height = img.shape[0]
#     width  = img.shape[1]
    
#     # if its portrait mode, it's probably already kind of square. Make it properly square by cutting
#     # down the height until the dimensions match
#     if (height > width):
        
#         offset = int((height - width)/2)
#         return img[offset:offset+width]
    
#     # otherwise, do the whole reflection thingo
#     else:
#         if len(img.shape) == 3:
#             new_img = np.zeros((width, width, img.shape[2]), np.uint8)
#         else:
#             new_img = np.zeros((width, width), np.uint8)

#         #  0  |
#         #     |
#         #  h1 |####
#         #     |####
#         #     |####
#         #  h2 |
#         #     |

#         h1 = int((width - height)/2)
#         h2 = h1 + height

#         # paste the original into the center
#         new_img[h1:h2,:] = img

#         # paste in the reflections
#         for i in range(h1):
#             new_img[h1-i] = img[i]

#         for i in range(width - h2):
#             new_img[h2+i] = img[height - i - 1]

#         return new_img

# def circleMask(img):
    
#     if (img.shape[0] != img.shape[1]):
#         print("Error: circle mask assumes square image")
#         return img
    
#     dim = img.shape[0]
#     half = int(dim/2)
    
#     # crop out circle:
#     circle_mask = np.zeros((dim, dim), np.uint8)
#     circle_mask = cv2.circle(circle_mask, (half, half), half, 1, thickness=-1)

#     return cv2.bitwise_and(img, img, mask=circle_mask)


# build a lookup table mapping the pixel values [0, 255] to
# their adjusted gamma values
# def adjust_gamma(image, gamma=1.0):
#     invGamma = 1.0 / gamma
#     table = np.array([((i / 255.0) ** invGamma) * 255
#                      for i in np.arange(0, 256)]).astype("uint8")
#     return cv2.LUT(image, table)


def process(bgr, model):
    """Turn a raw BGR image into the RGB network input for the given model type.

    Steps:
      1. Unless the image is already 480 x 640 x 3, crop it to the bright
         region (using the green channel as greyscale) and take a centred
         window that is 90% of the cropped width with a 640:480 aspect ratio.
      2. Resize to IMG_DIM x IMG_DIM.
      3. Apply the colouring function that belongs to `model`.
      4. Convert from BGR to RGB.

    Args:
        bgr: image array in OpenCV BGR order, e.g. the result of cv2.imread.
        model: "normal" (bensYCC), "weird" (bensSimple) or "clahe" (claheYCC).

    Returns:
        The processed IMG_DIM x IMG_DIM image in RGB channel order.

    Raises:
        ValueError: if `bgr` is None (cv2.imread returns None for a file it
            cannot read) or if `model` is not one of the known types.
    """
    if bgr is None:
        raise ValueError("Error, no image given (the file could not be read?)")

    # Choose the colouring function first so that an unknown model type fails
    # immediately with a clear error instead of later with an unbound variable.
    if model == "normal":
        colouring_fn = bensYCC
    elif model == "weird":
        colouring_fn = bensSimple
    elif model == "clahe":
        # NOTE: claheYCC must be defined for this branch to work; while its
        # definition is commented out, selecting "clahe" raises a NameError.
        colouring_fn = claheYCC
    else:
        raise ValueError(f"Error, invalid model type given: {model}")

    # ========= Crop into 640 x 480 format ========
    if bgr.shape != (480, 640, 3):

        green   = bgr[:, :, 1]  # use green as a greyscale
        cropped = crop(green, bgr, 0.02)

        # centred window: 90% of the width, height chosen for a 640:480 ratio
        width  = int(cropped.shape[1] * 0.9)
        height = int(width * 480 / 640)
        if height > cropped.shape[0]:
            # not enough rows for that ratio: use (almost) the full height instead
            height = cropped.shape[0] - 2
        h = int((cropped.shape[0] - height) / 2)
        w = int((cropped.shape[1] - width) / 2)

        test_crop = cropped[h:height+h, w:width+w, :]

    else:
        test_crop = bgr

    resized = cv2.resize(test_crop, (IMG_DIM, IMG_DIM), interpolation=cv2.INTER_AREA)
    img     = colouring_fn(resized)

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


In [None]:
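# Data generator: applies jittered augmentations (flips, zoom, rotation, brightness and channel
# shifts) to a block of images. In this notebook its output is fed directly to the network at
# prediction time; the random-crop generator used during training is not part of this notebook.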

def dataGenerator(jitter=0.1):
    """Build a Keras ImageDataGenerator whose augmentation strength scales with `jitter`.

    Pixel values are always rescaled by 1/255. Flips are only enabled when
    `jitter` exceeds 0.01; zoom, rotation, brightness and channel-shift ranges
    are all derived from `jitter`.

    Args:
        jitter: augmentation strength; 0 gives (almost) no augmentation.

    Returns:
        The configured ImageDataGenerator.
    """
    flips_enabled = jitter > 0.01

    augmentation = dict(
        rescale=1./255,
        horizontal_flip=flips_enabled,
        vertical_flip=flips_enabled,
        zoom_range=[max(0.8, 1 - 5*jitter), 1],        # never zoom below 0.8
        rotation_range=int(600*jitter),
        brightness_range=[1 - jitter/3, 1 + jitter/3],
        fill_mode="mirror",
        channel_shift_range=int(30*jitter),
    )
    return image.ImageDataGenerator(**augmentation)


In [None]:
def test_datagen_plot(processing_function, jitter=0.03, figure=None):
    """Plot a 4 x 4 grid of processed and augmented test images as a visual check.

    Up to 100 random test images are read, run through `process` with the
    given model type, and passed through `dataGenerator(jitter)`; the first 16
    images of the first generated batch are drawn.

    Args:
        processing_function: model type forwarded to `process`
            ("normal", "weird" or "clahe").
        jitter: augmentation strength forwarded to `dataGenerator`.
        figure: matplotlib figure to draw into. Defaults to the current
            figure (plt.gcf()), so a figure created just before the call is
            used.
    """
    if figure is None:
        figure = plt.gcf()

    images_dir  = f"{INPUT_FOLDER}test_images/"
    df          = pd.read_csv(f"{INPUT_FOLDER}test.csv")
    df.id_code  = df.id_code.apply(lambda x: x + ".png")

    # never ask for more samples than the csv has rows
    n_samples = min(100, df.index.size)

    # process images
    img_block = np.empty((n_samples, IMG_DIM, IMG_DIM, CHANNELS))
    for i, filename in enumerate(df.sample(n_samples).id_code):
        bgr = cv2.imread(images_dir + filename)
        img_block[i, :, :, :] = process(bgr, processing_function)

    # only the first augmented batch is needed for the preview
    batch = next(dataGenerator(jitter).flow(img_block))
    for j in range(min(16, len(batch))):
        ax = figure.add_subplot(4, 4, j + 1)
        ax.imshow(batch[j])

# Create the figure the preview is drawn into, plot the "weird" preprocessing, then free memory
figure = plt.figure(figsize=(22, 20))
test_datagen_plot(processing_function="weird")
gc.collect()


# Model Importing Function

In [None]:
def load_network(network_name):
    """Build the DenseNet121 classifier and load the saved weights for `network_name`.

    The network is a DenseNet121 backbone (no top, no pretrained weights)
    followed by global average pooling, dropout and a sigmoid layer with
    NUM_CLASSES outputs.

    Args:
        network_name: name of the weights file (without ".h5") inside
            ../input/densenetmulti/; "normal" maps to "dense-0.800.h5".

    Returns:
        The compiled Keras model with the weights loaded.
    """
    # the "normal" network is stored under a different file name
    if network_name == "normal":
        weights = "../input/densenetmulti/dense-0.800.h5"
    else:
        weights = f"../input/densenetmulti/{network_name}.h5"

    backbone = DenseNet121(weights=None, include_top=False,
                           input_shape=(IMG_DIM, IMG_DIM, CHANNELS))
    model = Sequential([
        backbone,
        GlobalAveragePooling2D(),
        Dropout(0.5),
        Dense(NUM_CLASSES, activation='sigmoid'),
    ])

    model.load_weights(weights)
    model.compile(optimizer=Adam(lr=0.00005), loss='binary_crossentropy', metrics=['accuracy'])

    return model


# Make jittered predictions on either data set

In [None]:
# def prediction_convert_sum(predictions, thresholds):
    
#     thresholded = np.zeros(predictions.shape)
    
#     for i in range(NUM_CLASSES):
#         thresholded[:,i] = predictions[:,i] > thresholds[i]
#     y_val = thresholded.astype(int).sum(axis=1) - 1
#     return y_val

def prediction_convert_highest(predictions, thresholds):
    """Convert per-class scores into class labels using per-class thresholds.

    For every row, the label is the highest class index whose score is
    strictly greater than that class's threshold; rows where no score passes
    its threshold get label 0.

    Args:
        predictions: array of shape (n_samples, n_classes) with the scores.
        thresholds: sequence with at least n_classes thresholds, one per class.

    Returns:
        1-D integer array of length n_samples with the class labels.
    """
    predictions = np.asarray(predictions)
    n_classes = predictions.shape[1]

    above = predictions > np.asarray(thresholds)[:n_classes]

    # argmax over the reversed columns finds the first passing class counted
    # from the highest index; map that position back to the real class index
    highest = n_classes - 1 - np.argmax(above[:, ::-1], axis=1)

    # rows without any passing class fall back to class 0
    # (builtin int is used because the np.int alias no longer exists in NumPy >= 1.24)
    return np.where(above.any(axis=1), highest, 0).astype(int)

def make_predictions(d_set, models):
    """Predict class scores for a whole data set with a jittered model ensemble.

    Every model in `models` predicts every image once per entry of
    `jitter_amounts` (test-time augmentation); the returned score for each
    image and class is the median over all model/jitter combinations.

    Args:
        d_set: data set name; images are read from
            f"{INPUT_FOLDER}{d_set}_images/" and listed in
            f"{INPUT_FOLDER}{d_set}.csv" (column `id_code`).
        models: list of model types understood by `load_network` and `process`.

    Returns:
        Array of shape (n_images, NUM_CLASSES) with the median scores.
    """
    images_dir  = f"{INPUT_FOLDER}{d_set}_images/"
    df          = pd.read_csv(f"{INPUT_FOLDER}{d_set}.csv")
    df.id_code  = df.id_code.apply(lambda x: x + ".png")

    block_size  = 512   # images held in memory at once
    total       = df.index.size

    jitter_amounts = [0, 0.02, 0.02, 0.02, 0.03, 0.03, 0.03, 0.1]
    n_jitters      = len(jitter_amounts)

    # one slot per (model, jitter) combination
    ensemble_predictions = np.zeros((total, n_jitters * len(models), NUM_CLASSES))

    for m, model in enumerate(models):

        print(f"Making predictions with the {model} model on the {d_set} dataset.")
        neural_net = load_network(model)

        # have to do everything in blocks of images
        for start in range(0, total, block_size):

            end = min(start + block_size, total)

            # process images
            img_block = np.empty((end - start, IMG_DIM, IMG_DIM, CHANNELS))
            for i, filename in enumerate(df[start:end].id_code):
                try:
                    bgr = cv2.imread(images_dir + filename)
                    img_block[i, :, :, :] = process(bgr, model)
                except Exception as err:
                    # best effort: an unreadable image becomes a flat grey image
                    print(f"Error opening or manipulating image {filename}: {err}")
                    img_block[i, :, :, :] = 128.

            # make predictions
            for i, jit in enumerate(jitter_amounts):
                datagen = dataGenerator(jit).flow(img_block, shuffle=False)
                # Each model owns a contiguous run of n_jitters slots. The stride
                # must be the number of jitters, not the number of models,
                # otherwise models overwrite each other and slots stay zero.
                slot = m * n_jitters + i
                ensemble_predictions[start:end, slot] = neural_net.predict_generator(
                    generator=datagen, steps=len(datagen), workers=4, verbose=1)

            print(f"{start} - {end} finished")
            gc.collect()

    return np.median(ensemble_predictions, axis=1)

# Find the best Class Thresholds

In [None]:
# def find_best_thresholds(train_predictions):
    
#     # get predictions
#     print("Finding best thresholds...")
    
#     prediction_convert = prediction_convert_highest
    
#     # make test predictions
#     gc.collect()
    
#     train_df = pd.read_csv(f"{INPUT_FOLDER}train.csv")
#     y_actual = train_df.diagnosis.astype(int).values
    
#     thresholds = [0.5 for i in range(NUM_CLASSES)]
#     d_thresh   = 0.25
    
#     for sweep in range(5):

#         for label in range(5):
            
#             currKappa = cohen_kappa_score(y_actual, prediction_convert(train_predictions, thresholds), weights='quadratic')
            
#             print(currKappa)
            
#             thresholds[label] += d_thresh
#             kappaUp = cohen_kappa_score(y_actual, prediction_convert(train_predictions, thresholds), weights='quadratic')

#             thresholds[label] -= 2*d_thresh
#             kappaDown = cohen_kappa_score(y_actual, prediction_convert(train_predictions, thresholds), weights='quadratic')
            
#             thresholds[label] += d_thresh 
            
#             if kappaUp > currKappa:
#                 thresholds[label] += d_thresh
#             elif kappaDown > currKappa:
#                 thresholds[label] -= d_thresh

#         d_thresh /= 2
    
#     gc.collect()
#     return thresholds

# train_predictions = make_predictions("train", ["normal", "weird"])
# best_thresholds   = find_best_thresholds(train_predictions)
# print(best_thresholds)


In [None]:

# thresholds = [0.5 for i in range(5)]

# train_df = pd.read_csv(f"{INPUT_FOLDER}train.csv")
# y_actual = train_df.diagnosis.astype(int).values
# train_fracs = train_df.diagnosis.value_counts()/train_df.index.size
# print(train_fracs)

# for sweep in range(5):
#     for i in range(4):
#         _, counts = np.unique(prediction_convert_highest(train_predictions, thresholds), return_counts=True)
#         pred_fracs = counts / len(train_predictions)

#         print(pred_fracs)

#         # if the train fraction is higher than prediction fraction, we need to increase the thresholds
#         try:
#             while train_fracs[i] > pred_fracs[i]:
#                 for j in range(i, 5):
#                     thresholds[j] += 0.01
#                 _, counts = np.unique(prediction_convert_highest(train_predictions, thresholds), return_counts=True)
#                 pred_fracs = counts / len(train_predictions)
#         except:
#             for j in range(i, 5):
#                 thresholds[j] -= 0.01
#             _, counts = np.unique(prediction_convert_highest(train_predictions, thresholds), return_counts=True)
#             pred_fracs = counts / len(train_predictions)

#         # conversely decrease them if lower
#         try:
#             while train_fracs[i] < pred_fracs[i]:
#                 for j in range(i, 5):
#                     thresholds[j] -= 0.01
#                 _, counts = np.unique(prediction_convert_highest(train_predictions, thresholds), return_counts=True)
#                 pred_fracs = counts / len(train_predictions)
#         except:
#             for j in range(i, 5):
#                 thresholds[j] += 0.01
#             _, counts = np.unique(prediction_convert_highest(train_predictions, thresholds), return_counts=True)
#             pred_fracs = counts / len(train_predictions)

#         print(pred_fracs, "\n")
    
#     print(cohen_kappa_score(y_actual, prediction_convert_highest(train_predictions, thresholds), weights='quadratic'), thresholds, "\n")
    

In [None]:

# def label_convert_two_stage(stage_1_preds, stage_2_preds):
    
#     thresh_1 = np.zeros((stage_1_preds.shape[0], 3))
#     thresh_2 = np.zeros((stage_2_preds.shape[0], 3))
    
#     for i in range(3):
#         thresh_1[:,i] = stage_1_preds[:,i] > 0.5
#         thresh_2[:,i] = stage_2_preds[:,i] > 0.5
    
#     y_val   = thresh_1.astype(int).sum(axis=1) - 1
#     y_val_2 = thresh_2.astype(int).sum(axis=1) + 1
    
#     for i in range(stage_1_preds.shape[0]):
#         if y_val[i] == 2:
#             y_val[i] = y_val_2[i]
#     return y_val

# def label_convert_two_stage_top(stage_1_preds, stage_2_preds):
    
#     thresh_1 = np.zeros((stage_1_preds.shape[0], 3))
#     thresh_2 = np.zeros((stage_2_preds.shape[0], 3))
    
#     for i in range(3):
#         thresh_1[:,i] = stage_1_preds[:,i] > 0.5
#         thresh_2[:,i] = stage_2_preds[:,i] > 0.5
    
#     y_val   = np.zeros(stage_1_preds.shape[0])
#     y_val_2 = np.zeros(stage_1_preds.shape[0])
#     for i in range(stage_1_preds.shape[0]):
#         for j in range(2, -1, -1):
#             if thresh_1:
#                 y_val[i] = j
#         for j in range(2, -1, -1):
#             if thresh_2:
#                 y_val_2[i] = j + 2
    
#     for i in range(stage_1_preds.shape[0]):
#         if y_val[i] == 2:
#             y_val[i] = y_val_2[i]
#     return y_val

# Perform predictions on test set 

In [None]:
# Per-class decision thresholds used to turn the scores into class labels
# thresholds = [0.5 for i in range(NUM_CLASSES)]
thresholds = [0.5, 0.5, 0.4, 0.4, 0.3]

# Score the test set with both models, then convert the scores to class labels
predictions = make_predictions("test", ["normal", "weird"])
as_classes  = prediction_convert_highest(predictions, thresholds)
print(as_classes[:10])

# Attach the labels to the test ids and write the submission file
test_df = pd.read_csv(f"{INPUT_FOLDER}test.csv").assign(diagnosis=as_classes)
test_df.to_csv('submission.csv', index=False)