In [1]:
import os
import sys
sys.path.append(os.path.abspath('../input/efficientnet/efficientnet-master/efficientnet-master/'))
from efficientnet import EfficientNetB5

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Standard dependencies
import cv2
import time
import scipy as sp
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
from functools import partial
import matplotlib.pyplot as plt

# Machine Learning
import tensorflow as tf
import keras
from keras import initializers
from keras import regularizers
from keras import constraints
from keras import backend as K
from keras.activations import elu, relu, sigmoid
from keras.optimizers import Adam
from keras.models import Sequential
from keras.engine import Layer, InputSpec
from keras.utils.generic_utils import get_custom_objects
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Conv2D, Flatten, GlobalAveragePooling2D, Dropout
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import cohen_kappa_score

# Path specifications
KAGGLE_DIR = '../input/aptos2019-blindness-detection/'
TRAIN_DF_PATH = KAGGLE_DIR + "train.csv"
TEST_DF_PATH = KAGGLE_DIR + 'test.csv'
TRAIN_IMG_PATH = KAGGLE_DIR + "train_images/"
TEST_IMG_PATH = KAGGLE_DIR + 'test_images/'

# Set seed for reproducability
seed = 1234
np.random.seed(seed)
tf.set_random_seed(seed)

# For keeping time. GPU limit for this competition is set to ± 9 hours.
t_start = time.time()

# File sizes and specifications
print('\n# Files and file sizes')
for file in os.listdir(KAGGLE_DIR):
    print('{}| {} MB'.format(file.ljust(30), 
                             str(round(os.path.getsize(KAGGLE_DIR + file) / 1000000, 2))))


# Files and file sizes
sample_submission.csv         | 0.03 MB
test_images                   | 0.07 MB
train_images                  | 0.13 MB
test.csv                      | 0.03 MB
train.csv                     | 0.05 MB


In [3]:
class GroupNormalization(Layer):
    """Group normalization layer
    Group Normalization divides the channels into groups and computes within each group
    the mean and variance for normalization. GN's computation is independent of batch sizes,
    and its accuracy is stable in a wide range of batch sizes
    # Arguments
        groups: Integer, the number of groups for Group Normalization.
        axis: Integer, the axis that should be normalized
            (typically the features axis).
            For instance, after a `Conv2D` layer with
            `data_format="channels_first"`,
            set `axis=1` in `BatchNormalization`.
        epsilon: Small float added to variance to avoid dividing by zero.
        center: If True, add offset of `beta` to normalized tensor.
            If False, `beta` is ignored.
        scale: If True, multiply by `gamma`.
            If False, `gamma` is not used.
            When the next layer is linear (also e.g. `nn.relu`),
            this can be disabled since the scaling
            will be done by the next layer.
        beta_initializer: Initializer for the beta weight.
        gamma_initializer: Initializer for the gamma weight.
        beta_regularizer: Optional regularizer for the beta weight.
        gamma_regularizer: Optional regularizer for the gamma weight.
        beta_constraint: Optional constraint for the beta weight.
        gamma_constraint: Optional constraint for the gamma weight.
    # Input shape
        Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.
    # Output shape
        Same shape as input.
    # References
        - [Group Normalization](https://arxiv.org/abs/1803.08494)
    """

    def __init__(self,
                 groups=4,
                 axis=-1,
                 epsilon=1e-5,
                 center=True,
                 scale=True,
                 beta_initializer='zeros',
                 gamma_initializer='ones',
                 beta_regularizer=None,
                 gamma_regularizer=None,
                 beta_constraint=None,
                 gamma_constraint=None,
                 **kwargs):
        super(GroupNormalization, self).__init__(**kwargs)
        self.supports_masking = True
        self.groups = groups
        self.axis = axis
        self.epsilon = epsilon
        self.center = center
        self.scale = scale
        self.beta_initializer = initializers.get(beta_initializer)
        self.gamma_initializer = initializers.get(gamma_initializer)
        self.beta_regularizer = regularizers.get(beta_regularizer)
        self.gamma_regularizer = regularizers.get(gamma_regularizer)
        self.beta_constraint = constraints.get(beta_constraint)
        self.gamma_constraint = constraints.get(gamma_constraint)

    def build(self, input_shape):
        dim = input_shape[self.axis]

        if dim is None:
            raise ValueError('Axis ' + str(self.axis) + ' of '
                             'input tensor should have a defined dimension '
                             'but the layer received an input with shape ' +
                             str(input_shape) + '.')

        if dim < self.groups:
            raise ValueError('Number of groups (' + str(self.groups) + ') cannot be '
                             'more than the number of channels (' +
                             str(dim) + ').')

        if dim % self.groups != 0:
            raise ValueError('Number of groups (' + str(self.groups) + ') must be a '
                             'multiple of the number of channels (' +
                             str(dim) + ').')

        self.input_spec = InputSpec(ndim=len(input_shape),
                                    axes={self.axis: dim})
        shape = (dim,)

        if self.scale:
            self.gamma = self.add_weight(shape=shape,
                                         name='gamma',
                                         initializer=self.gamma_initializer,
                                         regularizer=self.gamma_regularizer,
                                         constraint=self.gamma_constraint)
        else:
            self.gamma = None
        if self.center:
            self.beta = self.add_weight(shape=shape,
                                        name='beta',
                                        initializer=self.beta_initializer,
                                        regularizer=self.beta_regularizer,
                                        constraint=self.beta_constraint)
        else:
            self.beta = None
        self.built = True

    def call(self, inputs, **kwargs):
        input_shape = K.int_shape(inputs)
        tensor_input_shape = K.shape(inputs)

        # Prepare broadcasting shape.
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis] // self.groups
        broadcast_shape.insert(1, self.groups)

        reshape_group_shape = K.shape(inputs)
        group_axes = [reshape_group_shape[i] for i in range(len(input_shape))]
        group_axes[self.axis] = input_shape[self.axis] // self.groups
        group_axes.insert(1, self.groups)

        # reshape inputs to new group shape
        group_shape = [group_axes[0], self.groups] + group_axes[2:]
        group_shape = K.stack(group_shape)
        inputs = K.reshape(inputs, group_shape)

        group_reduction_axes = list(range(len(group_axes)))
        group_reduction_axes = group_reduction_axes[2:]

        mean = K.mean(inputs, axis=group_reduction_axes, keepdims=True)
        variance = K.var(inputs, axis=group_reduction_axes, keepdims=True)

        inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))

        # prepare broadcast shape
        inputs = K.reshape(inputs, group_shape)
        outputs = inputs

        # In this case we must explicitly broadcast all parameters.
        if self.scale:
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            outputs = outputs * broadcast_gamma

        if self.center:
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            outputs = outputs + broadcast_beta

        outputs = K.reshape(outputs, tensor_input_shape)

        return outputs

    def get_config(self):
        config = {
            'groups': self.groups,
            'axis': self.axis,
            'epsilon': self.epsilon,
            'center': self.center,
            'scale': self.scale,
            'beta_initializer': initializers.serialize(self.beta_initializer),
            'gamma_initializer': initializers.serialize(self.gamma_initializer),
            'beta_regularizer': regularizers.serialize(self.beta_regularizer),
            'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
            'beta_constraint': constraints.serialize(self.beta_constraint),
            'gamma_constraint': constraints.serialize(self.gamma_constraint)
        }
        base_config = super(GroupNormalization, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        return input_shape

In [4]:
# Specify image size
IMG_WIDTH = 456
IMG_HEIGHT = 456
CHANNELS = 3


# Load in EfficientNetB5
effnet = EfficientNetB5(weights=None,
                        include_top=False,
                        input_shape=(IMG_WIDTH, IMG_HEIGHT, CHANNELS))
effnet.load_weights('../input/efficientnet-keras-weights-b0b5/efficientnet-b5_imagenet_1000_notop.h5')

In [5]:
# Replace all Batch Normalization layers by Group Normalization layers
for i, layer in enumerate(effnet.layers):
    if "batch_normalization" in layer.name:
        effnet.layers[i] = GroupNormalization(groups=4, axis=-1, epsilon=0.00001)

In [6]:
def build_model():
    """
    A custom implementation of EfficientNetB5
    for the APTOS 2019 competition
    
    """
    model = Sequential()
    model.add(effnet)
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.1))
    model.add(Dense(2, activation="sigmoid"))

    print(model.summary())
    return model


In [7]:
# Initialize model
modelA = build_model()
modelB = build_model()
modelC = build_model()
modelD = build_model()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnet-b5 (Model)      (None, 15, 15, 2048)      28168048  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 2048)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 4098      
Total params: 28,172,146
Trainable params: 28,172,146
Non-trainable params: 0
_________________________________________________________________
None
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnet-b5 (Model)      (None, 15, 15, 2048)      28168048  
___________________________________________________________

In [8]:
os.listdir('../input')

['efficientnet-keras-weights-b0b5',
 'aptos-01b',
 'aptos-01a',
 'aptos2019-blindness-detection',
 'efficientnet',
 'aptos-01c',
 'aptos-binary-a',
 'aptos-01d']

In [9]:
#modelA.load_weights('../input/aptos-binary-a/model.h5')
modelA.load_weights('../input/aptos-01a/model.h5')


modelB.load_weights('../input/aptos-01b/model.h5')


modelC.load_weights('../input/aptos-01c/model.h5')


modelD.load_weights('../input/aptos-01d/model.h5')


In [10]:
KAGGLE_DIR = '../input/aptos2019-blindness-detection/'
TEST_DF_PATH = KAGGLE_DIR + 'test.csv'
TEST_IMG_PATH = KAGGLE_DIR + 'test_images/'

In [11]:
test_df = pd.read_csv(TEST_DF_PATH)
# Add extension to id_code
test_df['id_code'] = test_df['id_code'] + ".png"
print(f"Testing Images: {test_df.shape[0]}")
display(test_df.head())

Testing Images: 1928


Unnamed: 0,id_code
0,0005cfc8afb6.png
1,003f0afdcd15.png
2,006efc72b638.png
3,00836aaacf06.png
4,009245722fa4.png


In [12]:
def crop_image_from_gray(img, tol=7):
    """
    Applies masks to the orignal image and 
    returns the a preprocessed image with 
    3 channels
    """
    # If for some reason we only have two channels
    if img.ndim == 2:
        mask = img > tol
        return img[np.ix_(mask.any(1),mask.any(0))]
    # If we have a normal RGB images
    elif img.ndim == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        mask = gray_img > tol
        
        check_shape = img[:,:,0][np.ix_(mask.any(1),mask.any(0))].shape[0]
        if (check_shape == 0): # image is too dark so that we crop out everything,
            return img # return original image
        else:
            img1=img[:,:,0][np.ix_(mask.any(1),mask.any(0))]
            img2=img[:,:,1][np.ix_(mask.any(1),mask.any(0))]
            img3=img[:,:,2][np.ix_(mask.any(1),mask.any(0))]
            img = np.stack([img1,img2,img3],axis=-1)
        return img

def preprocess_image(image, sigmaX=10):
    """
    The whole preprocessing pipeline:
    1. Read in image
    2. Apply masks
    3. Resize image to desired size
    4. Add Gaussian noise to increase Robustness
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
    image = cv2.addWeighted (image,4, cv2.GaussianBlur(image, (0,0) ,sigmaX), -4, 128)
    return image

In [13]:
BATCH_SIZE = 1
# Place holder for diagnosis column
test_df['diagnosis'] = np.zeros(test_df.shape[0]) 
# For preprocessing test images
test_generator = ImageDataGenerator(preprocessing_function=preprocess_image, 
                                    rescale=1/ 128.).flow_from_dataframe(test_df, 
                                                                          x_col='id_code', 
                                                                          y_col='diagnosis',
                                                                          directory=TEST_IMG_PATH,
                                                                          target_size=(IMG_WIDTH, IMG_HEIGHT),
                                                                          batch_size=BATCH_SIZE,
                                                                          class_mode='other',
                                                                          shuffle=False)

Found 1928 validated image filenames.


In [14]:
test_df_a1_ = test_df

In [15]:
len(test_df_a1_)

1928

### A

In [16]:
# make a prediction
predictions = modelA.predict_generator(test_generator, steps=len(test_df_a1_), verbose=1)



In [17]:
predictions.shape

(1928, 2)

In [18]:
class_weights= np.array([0.98, 1])
a=predictions*class_weights

In [19]:
y_pred = np.argmax(a, axis=1)

In [20]:
test_df_a1_['P0/P1'] = y_pred

test_df_a1_.describe()

Unnamed: 0,diagnosis,P0/P1
count,1928.0,1928.0
mean,0.0,0.711618
std,0.0,0.453127
min,0.0,0.0
25%,0.0,0.0
50%,0.0,1.0
75%,0.0,1.0
max,0.0,1.0


In [21]:
test_df_a1_['P0/P1'].value_counts()

1    1372
0     556
Name: P0/P1, dtype: int64

In [22]:
test_df_a1 = test_df_a1_[test_df_a1_['P0/P1']==1]
test_df_A0 = test_df_a1_[test_df_a1_['P0/P1']==0] #0

In [23]:
test_df_a1.shape

(1372, 3)

### B

In [24]:
test_generator = ImageDataGenerator(preprocessing_function=preprocess_image, 
                                    rescale=1 / 128.).flow_from_dataframe(test_df_a1.iloc[:,:2], 
                                                                          x_col='id_code', 
                                                                          y_col='diagnosis',
                                                                          directory=TEST_IMG_PATH,
                                                                          target_size=(IMG_WIDTH, IMG_HEIGHT),
                                                                          batch_size=BATCH_SIZE,
                                                                          class_mode='other',
                                                                          shuffle=False)

Found 1372 validated image filenames.


In [25]:
# make a prediction
predictions = modelB.predict_generator(test_generator, steps=len(test_df_a1), verbose=1)
predictions.shape



(1372, 2)

In [26]:
class_weights= np.array([0.94, 1])
b=predictions*class_weights

In [27]:
y_pred = np.argmax(b, axis=1)

In [28]:
test_df_b1_ = test_df_a1.iloc[:,:2]
test_df_b1_['P1/P2'] = y_pred

test_df_b1_.describe()

Unnamed: 0,diagnosis,P1/P2
count,1372.0,1372.0
mean,0.0,0.793732
std,0.0,0.404773
min,0.0,0.0
25%,0.0,1.0
50%,0.0,1.0
75%,0.0,1.0
max,0.0,1.0


In [29]:
test_df_b1_['P1/P2'].value_counts()

1    1089
0     283
Name: P1/P2, dtype: int64

In [30]:
test_df_b1 = test_df_b1_[test_df_b1_['P1/P2']==1]
test_df_B0 = test_df_b1_[test_df_b1_['P1/P2']==0] #1

In [31]:
test_df_b1.shape

(1089, 3)

### C

In [32]:
test_generator = ImageDataGenerator(preprocessing_function=preprocess_image, 
                                    rescale=1 / 128.).flow_from_dataframe(test_df_b1.iloc[:,:2], 
                                                                          x_col='id_code', 
                                                                          y_col='diagnosis',
                                                                          directory=TEST_IMG_PATH,
                                                                          target_size=(IMG_WIDTH, IMG_HEIGHT),
                                                                          batch_size=BATCH_SIZE,
                                                                          class_mode='other',
                                                                          shuffle=False)

Found 1089 validated image filenames.


In [33]:
# make a prediction
predictions = modelC.predict_generator(test_generator, steps=len(test_df_b1), verbose=1)
predictions.shape



(1089, 2)

In [34]:
class_weights= np.array([0.685, 1])
c=predictions*class_weights

In [35]:
y_pred = np.argmax(c, axis=1)

In [36]:
test_df_c1_ = test_df_b1.iloc[:,:2]
test_df_c1_['P2/P3'] = y_pred

test_df_c1_.describe()

Unnamed: 0,diagnosis,P2/P3
count,1089.0,1089.0
mean,0.0,0.146924
std,0.0,0.354193
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,0.0,0.0
max,0.0,1.0


In [37]:
test_df_c1_['P2/P3'].value_counts()

0    929
1    160
Name: P2/P3, dtype: int64

In [38]:
test_df_c1 = test_df_c1_[test_df_c1_['P2/P3']==1]
test_df_C0 = test_df_c1_[test_df_c1_['P2/P3']==0] #2

In [39]:
test_df_c1.shape

(160, 3)

### D

In [40]:
test_generator = ImageDataGenerator(preprocessing_function=preprocess_image, 
                                    rescale=1 / 128.).flow_from_dataframe(test_df_c1.iloc[:,:2], 
                                                                          x_col='id_code', 
                                                                          y_col='diagnosis',
                                                                          directory=TEST_IMG_PATH,
                                                                          target_size=(IMG_WIDTH, IMG_HEIGHT),
                                                                          batch_size=BATCH_SIZE,
                                                                          class_mode='other',
                                                                          shuffle=False)

Found 160 validated image filenames.


In [41]:
# make a prediction
predictions = modelD.predict_generator(test_generator, steps=len(test_df_c1), verbose=1)
predictions.shape



(160, 2)

In [42]:
class_weights= np.array([0.47, 1])
d=predictions*class_weights

In [43]:
y_pred = np.argmax(d, axis=1)

In [44]:
test_df_D1_ = test_df_c1.iloc[:,:2]
test_df_D1_['P3/P4'] = y_pred

In [45]:
test_df_D1_.describe()

Unnamed: 0,diagnosis,P3/P4
count,160.0,160.0
mean,0.0,0.23125
std,0.0,0.422956
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,0.0,0.0
max,0.0,1.0


In [46]:
test_df_D1_['P3/P4'].value_counts()

0    123
1     37
Name: P3/P4, dtype: int64

In [47]:
test_df_D1 = test_df_D1_[test_df_D1_['P3/P4']==1] #4
test_df_D0 = test_df_D1_[test_df_D1_['P3/P4']==0] #3

In [48]:
test_df_D1.shape

(37, 3)

In [49]:
drop=['P0/P1']
test_df_A0 = test_df_A0.drop(drop, axis=1, inplace=False)
drop=['P1/P2']
test_df_B0 = test_df_B0.drop(drop, axis=1, inplace=False)
drop=['P2/P3']
test_df_C0 = test_df_C0.drop(drop, axis=1, inplace=False)
drop=['P3/P4']
test_df_D0 = test_df_D0.drop(drop, axis=1, inplace=False)
drop=['P3/P4']
test_df_D1 = test_df_D1.drop(drop, axis=1, inplace=False)

In [50]:
test_df_A0['diagnosis'] = 0
test_df_B0['diagnosis'] = 1
test_df_C0['diagnosis'] = 2
test_df_D0['diagnosis'] = 3
test_df_D1['diagnosis'] = 4

In [51]:
test_df_01 = pd.concat([test_df_A0,test_df_B0,test_df_C0,test_df_D0,test_df_D1],axis=0)

In [52]:
test_df = pd.read_csv(TEST_DF_PATH)
#Remove .png from ids
test_df_01['id_code'] = test_df_01['id_code'].str.replace(r'.png$', '')

In [53]:
test_df = pd.merge(test_df,test_df_01, how = 'left',
                left_on  = ['id_code'],
                right_on = ['id_code'], copy=False)

In [54]:
#test_df['diagnosis'] = P0['Q'] + P0['Q']*P1['Q'] + P0['Q']*P1['Q']*P2['Q'] + P0['Q']*P1['Q']*P2['Q']*P4['Q']

## FINAL SUB.

In [55]:
#WRITE
test_df.to_csv('submission.csv', index=False)

print(" Submission Completed;) ")

 Submission Completed;) 


In [56]:
test_df.head(71)

Unnamed: 0,id_code,diagnosis
0,0005cfc8afb6,2
1,003f0afdcd15,0
2,006efc72b638,2
3,00836aaacf06,2
4,009245722fa4,3
5,009c019a7309,2
6,010d915e229a,3
7,0111b949947e,1
8,01499815e469,0
9,0167076e7089,1
