In [None]:
import os
import sys

# Show the contents of the EfficientNet package directory from the input dataset,
# then add its parent directory to sys.path so `efficientnet` can be imported by name.
print(os.listdir("../input/efficientnet/efficientnet-master/efficientnet-master/efficientnet"))
sys.path.append(os.path.abspath('../input/efficientnet/efficientnet-master/efficientnet-master/'))

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import cv2
import math

import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator

from keras.layers import Conv2D, Dense, Dropout, BatchNormalization, Input, Flatten, MaxPooling2D, Activation, GlobalAveragePooling2D
from keras.models import Sequential, Model
from keras.utils import np_utils
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, Callback, LearningRateScheduler

import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score

from efficientnet import EfficientNetB5

# Show which entries are available under ../input.
print(os.listdir("../input"))

## Explore Data

In [None]:
# train_df: label table of the APTOS 2019 competition data (columns used below: id_code, diagnosis).
train_df = pd.read_csv('../input/aptos2019-blindness-detection/train.csv')
# train_df_ex: label table of the extra "diabetic-retinopathy-resized" data (columns used below: image, level).
train_df_ex = pd.read_csv('../input/diabetic-retinopathy-resized/trainLabels.csv')

In [None]:
# Size and first rows of the APTOS label table.
print(train_df.shape)
train_df.head()

In [None]:
# Size and first rows of the extra label table.
print(train_df_ex.shape)
train_df_ex.head()

In [None]:
# Count missing values per column in both label tables.
for position, (title, frame) in enumerate((('Train', train_df), ('Train Extra', train_df_ex))):
    if position:
        # separator between the two reports
        print("=====")
    print(title)
    print(frame.isnull().sum())

In [None]:
# Class distribution of the APTOS labels: printed as counts and drawn as a bar chart.
diagnosis_counts = train_df['diagnosis'].value_counts()
print(diagnosis_counts)
diagnosis_counts.plot(kind='bar')
plt.show()

In [None]:
# Class distribution of the extra dataset labels: printed as counts and drawn as a bar chart.
level_counts = train_df_ex['level'].value_counts()
print(level_counts)
level_counts.plot(kind='bar')
plt.show()

In [None]:
# Show sample fundus images from the APTOS training set, titled with their diagnosis.
sample_nos = [4]

for no in sample_nos:
    sample_train_id = train_df['id_code'][no]
    sample_train_diagnosis = train_df['diagnosis'][no]
    result = "Diagnosis="+str(sample_train_diagnosis)

    # Image.open keeps the file open until the image is closed; the with-block
    # releases the handle as soon as the pixels have been copied into an array.
    with Image.open('../input/aptos2019-blindness-detection/train_images/'+sample_train_id+'.png') as sample_image:
        img_list = np.asarray(sample_image)
    print(img_list.shape)
    plt.imshow(img_list)
    plt.title(result)
    plt.show()


In [None]:
# Show sample fundus images from the extra (resized) dataset, titled with their level.
sample_nos = [4]

for no in sample_nos:
    sample_train_id = train_df_ex['image'][no]
    sample_train_diagnosis = train_df_ex['level'][no]
    result = "Diagnosis="+str(sample_train_diagnosis)

    # Image.open keeps the file open until the image is closed; the with-block
    # releases the handle as soon as the pixels have been copied into an array.
    with Image.open('../input/diabetic-retinopathy-resized/resized_train/resized_train/'+sample_train_id+'.jpeg') as sample_image:
        img_list = np.asarray(sample_image)
    print(img_list.shape)
    plt.imshow(img_list)
    plt.title(result)
    plt.show()

* The dataset is imbalanced across diagnosis classes. -> Try to balance it with data augmentation.
* The images have different shapes. -> Resize every image to the same appropriate size.

### There are duplicated images with different labels


In [None]:
# Table describing duplicated training images (read from the aptos2019-duplicated-images-info dataset).
dup_info = pd.read_csv('../input/aptos2019-duplicated-images-info/duplicated_info.csv')
dup_info.head()

In [None]:
# Add an id_code column derived from each path: take the 4th '/'-separated component
# and keep the part before its first '.' (i.e. the file name without extension).
# NOTE(review): assumes every path has at least four '/'-separated components — confirm.
dup_info['id_code'] = [s.split('/')[3].split('.')[0] for s in dup_info['path'].values]
dup_info.head()

In [None]:
# Number of duplicate entries per diagnosis value.
dup_info['diagnosis'].value_counts()

In [None]:
# Remove every training row whose id_code is listed in dup_info.
# A single vectorised membership mask replaces re-filtering the whole frame once
# per duplicate id; .copy() keeps train_df_new independent of train_df.
train_df_new = train_df[~train_df['id_code'].isin(dup_info['id_code'])].copy()
print(train_df_new.shape)

In [None]:
# Class distribution after the duplicated ids were removed.
train_df_new['diagnosis'].value_counts()

### Retrieve some data from the extra training dataset

In [None]:
# For each level, shuffle the matching extra rows with a fixed seed and keep
# at most the listed number of rows (level, quota).
ex_0, ex_1, ex_2, ex_3, ex_4 = (
    pd.DataFrame(train_df_ex[train_df_ex['level'] == level].sample(frac=1, random_state=42)[:quota])
    for level, quota in ((0, 500), (1, 600), (2, 900), (3, 300), (4, 100))
)

for subset in (ex_0, ex_1, ex_2, ex_3, ex_4):
    print(subset.shape)




In [None]:

# Stack the per-level samples into one frame, then drop the now-redundant pieces.
per_level_frames = [ex_0, ex_1, ex_2, ex_3, ex_4]
ex_df = pd.concat(per_level_frames)
print(ex_df.shape)

del per_level_frames
del ex_0, ex_1, ex_2, ex_3, ex_4


### Image decolorization, cropping, and other preprocessing

In [None]:
# Side length (pixels) every image is resized to in resize_decolor.
IMG_SIZE = 256
# Number of diagnosis grades; also the width of the model's output layer.
NUM_CLASSES = 5
# Model input shape: square image with 3 colour channels.
INPUT_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# Training batch size and maximum number of epochs passed to fit_generator.
BATCH_SIZE = 24
EPOCHS = 80

# Weight file loaded into the EfficientNetB5 backbone (no-top variant, per the file name).
WEIGHTS_PATH = '../input/efficientnet-keras-weights-b0b5/efficientnet-b5_imagenet_1000_notop.h5'

In [None]:
def crop_image1(img,tol=7):
    """Crop a 2-D image to the rows and columns that contain bright pixels.

    Args:
        img: 2-D image array.
        tol: brightness tolerance; only pixels strictly greater than this count as content.

    Returns:
        The sub-array made of every row and every column holding at least one
        pixel above ``tol`` (empty when no pixel qualifies).
    """
    bright = img > tol
    keep_rows = bright.any(axis=1)
    keep_cols = bright.any(axis=0)
    return img[np.ix_(keep_rows, keep_cols)]

def crop_image_from_gray(img,tol=7):
    """Crop away rows and columns that contain no pixel brighter than ``tol``.

    Args:
        img: 2-D grayscale array, or 3-D colour array. A 3-D input is converted
            with ``cv2.COLOR_RGB2GRAY`` to build the brightness mask, and its
            first three channels are cropped and re-stacked.
        tol: brightness tolerance; only pixels strictly greater than this count as content.

    Returns:
        The cropped image. When no pixel exceeds ``tol`` (the crop would be
        empty) the input is returned unchanged, for 2-D and 3-D inputs alike.

    Raises:
        ValueError: if ``img`` is neither 2-D nor 3-D.
    """
    if img.ndim == 2:
        mask = img > tol
    elif img.ndim == 3:
        mask = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
    else:
        raise ValueError("crop_image_from_gray expects a 2-D or 3-D image, got ndim=%d" % img.ndim)

    # Image is too dark: cropping would remove everything, so keep the original.
    if not mask.any():
        return img

    # Build the row/column selector once and reuse it for every channel.
    window = np.ix_(mask.any(1), mask.any(0))
    if img.ndim == 2:
        return img[window]
    return np.stack([img[:, :, channel][window] for channel in range(3)], axis=-1)



In [None]:
# Renumber the rows from 0; the previous index (inherited from train_df_ex) is kept as a column.
# NOTE(review): this rebinds ex_df, so the cell is not idempotent — run it once per kernel.
ex_df = ex_df.reset_index()
ex_df.head()

In [None]:
# Pick one APTOS training row to preview the preprocessing on.
sample_no = 100

sample_train_id = train_df['id_code'][sample_no]
sample_train_diagnosis = train_df['diagnosis'][sample_no]

# Alternative: take the sample from the extra dataset instead.
#sample_train_id = ex_df['image'][sample_no]
#sample_train_diagnosis = ex_df['level'][sample_no]
result = "Diagnosis="+str(sample_train_diagnosis)


# cv2.imread yields the image in BGR channel order (resize_decolor converts it to RGB).
sample_img = cv2.imread('../input/aptos2019-blindness-detection/train_images/'+sample_train_id+'.png')
# Alternative path when the sample comes from the extra dataset.
#sample_img = cv2.imread('../input/diabetic-retinopathy-resized/resized_train/resized_train/'+sample_train_id+'.jpeg')

def resize_decolor(image, sigmaX=10):
    """Preprocess one fundus image: RGB conversion, crop, resize, blur subtraction.

    Args:
        image: BGR image array as returned by ``cv2.imread``.
        sigmaX: Gaussian sigma for the blurred copy that is subtracted from the image.

    Returns:
        An ``IMG_SIZE`` x ``IMG_SIZE`` RGB image computed as
        ``4 * image - 4 * blurred + 128``.

    Raises:
        ValueError: if ``image`` is None, which is what ``cv2.imread`` returns
            for a missing or unreadable file.
    """
    if image is None:
        # Fail early with a readable message instead of an opaque OpenCV error.
        raise ValueError("resize_decolor received None; cv2.imread returns None "
                         "when the file is missing or cannot be decoded")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Remove the dark border around the fundus before resizing.
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
    # Subtract the blurred copy and re-centre the result around 128.
    image = cv2.addWeighted (image,4, cv2.GaussianBlur( image , (0,0) , sigmaX) ,-4 ,128)
    return image

# Alternative experiment: plain box blur instead of the full preprocessing.
#img = cv2.blur(sample_img, (5, 5))
# Run the preprocessing on the sample and show the result with its diagnosis as title.
img = resize_decolor(sample_img)
print(img.shape)

plt.imshow(img)
plt.title(result)
plt.show()

### Data Preprocessing

In [None]:
# Preprocess the de-duplicated APTOS images in a seeded random order.
# Diagnosis 0 is capped: only the first 500 such rows are kept.
X_imgs, y_diagnosis = [], []
id_zeros = 0
for key, row in train_df_new.sample(frac=1, random_state=42).iterrows():
    code, diagnosis = row['id_code'], row['diagnosis']

    if diagnosis == 0:
        id_zeros += 1
    if diagnosis == 0 and id_zeros > 500:
        # quota for class 0 already filled
        continue

    image_path = '../input/aptos2019-blindness-detection/train_images/'+code+'.png'
    X_imgs.append(resize_decolor(cv2.imread(image_path)))
    y_diagnosis.append(diagnosis)

for collected in (X_imgs, y_diagnosis):
    print(len(collected))
    

In [None]:
# Preprocess the sampled extra images (seeded random order) and append them
# to the image/label lists built from the APTOS data.
for key, row in ex_df.sample(frac=1, random_state=42).iterrows():
    code, diagnosis = row['image'], row['level']

    image_path = '../input/diabetic-retinopathy-resized/resized_train/resized_train/' + code + '.jpeg'
    X_imgs.append(resize_decolor(cv2.imread(image_path)))
    y_diagnosis.append(diagnosis)

for collected in (X_imgs, y_diagnosis):
    print(len(collected))


In [None]:
# Convert the accumulated Python lists into NumPy arrays.
# NOTE(review): the names are rebound from list to ndarray, so the cells that
# append to X_imgs / y_diagnosis cannot be re-run after this one.
X_imgs = np.array(X_imgs)
y_diagnosis = np.array(y_diagnosis)

print(X_imgs.shape)
print(y_diagnosis.shape)

In [None]:
# Class distribution of the combined (APTOS + extra) label array.
pd.Series(y_diagnosis).value_counts()

In [None]:
# Turn each grade k into a cumulative multi-hot target: columns 0..k are 1,
# the remaining columns are 0 (e.g. grade 2 -> [1, 1, 1, 0, 0]).
y_diagnosis_cat = np_utils.to_categorical(y_diagnosis, NUM_CLASSES)
y_multi = np.empty(y_diagnosis_cat.shape, dtype=y_diagnosis.dtype)

# Derive the column indices from NUM_CLASSES instead of hard-coding 4 and 3, so
# every column of the np.empty buffer is written whatever the class count is.
last_class = NUM_CLASSES - 1
y_multi[:, last_class] = y_diagnosis_cat[:, last_class]

# Walk from the highest grade down: a column is set if its own one-hot bit is
# set or if any higher grade's column is set.
for i in range(last_class - 1, -1, -1):
    y_multi[:, i] = np.logical_or(y_diagnosis_cat[:, i], y_multi[:, i+1])

print(y_multi.sum(axis=0))

In [None]:
# Hold out 10% as the final validation set, then 10% of the remainder as the
# cross-validation set used during training; both splits use a fixed seed.
tr_imgs, val_imgs, tr_y, val_y = train_test_split(
    X_imgs, y_multi, test_size=0.1, random_state=42)
train_imgs, cv_imgs, train_y, cv_y = train_test_split(
    tr_imgs, tr_y, test_size=0.1, random_state=42)

# The full array and the intermediate split are no longer needed.
del X_imgs, tr_imgs, tr_y

for split_array in (train_imgs, cv_imgs, val_imgs, train_y, cv_y, val_y):
    print(split_array.shape)

### Define Model

In [None]:
# Fix TensorFlow's random seed before the model is built.
# NOTE(review): tf.set_random_seed looks like the TF 1.x spelling — confirm against the installed version.
tf.set_random_seed(0)

In [None]:
def efficientnet_model(file_path, input_shape, num_classes):
    """Build an EfficientNetB5 backbone topped with a dense sigmoid head.

    Args:
        file_path: path of the weight file loaded into the backbone.
        input_shape: shape of one input image, passed to ``Input``.
        num_classes: number of sigmoid units in the output layer.

    Returns:
        An uncompiled ``Sequential`` model: backbone, global average pooling,
        three Dense/ReLU/BatchNorm/Dropout stages (512, 256, 128 units) and
        the sigmoid output layer.
    """
    input_tensor = Input(shape=input_shape)
    model = Sequential()
    base_model = EfficientNetB5(weights=None,
                                include_top=False,
                                input_tensor=input_tensor)
    base_model.load_weights(file_path)

    # Freeze the backbone, leaving only its batch-norm layers trainable
    # (recognised by a 'batch_normalization' prefix or a 'bn' suffix).
    for backbone_layer in base_model.layers:
        layer_name = backbone_layer.name
        backbone_layer.trainable = (
            layer_name.startswith('batch_normalization') or layer_name.endswith('bn')
        )

    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.5))

    # (units, dropout rate applied after the stage)
    for units, drop_rate in ((512, 0.5), (256, 0.4), (128, 0.4)):
        model.add(Dense(units))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(drop_rate))

    model.add(Dense(num_classes, activation='sigmoid'))
    return model



In [None]:
# Seed NumPy's global RNG, then build the network from the constants defined earlier.
np.random.seed(42)
model = efficientnet_model(WEIGHTS_PATH, INPUT_SHAPE, NUM_CLASSES)

In [None]:
# Binary cross-entropy treats each of the sigmoid outputs as an independent
# yes/no target, matching the multi-hot y_multi labels.
model.compile(loss = 'binary_crossentropy',
             optimizer = Adam(lr=0.001),
             metrics = ['accuracy'])
model.summary()

In [None]:
# Stop training once val_loss has not improved for 25 epochs.
EARLY_STOPPING = EarlyStopping(monitor='val_loss', patience=25, verbose=1, mode='auto')

# Save the full model (not only weights) whenever val_acc reaches a new maximum.
MODEL_CHECKPOINT = ModelCheckpoint(filepath = 'effnet_best_acc_model.h5',
                                  monitor='val_acc',
                                  verbose=1,
                                  save_best_only=True,
                                  save_weights_only=False,
                                  mode='max',
                                  period=1)

def step_decay(epoch):
    """Return the learning rate for ``epoch``: 0.001 before epoch 20, 0.00075 from epoch 20 on."""
    return 0.00075 if epoch >= 20 else 0.001

# Callback that applies step_decay's learning-rate schedule during training.
LR_DECAY = LearningRateScheduler(step_decay)

In [None]:
class Metrics(Callback):
    """Callback that tracks quadratic-weighted kappa on the validation data.

    After every epoch the score is appended to ``self.val_kappas`` and, when it
    equals the best score seen so far, the model is saved to
    'best_kappa_model.h5'.
    """

    def on_train_begin(self, logs={}):
        # History of per-epoch validation kappa scores.
        self.val_kappas = []

    def on_epoch_end(self, epoch, logs={}):
        features, targets = self.validation_data[:2]

        # Multi-hot rows are decoded to a grade as (number of set columns) - 1.
        true_grade = targets.sum(axis=1) - 1
        fired = (self.model.predict(features) > 0.5).astype(int)
        pred_grade = fired.sum(axis=1) - 1

        kappa = cohen_kappa_score(true_grade, pred_grade, weights='quadratic')
        self.val_kappas.append(kappa)

        print(f"val_kappa: {kappa:.4f}")

        is_best_so_far = kappa == max(self.val_kappas)
        if is_best_so_far:
            print("Validation Kappa has improved. Saving model.")
            self.model.save('best_kappa_model.h5')

In [None]:
# Kappa-tracking callback instance; its val_kappas list is plotted after training.
kappa_metrics = Metrics()

In [None]:
# Keyword arguments for ImageDataGenerator: pixel rescaling to [0, 1] plus
# random rotation, zoom, flips and horizontal shift.
augmentation_params = {
    'rescale':1./255,
    'rotation_range': 20,
    'zoom_range': 0.05,
    'vertical_flip': True,
    'horizontal_flip': True,
    'width_shift_range': 0.05
}

In [None]:
# Augmenting generator for the training images.
traingen = ImageDataGenerator(**augmentation_params)

In [None]:
# Scale the validation images to [0, 1] to match the generator's 'rescale'.
# The division turns the integer pixel array into floats, so the dtype guard
# makes this cell safe to re-run without dividing by 255 a second time.
# NOTE(review): assumes cv_imgs still holds integer pixels at this point
# (stacked OpenCV images) — confirm.
if np.issubdtype(cv_imgs.dtype, np.integer):
    cv_imgs = cv_imgs / 255.

In [None]:
# Train on augmented batches; validation uses the pre-scaled cv split.
# Floor division in steps_per_epoch means a trailing partial batch is not
# counted toward an epoch.
history = model.fit_generator(traingen.flow(train_imgs, train_y, batch_size = BATCH_SIZE),
                    steps_per_epoch = len(train_imgs)//BATCH_SIZE,
                    epochs = EPOCHS,
                    verbose = 1,
                    validation_data = (cv_imgs, cv_y),
                    callbacks=[EARLY_STOPPING, MODEL_CHECKPOINT, kappa_metrics, LR_DECAY])

In [None]:
# Drop the scaled validation images now that training is done.
del cv_imgs

In [None]:
import matplotlib.pyplot as plt

# Plot training/validation accuracy per epoch, then the validation kappa history.
fig = plt.figure(figsize=(8, 6))
plt.plot(history.history['acc'], color='r', label='acc')
plt.plot(history.history['val_acc'], label='val_acc')
plt.legend()

plt.xlabel("epoch")
# The curves are accuracies, so label the axis accordingly (it used to say "loss").
plt.ylabel("accuracy")

# Mark the epoch with the best validation accuracy and show that value as the title.
plt.vlines(np.argmax(history.history['val_acc']), 0.5, 1)
plt.title(np.max(history.history['val_acc']))

plt.show()

plt.plot(kappa_metrics.val_kappas)
# Mark the epoch with the best validation kappa.
plt.vlines(np.argmax(kappa_metrics.val_kappas), 0.5, 1.0)
plt.xlabel("epoch")
plt.ylabel("kappa")
plt.title('kappa score')
plt.show()

In [None]:
# Save the final-epoch model without optimizer state, plus the held-out validation split.
# NOTE(review): val_imgs is saved as-is; unlike cv_imgs it was never divided by 255 in this notebook.
model.save('aptos_effnet_model.h5', include_optimizer=False)
np.save('val_imgs.npy', val_imgs)
np.save('val_y.npy', val_y)

In [None]:
from IPython.display import FileLink
# Download link for the checkpoint written by MODEL_CHECKPOINT.
FileLink('effnet_best_acc_model.h5')

In [None]:
# Download link for the model saved by the kappa callback.
FileLink('best_kappa_model.h5')

In [None]:
# Download link for the saved validation images.
FileLink('val_imgs.npy')

In [None]:
# Download link for the saved validation labels.
FileLink('val_y.npy')