In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
import os
import multiprocessing
from multiprocessing.pool import ThreadPool
import albumentations as A
from sklearn.metrics import *

In [None]:
# Load the APTOS 2019 training table; later cells read its 'id_code' and
# 'diagnosis' columns.
df = pd.read_csv('../input/aptos2019-blindness-detection/train.csv')

In [None]:
# Turn image ids into file names. The extension is appended only where it is
# missing, so re-running this cell does not produce 'xxx.png.png'.
df['id_code'] = df['id_code'].where(
    df['id_code'].str.endswith('.png', na=False),
    df['id_code'] + '.png',
)

In [None]:
# Convert every label to its string form. Presumably needed because the
# generators further down use class_mode="categorical" -- TODO confirm.
df['diagnosis'] = df['diagnosis'].map(str)

In [None]:
# Preview the first rows of the label table.
df.head()

In [None]:
# Show the dtype of the label column after the string conversion.
df['diagnosis'].dtype

In [None]:
# Augmentation pipeline built with albumentations. Each entry carries its own
# application probability `p`; the entries are composed in the order listed.
transform = A.Compose(
    [
        A.Blur(p=0.5),
        A.Flip(p=0.5),
        A.RandomBrightnessContrast(
            brightness_limit=1,
            contrast_limit=1,
            brightness_by_max=False,
            p=0.1,
        ),
        A.ShiftScaleRotate(
            shift_limit=0.0625,
            scale_limit=(0, 0.350),
            rotate_limit=45,
            p=0.6,
        ),
        A.ElasticTransform(p=0.3),
        A.GridDistortion(
            distort_limit=0.25,
            interpolation=cv2.INTER_AREA,
            p=0.3,
        ),
        A.HueSaturationValue(
            hue_shift_limit=5,
            sat_shift_limit=6,
            val_shift_limit=5,
            p=0.3,
        ),
        A.CLAHE(p=0.7),
        A.CoarseDropout(p=0.4),
    ]
)

In [None]:
#custom generator attempt
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves image batches described by a DataFrame.

    Args:
        df: DataFrame with one row per image.
        directory: Folder that contains the image files.
        x_col: Name of the column holding the image file names.
        y_col: Name of the label column. When ``None`` the generator yields
            images only (no labels).
        batch_size: Number of rows per batch.
        num_classes: Stored on the instance; not used by the generator itself.
        target_size: ``(height, width)`` every image is resized to, or ``None``
            to keep the original size (all images in a batch must then already
            share one shape).
        preprocess: Optional callable applied to each resized image.
        shuffle: Whether to reshuffle the row order at the end of every epoch.
    """

    def __init__(self, df, directory, x_col, y_col=None, batch_size=32, num_classes=None,target_size = (256,256),preprocess = None, shuffle=True):
        super().__init__()
        self.batch_size = batch_size
        self.df = df
        self.directory = directory
        self.indices = self.df.index.tolist()
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.x_col = x_col
        self.y_col = y_col
        self.target_size = target_size
        self.preprocess = preprocess
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.indices) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)``, or ``X`` alone when ``y_col`` is None."""
        positions = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in positions]
        return self.__get_data(batch)

    def on_epoch_end(self):
        """Rebuild the positional order of rows, shuffling it when requested."""
        self.index = np.arange(len(self.indices))
        if self.shuffle:
            np.random.shuffle(self.index)

    def __get_data(self, batch):
        """Load, resize and preprocess the images for the given DataFrame index labels."""
        images = []
        labels = []

        for row_label in batch:
            row = self.df.loc[row_label]
            # Read columns by name and use the instance's own directory; the
            # previous positional lookup and bare `directory` name only worked
            # by accident of column order / a leaked global.
            img = plt.imread(os.path.join(self.directory, row[self.x_col]))

            if self.target_size is not None:
                height, width = self.target_size
                # cv2.resize expects the size as (width, height).
                img = cv2.resize(img, (width, height))

            if self.preprocess is not None:
                img = self.preprocess(img)

            images.append(img)
            if self.y_col is not None:
                labels.append(row[self.y_col])

        # Stack into a single array so Keras receives a proper batch tensor.
        X = np.stack(images)
        if self.y_col is None:
            return X
        return X, np.asarray(labels)

In [None]:
# Training configuration. Some of these values are not referenced by the
# visible cells (see the per-line notes).
BATCH_SIZE = 8  # batch size passed to the train/validation generators
EPOCHS = 40  # not referenced in the visible cells
WARMUP_EPOCHS = 2  # not referenced in the visible cells
LEARNING_RATE = 1e-6  # Adam learning rate for the fine-tuning compile
WARMUP_LEARNING_RATE = 4e-5  # Adam learning rate for the first (warm-up) compile
HEIGHT = 320  # not referenced in the visible cells (generators use 512x512)
WIDTH = 320  # not referenced in the visible cells (generators use 512x512)
CANAL = 3  # not referenced in the visible cells
N_CLASSES = 5  # not referenced in the visible cells
ES_PATIENCE = 5  # patience of the EarlyStopping callback
RLROP_PATIENCE = 3  # patience of the ReduceLROnPlateau callback
DECAY_DROP = 0.5  # reduction factor of the ReduceLROnPlateau callback

In [None]:
#preprocessing function use with ImageDataGenerator
def wrapper_preprocess():
    """Build the augmentation callable.

    Returns:
        A function taking one image array. It multiplies the image by 255,
        casts it to uint8, passes it through the module-level ``transform``
        pipeline and returns the augmented image divided by 255.

    NOTE(review): the multiply-by-255 step assumes the incoming image is
    scaled to [0, 1] -- confirm against the actual caller.
    """
    def preprocess_fn(image):
        as_uint8 = (image * 255).astype('uint8')
        augmented = transform(image=as_uint8)
        return augmented['image'] / 255

    return preprocess_fn

In [None]:
# Instantiate the augmentation callable built by wrapper_preprocess().
ppf = wrapper_preprocess()

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Image generator that reserves 5% of the rows for validation. The
# augmentation hook (ppf) is currently commented out, so no
# preprocessing_function is applied.
gen = ImageDataGenerator(#preprocessing_function=ppf,
                        validation_split=0.05)

In [None]:
# Build the training and validation iterators from the same DataFrame. The
# two calls are identical except for `subset`, which picks one side of the
# validation_split configured on `gen`. The generator expression is consumed
# in order, so the training iterator is still created first.
train_generator, valid_generator = (
    gen.flow_from_dataframe(
        dataframe=df,
        directory="../input/project1/1",
        x_col="id_code",
        y_col="diagnosis",
        batch_size=BATCH_SIZE,
        class_mode="categorical",
        target_size=(512, 512),
        subset=subset_name,
    )
    for subset_name in ('training', 'validation')
)
    

In [None]:
# Load a previously saved Keras model from an HDF5 file to continue training.
# NOTE(review): the file name suggests an EfficientNet-B5 backbone -- confirm.
model = tf.keras.models.load_model('../input/model-file/ATOPS_dr_EffnetB5(R2).h5')

In [None]:
# Inspect the architecture of the loaded model.
model.summary()

In [None]:
# Warm-up phase: compile with the warm-up learning rate.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=WARMUP_LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Warm-up training. The epoch count comes from the WARMUP_EPOCHS constant
# (value 2, the number previously hard-coded here) so the config cell stays
# the single place to tune it.
model.fit(train_generator, validation_data=valid_generator, epochs=WARMUP_EPOCHS)

In [None]:
# Fine-tuning phase: callbacks and recompilation with the lower learning rate.

# Stop when val_loss has not improved for ES_PATIENCE epochs and roll back to
# the best weights seen.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=ES_PATIENCE, restore_best_weights=True, verbose=1)
# Multiply the learning rate by DECAY_DROP after RLROP_PATIENCE stagnant epochs.
# NOTE(review): min_lr equals LEARNING_RATE (1e-6), so the rate can never
# actually drop below its starting value -- confirm this is intended.
rlrop = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=RLROP_PATIENCE, factor=DECAY_DROP, min_lr=1e-6, verbose=1)
# Save the model to the same file after every epoch (latest state).
ckpt = tf.keras.callbacks.ModelCheckpoint('b5_last_train.h5')
callback_list = [es, rlrop, ckpt]

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
# Keep the loss consistent with the warm-up phase and with the one-hot
# ("categorical") targets produced by the generators; switching to
# binary_crossentropy here would also make 'accuracy' report a per-output
# binary accuracy instead of class accuracy.
# NOTE(review): assumes the model ends in a softmax over the classes -- confirm.
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=['accuracy'])

In [None]:
# Unfreeze every top-level layer. The attribute was previously misspelled
# ('trainabe'), which only created an unused attribute and left the layers'
# trainable state untouched.
# NOTE(review): setting `trainable` on a nested model presumably propagates to
# all of its sub-layers (BatchNormalization included), and Keras recommends
# calling compile() again after changing `trainable` -- confirm both.
for layer in model.layers:
    layer.trainable = True
    

In [None]:
# Re-inspect the model (e.g. the trainable / non-trainable parameter counts)
# after touching the layers' trainable flags.
model.summary()

In [None]:
# Mark every sub-layer of the first top-level layer as trainable, leaving
# BatchNormalization layers in whatever state they already have.
# NOTE(review): assumes model.layers[0] is itself a nested model exposing
# `.layers` -- confirm.
for layer in model.layers[0].layers:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        continue
    layer.trainable = True

In [None]:
# Fine-tuning run with early stopping, LR reduction and checkpointing.
# NOTE(review): trains for 6 epochs although the config cell defines
# EPOCHS = 40 -- confirm which value is intended.
model.fit(train_generator, validation_data = valid_generator,epochs = 6,callbacks = callback_list)

In [None]:
# Persist the fine-tuned model to an HDF5 file in the working directory.
tf.keras.models.save_model(model, './b5_retrained.h5')

In [None]:
# Predict on every image listed in df: no labels (class_mode=None), fixed
# order (shuffle=False) and one image per step.
complete_datagen = ImageDataGenerator()
complete_generator = complete_datagen.flow_from_dataframe(
    dataframe=df,
    directory="../input/project1/1",
    x_col="id_code",
    class_mode=None,
    shuffle=False,
    batch_size=1,
    target_size=(512, 512),
)

# Number of steps needed to walk the generator once.
STEP_SIZE_COMPLETE = complete_generator.n // complete_generator.batch_size

# Reduce each prediction vector to the index of its largest entry.
train_preds = list(
    map(
        np.argmax,
        model.predict(complete_generator, steps=STEP_SIZE_COMPLETE, verbose=1),
    )
)

In [None]:
# Score the predictions against the integer labels of the whole DataFrame:
# quadratic-weighted Cohen's kappa and plain accuracy.
print("Train Cohen Kappa score: %.3f" % cohen_kappa_score(train_preds, df['diagnosis'].astype('int'), weights='quadratic'))
print("Train Accuracy score : %.3f" % accuracy_score(df['diagnosis'].astype('int'),train_preds))

In [None]:
def plot_conf_matrix(true,pred,classes):
    """Draw an annotated heatmap of the confusion matrix of `pred` vs `true`.

    Args:
        true: Ground-truth labels.
        pred: Predicted labels.
        classes: Display names; their count sizes the matrix frame and they
            are used as tick labels on both axes.

    Note: calls ``sns.set(font_scale=1.4)``, which changes seaborn's global
    font scale.
    """
    n_classes = len(classes)
    matrix_frame = pd.DataFrame(
        confusion_matrix(true, pred),
        index=range(n_classes),
        columns=range(n_classes),
    )

    plt.figure(figsize=(8, 5.5))
    sns.set(font_scale=1.4)
    sns.heatmap(
        matrix_frame,
        annot=True,
        fmt='g',
        annot_kws={"size": 16},
        xticklabels=classes,
        yticklabels=classes,
    )
    plt.show()

In [None]:
import seaborn as sns

In [None]:
# Human-readable class names, then the raw-count confusion matrix of the
# predictions against the integer labels.
labels = ['0 - No DR', '1 - Mild', '2 - Moderate', '3 - Severe', '4 - Proliferative DR']
plot_conf_matrix(list(df['diagnosis'].astype(int)),train_preds,labels)

In [None]:
# Row-normalised confusion matrix: each row (true class) is divided by its
# own total, so cells show the fraction of that class assigned to each
# predicted class.
cnf_matrix = confusion_matrix(df['diagnosis'].astype('int'), train_preds)
cnf_matrix_norm = cnf_matrix.astype('float') / cnf_matrix.sum(axis=1, keepdims=True)
df_cm = pd.DataFrame(data=cnf_matrix_norm, columns=labels, index=labels)

plt.figure(figsize=(16, 7))
sns.heatmap(df_cm, cmap="Blues", fmt='.2f', annot=True)
plt.show()