In [1]:
# Shell escape: list the entries under /kaggle/input.
!ls /kaggle/input

aptos-model  aptos-tpu	ocular-disease-recognition-odir5k  odir-labels


In [2]:
%%capture
!pip install efficientnet
!pip install tensorflow-addons
!pip install focal-loss

In [3]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf, tensorflow.keras.backend as K
from tensorflow.keras.layers import Dense,Conv2D,Concatenate,GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras import optimizers
from kaggle_datasets import KaggleDatasets
import math
import tensorflow_addons as tfa


In [4]:
# Sentinel handed to tf.data (num_parallel_calls / prefetch) further down.
AUTO = tf.data.experimental.AUTOTUNE

# Try to resolve a TPU cluster; a ValueError is treated as "no TPU available".
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # Fall back to whatever strategy TensorFlow currently has active.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the resolved cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

# GCS path of the attached image dataset, used as the prefix of every image path.
dataset_locator = KaggleDatasets()
GCS_DS_PATH = dataset_locator.get_gcs_path('ocular-disease-recognition-odir5k')
print(GCS_DS_PATH)

Running on TPU  grpc://10.0.0.2:8470
REPLICAS:  8
gs://kds-bfef3f07e2d6c94a647cc6f9bb3b34a053dfec1877aab16008b0c9d1


In [5]:
# Display the number of replicas the strategy keeps in sync (BATCH_SIZE is derived from it).
strategy.num_replicas_in_sync

8

In [6]:
# Side length (in pixels) used for both image height and width.
img_shape = 456

# Global batch size: five samples for every replica of the strategy.
BATCH_SIZE = strategy.num_replicas_in_sync * 5
BATCH_SIZE

40

In [7]:
def train_format_path(st):
    """Return the full GCS path of the training image whose file name is `st`."""
    training_dir = GCS_DS_PATH + '/ODIR-5K/ODIR-5K/Training Images/'
    return training_dir + st

def test_format_path(st):
    """Return the full GCS path of the testing image whose file name is `st`."""
    testing_dir = GCS_DS_PATH + '/ODIR-5K/ODIR-5K/Testing Images/'
    return testing_dir + st


In [8]:
# Training annotations: per-patient file names plus the N/D/G/C/A/H/M/O label columns.
train_annotations_path = '../input/odir-labels/ODIR-5K_Training_Annotations(Updated)_V2.xlsx'
train = pd.read_excel(train_annotations_path)
train.head()

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Fundus,Right-Fundus,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,0,69,Female,0_left.jpg,0_right.jpg,cataract,normal fundus,0,0,0,1,0,0,0,0
1,1,57,Male,1_left.jpg,1_right.jpg,normal fundus,normal fundus,1,0,0,0,0,0,0,0
2,2,42,Male,2_left.jpg,2_right.jpg,laser spot，moderate non proliferative retinopathy,moderate non proliferative retinopathy,0,1,0,0,0,0,0,1
3,3,66,Male,3_left.jpg,3_right.jpg,normal fundus,branch retinal artery occlusion,0,0,0,0,0,0,0,1
4,4,53,Male,4_left.jpg,4_right.jpg,macular epiretinal membrane,mild nonproliferative retinopathy,0,1,0,0,0,0,0,1


In [9]:
# Test table: one row per ID with the eight label columns.
test_table_path = '../input/odir-labels/XYZ_ODIR.csv'
test = pd.read_csv(test_table_path)
test.head()

Unnamed: 0,ID,N,D,G,C,A,H,M,O
0,937,0,0,0,0,0,0,0,0
1,967,0,0,0,0,0,0,0,0
2,988,0,0,0,0,0,0,0,0
3,995,0,0,0,0,0,0,0,0
4,1000,0,0,0,0,0,0,0,0


In [10]:
# Derive the left/right fundus file names from every test ID.
ID = list(test['ID'])
left_test = [str(i) + '_left.jpg' for i in ID]
right_test = [str(i) + '_right.jpg' for i in ID]

name_df = pd.DataFrame({'ID': ID, 'Left-Fundus': left_test, 'Right-Fundus': right_test})

# `test` is overwritten by the merge below. Dropping file-name columns left over
# from a previous run keeps this cell idempotent: without it, re-running would
# produce 'Left-Fundus_x' / 'Left-Fundus_y' duplicates.
test_without_names = test.drop(columns=['Left-Fundus', 'Right-Fundus'], errors='ignore')
test = pd.merge(name_df, test_without_names, on='ID')
test.head()

Unnamed: 0,ID,Left-Fundus,Right-Fundus,N,D,G,C,A,H,M,O
0,937,937_left.jpg,937_right.jpg,0,0,0,0,0,0,0,0
1,967,967_left.jpg,967_right.jpg,0,0,0,0,0,0,0,0
2,988,988_left.jpg,988_right.jpg,0,0,0,0,0,0,0,0
3,995,995_left.jpg,995_right.jpg,0,0,0,0,0,0,0,0
4,1000,1000_left.jpg,1000_right.jpg,0,0,0,0,0,0,0,0


In [11]:
# Keep only the eight label columns of the test table.
test_labels = test.loc[:, ['N','D','G','C','A','H','M','O']]

In [12]:
# File-name columns for each eye, for the training and the test table.
left_train, right_train = train['Left-Fundus'], train['Right-Fundus']
left_test, right_test = test['Left-Fundus'], test['Right-Fundus']

In [13]:
# Row counts of the four file-name series, in the order left/right train, left/right test.
tuple(map(len, (left_train, right_train, left_test, right_test)))

(3500, 3500, 500, 500)

In [14]:
# Turn every file name into its full GCS path and keep the result as a NumPy array.
left_train_paths = left_train.map(train_format_path).values
right_train_paths = right_train.map(train_format_path).values

left_test_paths = left_test.map(test_format_path).values
right_test_paths = right_test.map(test_format_path).values




In [15]:
# The eight label columns of the training annotations.
train_labels = train.loc[:, ['N','D','G','C','A','H','M','O']]
train_labels.head()

Unnamed: 0,N,D,G,C,A,H,M,O
0,0,0,0,1,0,0,0,0
1,1,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,1
3,0,0,0,0,0,0,0,1
4,0,1,0,0,0,0,0,1


In [16]:
def decode(img,image_size=(img_shape, img_shape)):
    """Read a JPEG file and return it as a float32 image tensor.

    Args:
        img: Path of the JPEG file; passed to `tf.io.read_file`.
        image_size: `(height, width)` the decoded image is resized to.

    Returns:
        A 3-channel float32 tensor with values divided by 255, resized to
        `image_size`, randomly flipped and then passed through
        `tf.image.rot90` (which swaps height and width for non-square sizes).

    NOTE(review): the random flips run on every call, so they are also applied
    when this function is used for validation or test inputs. Confirm that is
    intended.
    """
    bits = tf.io.read_file(img)
    image = tf.image.decode_jpeg(bits, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.resize(image, image_size)

    image = tf.image.random_flip_left_right(image, seed=2020)
    image = tf.image.random_flip_up_down(image, seed=2020)
    # Crop to `image_size` rather than the global `img_shape`, so a non-default
    # size never asks for a crop larger than (or different from) the resized image.
    # The crop size equals the resized size, so it currently selects the whole image.
    crop_size = [image_size[0], image_size[1], 3]
    image = tf.image.random_crop(image, size=crop_size, seed=2020)
    # rot90 is called with its default arguments on every image.
    image = tf.image.rot90(image)
    return image

def decode_image(left, right,labels=None ):
    """Decode a left/right image pair into one stacked tensor.

    Args:
        left: Path of the left image, passed to `decode`.
        right: Path of the right image, passed to `decode`.
        labels: Optional labels that are returned unchanged next to the images.

    Returns:
        The stacked `[left, right]` tensor, or a `(stacked, labels)` tuple when
        `labels` is given.
    """
    eye_pair = tf.stack([decode(left), decode(right)])
    if labels is not None:
        return eye_pair, labels
    return eye_pair
    


In [17]:
import efficientnet.tfkeras as efn
import tensorflow as tf, tensorflow.keras.backend as K
from tensorflow.keras.layers import concatenate,Activation,GlobalMaxPooling2D,Flatten,Dense,Dropout,Input,Reshape,Lambda,GlobalMaxPooling2D
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras import optimizers
from keras.utils.vis_utils import plot_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score,cohen_kappa_score,roc_auc_score


In [18]:
def parallel_model():
    """Build the two-branch classifier that consumes a stacked left/right image pair.

    The input has shape (2, img_shape, img_shape, 3). It is split along axis 1
    into a left and a right image, each image goes through its own
    EfficientNetB7 branch, the two branch outputs are concatenated, globally
    average-pooled and fed to a small dense head.

    Returns:
        An uncompiled `tf.keras.Model` mapping the stacked input to 8 sigmoid outputs.
    """
    input_layer = Input(shape=(2, img_shape,img_shape,3))
    # Split the stacked pair into two tensors that each keep a leading axis of size 1.
    left_input, right_input = Lambda(lambda x: tf.split(x, 2, axis=1))(input_layer)

    # Remove that size-1 axis so each branch receives a plain (H, W, 3) image.
    left_input = Reshape([img_shape, img_shape, 3])(left_input)
    right_input = Reshape([img_shape, img_shape, 3])(right_input)



    # Left branch: rebuild backbone + max-pool + 5-way softmax head and load the
    # checkpoint into it (presumably this is the architecture the checkpoint was
    # saved from -- TODO confirm).
    left_model = efn.EfficientNetB7(input_shape =  (img_shape,img_shape,3), include_top = False, weights = None,input_tensor=left_input)
    xl = left_model.output
    xl = GlobalMaxPooling2D()(xl)
    out1 = Dense(5, activation='softmax')(xl)
    left_model=tf.keras.Model(inputs=left_model.input, outputs=out1)
    left_model.load_weights('../input/aptos-model/aptos_model.hdf5')
    # Cut the model at layers[-3], i.e. drop the two layers added above
    # (GlobalMaxPooling2D and Dense), so the branch ends at the backbone output.
    left_model=tf.keras.Model(left_model.input, left_model.layers[-3].output)
    
    
    
    # Right branch: same construction, loading the same checkpoint file.
    right_model = efn.EfficientNetB7(input_shape =  (img_shape,img_shape,3), include_top = False, weights = None,input_tensor=right_input)
    xr = right_model.output
    xr = GlobalMaxPooling2D()(xr)
    outr = Dense(5, activation='softmax')(xr)
    right_model=tf.keras.Model(inputs=right_model.input, outputs=outr)
    right_model.load_weights('../input/aptos-model/aptos_model.hdf5')
    right_model=tf.keras.Model(right_model.input, right_model.layers[-3].output)

    # Append a per-branch suffix to every layer name (presumably to avoid
    # duplicate layer names once both branches live in one model -- TODO confirm).
    # NOTE(review): this relies on the private `_layers` / `_name` attributes, and
    # layers reachable from both branches are renamed by both loops.
    for layer in right_model._layers:
        layer._name = layer._name + '_right'
    for layer in left_model._layers:
        layer._name = layer._name + '_left'

    left_model._name="left_eff"
    right_model._name="right_eff"


    # Fuse the two branch outputs, pool them and classify with the dense head.
    con = concatenate([left_model.output, right_model.output])
    GAP= GlobalAveragePooling2D()(con)
    fc1=Dense(256, activation = 'relu')(GAP)
    fc2=Dense(128, activation = 'relu')(fc1)
    fc3=Dense(64, activation = 'relu')(fc2)
    # Eight independent sigmoid outputs, one per label column.
    out = Dense(8, activation = 'sigmoid')(fc3)

    model = tf.keras.Model(inputs=input_layer, outputs=out)
    return model


In [19]:
from tensorflow.keras.optimizers import Adam
from focal_loss import BinaryFocalLoss

def get_model():
    """Build and compile the two-branch model under the distribution strategy.

    Returns:
        The model from `parallel_model()`, compiled with Adam
        (learning rate 3e-4, decay 1e-3), binary focal loss (gamma=2) and the
        'accuracy' metric.
    """
    # Everything that creates variables -- the model weights, the optimizer and
    # the state set up by compile() -- is created inside the strategy scope so
    # it is set up for the strategy's replicas.
    with strategy.scope():
        model = parallel_model()
        # `learning_rate` is the documented argument name; `lr` is a deprecated alias.
        opt = Adam(learning_rate=0.0003, decay=1e-3)
        model.compile(optimizer=opt, loss=BinaryFocalLoss(gamma=2),metrics=['accuracy'])
    return model

In [20]:
# Callback list: multiply the learning rate by 0.5 when val_loss plateaus
# (patience 3), never going below 1e-6.
plateau_callback = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
)
reduce_lr = [plateau_callback]

In [21]:
from sklearn.model_selection import train_test_split
# Split left paths, right paths and labels in ONE call so the three stay
# row-aligned by construction, instead of relying on two separate calls that
# happen to share the same random_state.
(left_train, left_val,
 right_train, right_val,
 train_label_left, val_label_left) = train_test_split(
    left_train_paths, right_train_paths, train_labels,
    test_size = 500/3500, random_state = 73)

# Both eyes share one label row; keep the per-eye names used by later cells.
train_label_right, val_label_right = train_label_left, val_label_left

In [22]:
# Sizes of the validation and training splits, in that order.
tuple(len(paths) for paths in (left_val, left_train))

(500, 3000)

In [23]:
# Sanity check that the left and right splits carry identical labels.
# `.all()` requires EVERY row of a column to match; `.any()` would report True
# as soon as a single row matched and so could not detect a misaligned split.
(train_label_left==train_label_right).all()

N    True
D    True
G    True
C    True
A    True
H    True
M    True
O    True
dtype: bool

In [24]:

    
# Training pipeline: decode each (left, right, labels) record, repeat forever,
# shuffle, batch and prefetch.
train_image=tf.data.Dataset.from_tensor_slices((left_train,right_train,train_label_left ))
train_dataset=train_image.map(decode_image, num_parallel_calls=AUTO).repeat().shuffle(512).batch(BATCH_SIZE).prefetch(AUTO)

# Validation pipeline: must be built from the held-out records (val_image),
# not from train_image, otherwise val_loss is measured on training data.
val_image=tf.data.Dataset.from_tensor_slices((left_val,right_val,val_label_left ))
val_dataset=val_image.map(decode_image, num_parallel_calls=AUTO).repeat().batch(BATCH_SIZE).prefetch(AUTO)

# Test pipeline: image pairs only (no labels), in file order.
test_image=tf.data.Dataset.from_tensor_slices((left_test_paths,right_test_paths ))
test_dataset=test_image.map(decode_image, num_parallel_calls=AUTO).batch(BATCH_SIZE)

# Step counts must be whole numbers of batches; round up so every sample is
# seen at least once per epoch / validation pass.
train_steps = math.ceil(len(train_label_left) / BATCH_SIZE)
val_steps = math.ceil(len(val_label_left) / BATCH_SIZE)

model=get_model()
history = model.fit(train_dataset,
                steps_per_epoch=train_steps,
                epochs=10,
                # 2 = one summary line per epoch (10 is not a documented verbosity level).
                verbose=2,
                # reduce_lr is already a list of callbacks; do not nest it in another list.
                callbacks=reduce_lr,
                validation_data=val_dataset,
                validation_steps=val_steps
                )
    



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [25]:
# `steps` has to be a whole number of batches; round up so the final, smaller
# batch is included (len/BATCH_SIZE alone is a float such as 12.5).
test_steps = math.ceil(len(test_labels) / BATCH_SIZE)
y_pred=model.predict(test_dataset,steps=test_steps)
y_pred.shape,test_labels.shape

((500, 8), (500, 8))

In [26]:
# Write each prediction column into the test column of the same label.
label_names = ['N','D','G','C','A','H','M','O']
for label_name, label_scores in zip(label_names, y_pred.T):
    test[label_name] = label_scores

In [27]:
# Remove the file-name columns before export. Rebinding (instead of an in-place
# drop) and ignoring already-missing columns makes this cell safe to re-run:
# the in-place version raises KeyError the second time it is executed.
test = test.drop(columns=['Left-Fundus','Right-Fundus'], errors='ignore')

In [28]:
# Export the IDs and predicted label scores without the DataFrame index.
# NOTE(review): the file name says "B5" while the model above is built from
# EfficientNetB7 -- confirm the name is intentional.
submission_path = 'tpu_B5.csv'
test.to_csv(submission_path, index=False)