## Import Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and later
# releases removed the old alias, so prefer the new name whenever it is available.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
import pandas as pd
import cv2
import os

## Get Data

In [None]:
# Folders holding the PNG images of each split.
train_dir = '../input/aptos2019-blindness-detection/train_images'
test_dir = '../input/aptos2019-blindness-detection/test_images'

# Per-split CSV tables, loaded as DataFrames.
train_df = pd.read_csv('../input/aptos2019-blindness-detection/train.csv')
test_df = pd.read_csv('../input/aptos2019-blindness-detection/test.csv')

In [None]:
# Preview the first rows of the table loaded from train.csv.
train_df.head()

In [None]:
# Preview the first rows of the table loaded from test.csv.
test_df.head()

## Data Statistics

In [None]:
# Print the number of rows in each split's DataFrame.
for caption, split_df in (('Num of training images: ', train_df),
                          ('Num of test images: ', test_df)):
    print(caption, len(split_df))

In [None]:
# Histogram of train_df's numeric columns; the title is set on the current axes.
train_df.hist()
plt.gca().set_title('Training Data Class Distribution')
plt.show()

In [None]:
# Count how often each diagnosis label occurs in the training table.
labels, counts = np.unique(train_df['diagnosis'].values, return_counts=True)
num_classes = len(counts)

# Print the label values returned by np.unique rather than the loop index, so the
# report stays correct even if some grade is absent from the data.
total = len(train_df)
for label, count in zip(labels, counts):
    print("Label {}: {} or {:.2f}%".format(label, count, count/total*100))

Clearly there is a large class imbalance in the training dataset. The labels are not provided for the test dataset so there is no information on their class distribution.

## Example Images

"DR leads to gradual changes in vasculature structure and resulting abnormalities such as microaneurysms, hemorrhages, hard exudates, and cotton wool spots. Along with the changes, there may be a presence of venous beading, retinal neovascularization which can be utilized to classify DR retinopathy in one of the two phases known as non-proliferative diabetic retinopathy (NPDR) and proliferative diabetic retinopathy (PDR)" - DeepDRiD Challenge

In [None]:
def plotExamples(ids, n, image_dir=None, seed=0):
    """Show `n` randomly chosen images from `ids` in a two-column grid.

    Parameters
    ----------
    ids : array-like of str
        Image identifiers; each is read as ``<id>.png`` from `image_dir`.
    n : int
        Number of images to display. Odd values are supported; the last row
        is then left half empty.
    image_dir : str, optional
        Folder containing the PNG files. Defaults to the notebook-level
        ``train_dir``.
    seed : int, optional
        Seed for the sampler, so repeated calls show the same images.

    Raises
    ------
    FileNotFoundError
        If one of the selected images cannot be read.
    """
    if image_dir is None:
        image_dir = train_dir
    ids = np.asarray(ids)

    # A private generator keeps the draw reproducible without resetting NumPy's
    # global random state for the rest of the notebook.
    rng = np.random.RandomState(seed)
    # Pick distinct images whenever enough ids exist; only fall back to sampling
    # with replacement when more images are requested than are available.
    picks = ids[rng.choice(len(ids), n, replace=len(ids) < n)]

    n_rows = (n + 1) // 2  # ceil(n / 2): leaves room for an unpaired last image
    fig = plt.figure(figsize=(15, 10))
    for i, image_id in enumerate(picks):
        file_name = image_id + ".png"
        image_path = os.path.join(image_dir, file_name)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot be read.
            raise FileNotFoundError("Could not read image: " + image_path)

        ax = fig.add_subplot(n_rows, 2, i + 1)
        # OpenCV loads BGR; matplotlib expects RGB.
        ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        ax.set_xlabel(file_name)
        # grid(None) would toggle the grid depending on the active style;
        # switch it off explicitly instead.
        ax.grid(False)

Class 0 corresponds to no apparent DR: there should be no signs of abnormalities.

In [None]:
# Ids of all training rows with diagnosis 0, then a preview of four of them.
is_class0 = train_df['diagnosis'] == 0
class0 = train_df.loc[is_class0, ['id_code']].to_numpy().ravel()
plotExamples(ids=class0, n=4)

Class 1 corresponds to mild NPDR: only presence of microaneurysms.

In [None]:
# Ids of all training rows with diagnosis 1, then a preview of four of them.
is_class1 = train_df['diagnosis'] == 1
class1 = train_df.loc[is_class1, ['id_code']].to_numpy().ravel()
plotExamples(ids=class1, n=4)

Class 2 corresponds to moderate NPDR: more than just microaneurysms but less than severe NPDR.

In [None]:
# Ids of all training rows with diagnosis 2, then a preview of four of them.
is_class2 = train_df['diagnosis'] == 2
class2 = train_df.loc[is_class2, ['id_code']].to_numpy().ravel()
plotExamples(ids=class2, n=4)

Class 3 corresponds to severe NPDR:

Moderate NPDR and any of the following:

• > 20 intraretinal hemorrhages

• Venous beading (localized increase in vein diameter)

• Intraretinal microvascular abnormalities

In [None]:
# Ids of all training rows with diagnosis 3, then a preview of four of them.
is_class3 = train_df['diagnosis'] == 3
class3 = train_df.loc[is_class3, ['id_code']].to_numpy().ravel()
plotExamples(ids=class3, n=4)

Class 4 corresponds to PDR:

Severe NPDR and one or both of the following:

• Neovascularization

• Vitreous/preretinal hemorrhage

In [None]:
# Ids of all training rows with diagnosis 4, then a preview of four of them.
is_class4 = train_df['diagnosis'] == 4
class4 = train_df.loc[is_class4, ['id_code']].to_numpy().ravel()
plotExamples(ids=class4, n=4)

## Preprocessing

In [None]:
import albumentations as A

### Crop Function

In [None]:
def cropEye(img, radius):
    """Rescale an eye image so the eye has the given radius, then center-crop it.

    The eye's radius is estimated from the middle pixel row: pixels whose summed
    channel intensity exceeds one tenth of that row's mean are counted, and half
    of the count is taken as the radius. The image is scaled so this estimate
    equals `radius`, and a window of side ``2 * radius`` is cut around the image
    center. If the window runs past the image border, the smaller crop is
    resized (not padded) to the target size, which changes its aspect ratio.

    Parameters
    ----------
    img : numpy.ndarray
        Image indexed as (row, column, channel).
    radius : int
        Desired eye radius in pixels.

    Returns
    -------
    numpy.ndarray
        Image of ``2 * radius`` by ``2 * radius`` pixels.
    """
    mid_row = img[img.shape[0] // 2, :, :].sum(1)
    eye_radius = (mid_row > mid_row.mean() / 10).sum() / 2
    if eye_radius == 0:
        # No pixel stands out from the background (e.g. a uniformly black
        # middle row). Treat the full width as the eye rather than dividing by
        # zero and handing an infinite scale factor to cv2.resize.
        eye_radius = img.shape[1] / 2
    scale = radius * (1 / eye_radius)

    resized = cv2.resize(img, (0, 0), fx=scale, fy=scale)
    center_row = resized.shape[0] // 2
    center_col = resized.shape[1] // 2

    # Clamp the window start at the border; slicing already truncates an end
    # index that lies beyond the image.
    top = max(center_row - radius, 0)
    left = max(center_col - radius, 0)
    cropped = resized[top:center_row + radius, left:center_col + radius, :]

    side = radius * 2
    if cropped.shape[0] != side or cropped.shape[1] != side:
        cropped = cv2.resize(cropped, (side, side))
    return cropped

In [None]:
# Fix the seed so the same sample of six ids is drawn on every run.
np.random.seed(0)
ids = train_df['id_code'].values.flatten()
sample_positions = np.random.choice(len(ids), size=6)
rand_ids = ids[sample_positions]
rand_ids

### Original Images

In [None]:
# Show the sampled images unmodified, two per row.
n = len(rand_ids)
n_rows = (n + 1) // 2  # ceil(n / 2) so an odd number of samples still fits
fig = plt.figure(figsize=(15, 10))
for i, image_id in enumerate(rand_ids):
    file_name = image_id + ".png"
    image_path = os.path.join(train_dir, file_name)
    I = cv2.imread(image_path)
    if I is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        raise FileNotFoundError("Could not read image: " + image_path)
    I = cv2.cvtColor(I, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; matplotlib expects RGB

    ax = fig.add_subplot(n_rows, 2, i + 1)
    ax.imshow(I)
    ax.set_xlabel(file_name)
    # grid(None) would toggle the grid depending on the active style; disable it explicitly.
    ax.grid(False)

### Cropped Images

In [None]:
# Show the same sampled images after cropEye, two per row.
n = len(rand_ids)
n_rows = (n + 1) // 2  # ceil(n / 2) so an odd number of samples still fits
fig = plt.figure(figsize=(15, 10))
for i, image_id in enumerate(rand_ids):
    file_name = image_id + ".png"
    image_path = os.path.join(train_dir, file_name)
    I = cv2.imread(image_path)
    if I is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        raise FileNotFoundError("Could not read image: " + image_path)
    I = cv2.cvtColor(I, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; matplotlib expects RGB
    I = I.astype('float32')
    I = cropEye(I, 128)
    # Divide by 255 so imshow receives float values on a 0-1 scale.
    I = I/255

    ax = fig.add_subplot(n_rows, 2, i + 1)
    ax.imshow(I)
    ax.set_xlabel(file_name)
    # grid(None) would toggle the grid depending on the active style; disable it explicitly.
    ax.grid(False)