Importing kaggle.json file from my kaggle account

In [0]:
from google.colab import files
files.upload()

Making a kaggle directory in root,
Copying kaggle.json file 
Giving permission to kaggle.json

In [0]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json

Downloading Diabetic Retinopathy data from kaggle CLI 

In [0]:
!kaggle competitions download -c aptos2019-blindness-detection

Making train and test images directory to extract and save the images to their respective directories

In [0]:
!mkdir train_images
!mkdir test_images

Extracting the zip files

In [0]:
from zipfile import ZipFile as zf

with zf('/content/train_images.zip','r') as zip_ref:
  zip_ref.extractall('/content/train_images/')
  
print("Train extract finished...")
!rm -rf train_images.zip

In [0]:
with zf('/content/test_images.zip','r') as zip_ref:
  zip_ref.extractall('/content/test_images/')
  
print("Test extract finished...")
!rm -rf test_images.zip

Importing required modules

In [0]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Dense,Dropout,BatchNormalization,Activation,Conv2D,MaxPooling2D,Flatten
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import regularizers, optimizers
from matplotlib import pyplot as plt
import shutil
import cv2
from PIL import Image

In [0]:
os.chdir('./train_images')
train_images = os.listdir()
os.chdir('./../test_images')
test_images = os.listdir()
os.chdir('./../')

In [0]:
train_csv = pd.read_csv('./train.csv')
test_csv  = pd.read_csv('./test.csv')

In [151]:
test_csv.shape

(1928, 1)

In [152]:
len(test_images)

1928

In [0]:
train_csv['id_code'] = train_csv['id_code'].map(lambda x : x+'.png')
test_csv['id_code'] = test_csv['id_code'].map(lambda x : x+'.png')

In [0]:
train_csv['diagnosis'] = train_csv['diagnosis'].apply(str)

Splitting the CSV to Training and Test Dataset

In [0]:
x_train,x_val,y_train,y_val = train_test_split(train_csv.id_code,train_csv.diagnosis,test_size=0.2)

Converting the Fundus Image to 

1.   Gray Scale and Masking all the pixel values in that image and stacking the 3 color channels again to get the real image
2.    Removing the black contours from the image



In [0]:
def crop_image_from_gray(img, tol=7):
    path = f'./test_images/{label}'
    img = cv2.imread(path)
    if img.ndim == 2:
        mask = img > tol
        return img[np.ix_(mask.any(1), mask.any(0))]
    elif img.ndim == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        mask = gray_img > tol
        check_shape = img[:, :, 0][np.ix_(mask.any(1), mask.any(0))].shape[0]
        if (check_shape == 0):
            return img
        else:
            img1 = img[:, :, 0][np.ix_(mask.any(1), mask.any(0))]
            img2 = img[:, :, 1][np.ix_(mask.any(1), mask.any(0))]
            img3 = img[:, :, 2][np.ix_(mask.any(1), mask.any(0))]
            img = np.stack([img1, img2, img3], axis=-1)
            dim = (240,120)
            img = cv2.resize(img,dim,interpolation = cv2.INTER_AREA)
        return img

      
def circle_crop(img):
    
    img = crop_image_from_gray(img)
  
    height, width, depth = img.shape
    largest_side = np.max((height, width))
    img = cv2.resize(img, (largest_side, largest_side))

    height, width, depth = img.shape

    x = int(width / 2)
    y = int(height / 2)
    r = np.amin((x, y))

    circle_img = np.zeros((height, width), np.uint8)
    cv2.circle(circle_img, (x, y), int(r), 1, thickness=-1)
    img = cv2.bitwise_and(img, img, mask=circle_img)
    img = crop_image_from_gray(img)

    return img

1.  Converting the Fundus Images to green color channel so that we can the arteries and veins clearly
2.   Resizing it into (224,224) so that it matches with the DenseNet121 Convolution Neural Network

In [0]:
from google.colab.patches import cv2_imshow

def img_to_green(img):
  
    circle_crop(img)
    dim = (224,224)
    image = cv2.imread(path)
    image = image[:,:,1]
    clahe = cv2.createCLAHE(clipLimit=30.0, tileGridSize=(8,8))
    image = cv2.medianBlur(image,3)
    image = cv2.equalizeHist(image)
    image = clahe.apply(image)
    image = cv2.resize(image,dim,interpolation = cv2.INTER_AREA)
    
    return image


Overall Preprocessing function

In [0]:
def preprocess_image(path):
    img = cv2.imread(path)
    img = img_to_green(img)
    cv2.imwrite(path,img)

Preprocessing the training images

In [0]:
i=0
for label in train_images:
    path = f'./train_images/{label}'
    preprocess_image(path)
    print(i)
    i+=1

Preprocessing the testing image

In [0]:
i=0
for label in test_images:
    path = f'./test_images/{label}'
    preprocess_image(path)
    print(i)
    i+=1

Distribution of Diagnosis 

In [0]:
plt.hist(train_csv['diagnosis'])
plt.hist(test_csv['diagnosis'])
plt.title("Diagnosis Distribution",color='white')
plt.show()

In [97]:
generator = ImageDataGenerator(rescale=1.0/255.0,
                              validation_split=0.1,
                              horizontal_flip=True,
                              vertical_flip=True,
                              )
train_path = './train_images/'
train_generator = generator.flow_from_dataframe(
             dataframe = train_csv,
             directory = train_path,
             subset='training',
             target_size=(224,224),
             x_col = 'id_code',
             y_col = 'diagnosis',
             class_mode='categorical',
             shuffle=True,
             batch_size = 32)

Found 3296 validated image filenames belonging to 5 classes.


In [98]:
validation_generator = generator.flow_from_dataframe(dataframe=train_csv,
                                                    directory=train_path,
                                                    subset='validation',
                                                    target_size=(224,224),
                                                    x_col='id_code',
                                                    y_col='diagnosis',
                                                    shuffle=True,
                                                    class_mode='categorical',
                                                    batch_size=32)

Found 366 validated image filenames belonging to 5 classes.


In [156]:
test_path = './test_images/'
test_generator = generator.flow_from_dataframe(dataframe=test_csv,
                                             directory=test_path,
                                             target_size=(224,224),
                                             x_col='id_code',
                                             y_col=None,
                                             class_mode=None,
                                             shuffle=False,
                                             batch_size=32)

Found 1928 validated image filenames.


In [0]:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(224,224,3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(5, activation='softmax'))

In [121]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 224, 224, 32)      896       
_________________________________________________________________
activation_9 (Activation)    (None, 224, 224, 32)      0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 222, 222, 32)      9248      
_________________________________________________________________
activation_10 (Activation)   (None, 222, 222, 32)      0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 111, 111, 32)      0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 111, 111, 64)     

In [0]:
model.compile(tf.keras.optimizers.RMSprop(lr=0.0001, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy"])

In [0]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=validation_generator.n//validation_generator.batch_size
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

In [124]:
model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=validation_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f0de2a07cc0>

In [0]:
model.save_weights('diabetic_retinopathy.h5')

In [126]:
model.evaluate_generator(generator=validation_generator,steps=STEP_SIZE_TEST)

[0.7134855672717094, 0.73333335]

In [163]:
test_generator.reset()

pred=model.predict_generator(test_generator,
                             verbose=1)



In [164]:
print(STEP_SIZE_TEST)

60


In [165]:
len(pred)

1928

In [0]:
predicted_class_indices=np.argmax(pred,axis=1)

In [147]:
len(predicted_class_indices)

1920

In [148]:
len(test_images)

1928

In [0]:
for i in range(len(predicted_class_indices)):
  print(predicted_class_indices[i])

In [0]:
def submit(predictions):
  sub = pd.read_csv('./sample_submission.csv')
  sub['diagnosis'] = predictions
  sub.to_csv('./sub.csv',index=False)

In [0]:
submit(predicted_class_indices)