# ConvNeXt for Diabetic Retinopathy (DR) Detection

## Setup

### Google Drive Access

In [1]:
from google.colab import drive
import os

# Parameters
# Root folder of the APTOS2019 dataset on the mounted Google Drive.
DATASET_PATH = '/content/drive/My Drive/University Of Stirling/Dissertation/ConvNEXT/APTOS2019'
# Output folder for preprocessed images. The trailing slash matters: file names
# are appended to this value by plain string concatenation later on.
PREP_PATH = DATASET_PATH + "/preprocessed/"

# Load Dataset From Drive
# The drive has to be mounted before DATASET_PATH can be entered.
drive.mount('/content/drive')

# Make the dataset folder the working directory and echo it as a sanity check.
os.chdir(DATASET_PATH)
print("CWD:",os.getcwd())

# Create the folder for preprocessed images if it is not there yet.
if not os.path.exists(PREP_PATH):
  os.mkdir(PREP_PATH)

Mounted at /content/drive
CWD: /content/drive/My Drive/University Of Stirling/Dissertation/ConvNEXT/APTOS2019


### Environment Setup

In [None]:
%%bash

# Install pinned torch/torchvision "+cu111" builds from the PyTorch wheel index.
pip install torch==1.8.0+cu111 torchvision==0.9.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
# Fetch the ConvNeXt repository from GitHub into the current directory.
git clone https://github.com/facebookresearch/ConvNeXt
# Additional Python packages (timm is pinned to 0.3.2).
pip install timm==0.3.2 tensorboardX six
pip install submitit

## Imports and global parameters

In [2]:
import pandas as pd
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
import shutil

In [3]:
# Image size in pixels; the preprocessing step below resizes every image to 224x224.
IMG_SIZE = (224,224)

## Dataset Preparation

In [6]:
import pandas as pd
import cv2
import numpy as np

# Label table for the training images; the code below reads its "id_code"
# column to locate each image file under train_images/.
dataset = pd.read_csv(DATASET_PATH + "/train.csv")

# ref: https://www.kaggle.com/code/ratthachat/aptos-eye-preprocessing-in-diabetic-retinopathy/notebook
# ref for circle crop: https://github.com/debayanmitra1993-data/Blindness-Detection-Diabetic-Retinopathy-/blob/master/research_paper_implementation.ipynb
def crop_image_from_gray(img,tol=7):
    """
    Crop away the dark border of an image.

    Every row and every column in which no pixel is brighter than ``tol`` is
    removed. For colour input the brightness test is done on a grayscale
    conversion of the image, and the first three channels are cropped.

    Parameters
    ----------
    img : numpy.ndarray
        2-D (grayscale) or 3-D (height x width x channels) image.
    tol : int, default 7
        Brightness threshold; pixels must be strictly greater than this value
        to count as image content.

    Returns
    -------
    numpy.ndarray
        The cropped image. If no pixel exceeds ``tol`` (the image is too dark
        and everything would be cropped away) the input is returned unchanged.

    Raises
    ------
    ValueError
        If ``img`` is neither 2-D nor 3-D.
    """
    if img.ndim == 2:
        mask = img > tol
    elif img.ndim == 3:
        mask = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
    else:
        raise ValueError("crop_image_from_gray expects a 2-D or 3-D image, got ndim=%d" % img.ndim)

    keep_rows = mask.any(1)
    keep_cols = mask.any(0)

    # Image is too dark: cropping would leave nothing, so keep the original.
    # (If no row contains a bright pixel then no column does either.)
    if not keep_rows.any():
        return img

    # Build the row/column selector once and reuse it for every channel.
    selector = np.ix_(keep_rows, keep_cols)
    if img.ndim == 2:
        return img[selector]

    # Crop the first three channels together; the result is a fresh,
    # contiguous (rows, cols, 3) array.
    return np.ascontiguousarray(img[:, :, :3][selector])

def circle_crop(img, sigmaX = 30):
    """
    Keep only the centred disc of an image and trim the dark surroundings.

    The image is first cropped with crop_image_from_gray, then everything
    outside the largest circle centred on the crop is blacked out, and the
    result is cropped once more. ``sigmaX`` is accepted but not used.
    """
    cropped = crop_image_from_gray(img)

    # Three-way unpack on purpose: a colour (3-D) image is expected here.
    rows, cols, _ = cropped.shape

    centre_x = cols // 2
    centre_y = rows // 2
    radius = int(min(centre_x, centre_y))

    # Single-channel mask: 1 inside the filled circle, 0 elsewhere.
    disc = np.zeros((rows, cols), np.uint8)
    cv2.circle(disc, (centre_x, centre_y), radius, 1, thickness=-1)

    masked = cv2.bitwise_and(cropped, cropped, mask=disc)
    return crop_image_from_gray(masked)

def preprocess(id_code):
  """
  Preprocess one training image and write the result to PREP_PATH.

  Steps: circle crop, resize to IMG_SIZE, reduce to a single channel derived
  from green, subtract the local average brightness, then apply CLAHE.

  Parameters
  ----------
  id_code : str
      Image identifier; the source file is
      DATASET_PATH/train_images/<id_code>.png and the output file is
      PREP_PATH<id_code>.png.

  Returns
  -------
  None
      Missing, unreadable, or unwritable files are reported with a printed
      message and skipped instead of raising.
  """
  path = DATASET_PATH + "/train_images/" + id_code + ".png"

  if not os.path.isfile(path):
    print(id_code + " does not exist!")
    return

  img = cv2.imread(path)

  # cv2.imread does not raise on a corrupt or unreadable file, it returns None.
  if img is None:
    print(id_code + " could not be read!")
    return

  # Circle crop
  img = circle_crop(img)

  # Resize the image (cv2.resize takes (width, height); IMG_SIZE is square)
  img = cv2.resize(img, IMG_SIZE)

  # Extract Green Channel: cv2.imread yields BGR, so zero out blue (0) and red (2)
  img[:,:,0] = 0
  img[:,:,2] = 0

  # Convert to Greyscale; with blue and red zeroed the result is a scaled
  # copy of the green channel.
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

  # Subtract a heavily blurred copy (sigma = 512/10) from the image:
  # 4*img - 4*blur + 128 removes slowly varying brightness and recentres
  # the result around mid-grey.
  img = cv2.addWeighted(img,4, cv2.GaussianBlur( img , (0,0) , 512/10) ,-4 ,128)

  # Perform contrast-limited adaptive histogram equalization (CLAHE)
  clahe = cv2.createCLAHE(clipLimit=5.0, tileGridSize=(8,8))
  img = clahe.apply(img)

  # cv2.imwrite reports failure through its return value rather than raising.
  if not cv2.imwrite(PREP_PATH + id_code + ".png", img):
    print(id_code + " could not be written!")


# Preprocess every image listed in the label table.
for id_code in dataset["id_code"]:
  preprocess(id_code) 

### Create Folder Structure

In [7]:
# Create DATASET_PATH/<split>/<label>/ for every split and every label 0-4.
# os.makedirs(..., exist_ok=True) creates missing parents and is a no-op for
# folders that already exist, so a partially created tree (split folder
# present, class folders missing) is completed instead of being skipped.
for split_name in ("train", "validation", "test"):
  for label in range(5):
    os.makedirs(os.path.join(DATASET_PATH, split_name, str(label)), exist_ok=True)

### Train-test split

In [None]:
from sklearn.model_selection import train_test_split
import shutil

def _copy_split(split, folder):
  """Copy the preprocessed image of every row in `split` to DATASET_PATH/<folder>/<label>/."""
  for image in split.values:
    # Column 0 holds the image id, column 1 the label used as folder name.
    file_name = image[0] + ".png"
    shutil.copyfile(PREP_PATH + file_name, DATASET_PATH + "/" + folder + "/" + str(image[1]) + "/" + file_name)


def prepare_dataset(test_size=.2, valid_size=.3, seed=42, stratify=True):
  """
  Split `dataset` into train/validation/test and copy the images into place.

  The split is seeded so that re-running the cell reproduces the same
  partition. Without a seed, each run would copy a different partition into
  the same folders, so images from one split would accumulate in the others.
  If the seed or the sizes are changed, clear the split folders first.

  Parameters
  ----------
  test_size : float, default .2
      Fraction of all rows held out for the test split.
  valid_size : float, default .3
      Fraction of the remaining rows used for validation.
  seed : int or None, default 42
      Passed as random_state to both splits; None restores unseeded splits.
  stratify : bool, default True
      Keep the label distribution (second column of `dataset`) the same in
      every split. Set to False if a label has too few rows to be split.

  Returns
  -------
  tuple
      (train, valid, test) DataFrames.
  """
  # Train-test split
  labels = dataset.iloc[:, 1] if stratify else None
  train, test = train_test_split(dataset, test_size=test_size, random_state=seed, stratify=labels)

  # Train-validation split on what is left after removing the test rows
  labels = train.iloc[:, 1] if stratify else None
  train, valid = train_test_split(train, test_size=valid_size, random_state=seed, stratify=labels)

  for split, folder in ((train, "train"), (valid, "validation"), (test, "test")):
    _copy_split(split, folder)

  return (train, valid, test)


# Build the splits, copy the files, and report the shape of each split.
train, valid, test = prepare_dataset()

print(train.shape, valid.shape, test.shape)
