# EyeQ --> https://github.com/HzFu/EyeQ/tree/master

#### Citation

[1] Huazhu Fu, Boyang Wang, Jianbing Shen, Shanshan Cui, Yanwu Xu, Jiang Liu, Ling Shao, "Evaluation of Retinal Image Quality Assessment Networks in Different Color-spaces", in MICCAI, 2019. [PDF] Note: The corrected accuracy score of MCF-Net is 0.8800.



# EyePacs
https://www.kaggle.com/competitions/diabetic-retinopathy-detection/overview


In [2]:
import glob
import os
import shutil
from pathlib import Path

import pandas as pd

In [3]:
# Show the current working directory; the next cell stores it in `path`.
os.getcwd()

'F:\\repos\\test_retina'

In [4]:
# Project root: every dataset path in this notebook is built from `path`.
path =os.getcwd()

In [5]:
# NOTE(review): `os` is already imported in the first code cell, and `path`
# was just read from os.getcwd(), so this chdir leaves the working directory
# unchanged unless `path` is edited to point somewhere else.
import os
os.chdir(path)

In [6]:
# Every image folder lives under <path>/eyePacs.
eyepacs_root = os.path.join(path, "eyePacs")

path_images = os.path.join(eyepacs_root, "pre-processed")  # all pre-processed EyePACS images
path_images_test = os.path.join(eyepacs_root, "test")
path_images_train = os.path.join(eyepacs_root, "train")
path_images_test_pre = os.path.join(eyepacs_root, "test-pre")
path_images_train_pre = os.path.join(eyepacs_root, "train-pre")

In [7]:
# File names (without directory) of every .jpeg in each image folder.
# os.path.basename strips the directory using the platform's own separator;
# splitting on "\\" only worked on Windows and returned the full path elsewhere.
train_lst = [os.path.basename(f) for f in glob.glob(os.path.join(path_images_train, "*.jpeg"))]
test_lst = [os.path.basename(f) for f in glob.glob(os.path.join(path_images_test, "*.jpeg"))]
pre_lst = [os.path.basename(f) for f in glob.glob(os.path.join(path_images, "*.jpeg"))]

In [8]:
# Peek at one entry to confirm the list holds bare file names.
train_lst[0]

'10003_left.jpeg'

In [9]:
# Number of .jpeg files found in the train, test and pre-processed folders.
len(train_lst), len(test_lst), len(pre_lst)

(35126, 53576, 88702)

# EyeQ Metadata

In [10]:
# Load the EyeQ label files; the first CSV column is used as the index.
eyeq_dir = os.path.join(path, "eyePacs")
train = pd.read_csv(os.path.join(eyeq_dir, "Label_EyeQ_train.csv"), index_col=0)
test = pd.read_csv(os.path.join(eyeq_dir, "Label_EyeQ_test.csv"), index_col=0)
test.head()

Unnamed: 0,image,quality,DR_grade
0,1_right.jpeg,1,0
1,10001_right.jpeg,0,2
2,10004_right.jpeg,0,0
3,10008_left.jpeg,0,0
4,10016_right.jpeg,0,2


In [11]:
# Distribution of DR grades in the EyeQ train metadata.
train.DR_grade.value_counts()

DR_grade
0    9239
2    1809
1     911
3     333
4     251
Name: count, dtype: int64

In [12]:
# Distribution of DR grades in the EyeQ test metadata.
test.DR_grade.value_counts()

DR_grade
0    11362
2     2644
1     1398
3      448
4      397
Name: count, dtype: int64

In [13]:
def apply_label(x):
    """Collapse a DR grade into a binary label.

    Returns 0 when ``int(x)`` equals 0 and 1 for every other value.
    """
    grade = int(x)
    return 1 if grade != 0 else 0

In [14]:
# Add the binary label derived from DR_grade to both metadata frames.
for frame in (train, test):
    frame['label'] = frame['DR_grade'].apply(apply_label)

In [15]:
# Binary label counts for the train and test metadata.
train.label.value_counts(), test.label.value_counts()

(label
 0    9239
 1    3304
 Name: count, dtype: int64,
 label
 0    11362
 1     4887
 Name: count, dtype: int64)

In [16]:
# Both frames share one layout, so they receive the same column names
# (assigned by position).
metadata_columns = ['image_file_name', 'quality', 'DR_grade', 'label']
train.columns = list(metadata_columns)
test.columns = list(metadata_columns)

In [17]:
# Persist the relabelled metadata next to the images, without the index column.
metadata_dir = os.path.join(path, "eyePacs")
train.to_csv(os.path.join(metadata_dir, "metadata_train.csv"), index=False)
test.to_csv(os.path.join(metadata_dir, "metadata_test.csv"), index=False)

In [81]:
# Image file names listed in each metadata frame, as plain Python lists.
trainl = train['image_file_name'].tolist()
testl = test['image_file_name'].tolist()

In [82]:
# Check if all images are in Pre-processed

In [83]:
# De-duplicated file names of each metadata frame.
# NOTE(review): neither set is read again in the visible cells; the checks
# below iterate over the `trainl` / `testl` lists instead.
trainls = set(trainl)
testls = set(testl)

In [84]:
# Check that every train image exists among the pre-processed images.
# A set difference replaces a scan of the whole `pre_lst` list per image, and
# the missing names become the cell's value instead of being printed one by
# one. An empty list means nothing is missing.
sorted(set(trainl).difference(pre_lst))

[]

In [85]:
# Check that every test image exists among the pre-processed images.
# A set difference replaces a scan of the whole `pre_lst` list per image, and
# the missing names become the cell's value instead of being printed one by
# one. An empty list means nothing is missing.
sorted(set(testl).difference(pre_lst))

[]

### Now we join test and train, then draw 8000 train, 1000 validation and 1000 test images

In [18]:
# Merge the EyeQ train and test metadata into one pool that is re-split below.
# NOTE(review): `train` is overwritten with the combined frame, so running
# this cell a second time appends `test` again.
train = pd.concat([train, test], ignore_index=True)

In [19]:
# Distribution of the quality grade in the combined frame.
train.quality.value_counts()

quality
0    16818
1     6434
2     5540
Name: count, dtype: int64

# Remove images with "Reject" quality (quality == 2)

In [21]:
# Keep rows whose quality grade is 0 or 1; rows graded 2 are dropped.
keep_quality = train['quality'] < 2
train = train.loc[keep_quality]
len(train)

23252

# Create Fine Tune Dataframes

In [22]:
# Labels of the filtered pool as a plain list.
# NOTE(review): `trainq` is reassigned from `train_df` further down before it
# is read anywhere in the visible cells.
trainq = train['label'].to_list()

In [23]:
# Class balance of the pool after the quality filter.
train['label'].value_counts()

label
0    16862
1     6390
Name: count, dtype: int64

In [24]:
# Target number of images in each split. TRAIN_LENGTH is divided evenly
# across the labels when the train split is sampled.
TRAIN_LENGTH = 8000
TEST_LENGTH = 1000
VAL_LENGTH = 1000

In [25]:
def sample(df, label, value, number, random_state=None):
    """Draw ``number`` random rows of ``df`` whose ``label`` column equals ``value``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to draw from.
    label : str
        Name of the column to filter on.
    value :
        Only rows where ``df[label] == value`` are eligible.
    number : int
        How many rows to draw, without replacement.
    random_state : int or numpy random generator, optional
        Forwarded to ``DataFrame.sample``. Pass a fixed value to get the same
        rows on every call; the default ``None`` keeps the original unseeded
        behaviour.

    Returns
    -------
    pandas.DataFrame
        The sampled rows, keeping their original index.

    Raises
    ------
    ValueError
        If fewer than ``number`` rows match ``value``.
    """
    matching = df[df[label] == value]
    # Fail with a message that names the class instead of pandas' generic
    # "larger sample than population" error (same exception type).
    if number is not None and number > len(matching):
        raise ValueError(
            f"Requested {number} rows with {label}={value!r}, "
            f"but only {len(matching)} are available."
        )
    return matching.sample(n=number, random_state=random_state)
    

# Create train Dataframe

In [26]:
# Draw the same number of images for every label so the train split is balanced.
SPLIT_SEED = 42  # fixed seed: without it every run produces a different split
labels = train['label'].unique()  # computed once instead of on every iteration
per_label = TRAIN_LENGTH // len(labels)

# The filter + sample step is written inline so the seed can be passed explicitly.
dataframes = [
    train[train['label'] == label].sample(n=per_label, random_state=SPLIT_SEED)
    for label in labels
]
train_df = pd.concat(dataframes, ignore_index=True)
print(len(train_df), train_df['label'].value_counts())

8000 label
0    4000
1    4000
Name: count, dtype: int64


# Create Val Dataset

In [27]:
# Boolean mask over `train`: True where the image name appears in `train_df`.
train['image_file_name'].isin( train_df['image_file_name'].to_list()) 

0         True
1        False
4        False
5        False
6        False
         ...  
28786     True
28787    False
28788     True
28789    False
28790    False
Name: image_file_name, Length: 23252, dtype: bool

In [28]:
# Rows whose image was not drawn into train_df stay available for val/test.
in_train_split = train['image_file_name'].isin(train_df['image_file_name'].to_list())
train_rest = train[~in_train_split]

In [29]:
# Rows left in the pool after removing the train split.
len(train_rest)

15252

In [31]:
# Balanced validation split, drawn from the rows not used for training.
SPLIT_SEED = 42  # fixed seed: without it every run produces a different split
labels = train_rest['label'].unique()  # computed once instead of on every iteration
per_label = VAL_LENGTH // len(labels)  # derived from the config constant, not a literal 500

# The filter + sample step is written inline so the seed can be passed explicitly.
dataframes = [
    train_rest[train_rest['label'] == label].sample(n=per_label, random_state=SPLIT_SEED)
    for label in labels
]
val_df = pd.concat(dataframes, ignore_index=True)
print(len(val_df), val_df['label'].value_counts())

1000 label
0    500
1    500
Name: count, dtype: int64


# Create Test Dataset HoldOut 

In [32]:
# Remove the validation images from the pool before drawing the test split.
in_val_split = train_rest['image_file_name'].isin(val_df['image_file_name'].to_list())
train_rest = train_rest[~in_val_split]
len(train_rest)

14252

In [33]:
# Balanced hold-out test split, drawn from the rows left after train and val.
SPLIT_SEED = 42  # fixed seed: without it every run produces a different split
labels = train_rest['label'].unique()  # computed once instead of on every iteration
per_label = TEST_LENGTH // len(labels)  # derived from the config constant, not a literal 500

# The filter + sample step is written inline so the seed can be passed explicitly.
dataframes = [
    train_rest[train_rest['label'] == label].sample(n=per_label, random_state=SPLIT_SEED)
    for label in labels
]
test_df = pd.concat(dataframes, ignore_index=True)
print(len(test_df), test_df['label'].value_counts())

1000 label
0    500
1    500
Name: count, dtype: int64


# Save Dataframes

In [34]:
# Write each split to a CSV in the current working directory, without the index.
for split_frame, csv_name in (
    (train_df, "drtrain.csv"),
    (val_df, "drval.csv"),
    (test_df, "drtest.csv"),
):
    split_frame.to_csv(csv_name, index=False)

# List Images Train, Test, Val

In [35]:
# File names and integer labels of each split as parallel Python lists.
trainl, trainq = train_df['image_file_name'].tolist(), train_df['label'].astype(int).tolist()
testl, testq = test_df['image_file_name'].tolist(), test_df['label'].astype(int).tolist()
vall, valq = val_df['image_file_name'].tolist(), val_df['label'].astype(int).tolist()

# Copy Images for Experiment

In [36]:
# Source folder the images are copied from in the cells below.
path_images

'F:\\repos\\test_retina\\eyePacs\\pre-processed'

In [37]:
# Maps the binary label to the class sub-folder name used when copying images.
label_dic={0:"NO_DR", 1:"DR"}

In [38]:
# Peek at the last ten training labels.
trainq[-10:]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [39]:
# Sanity check of the label -> folder-name lookup.
label_dic.get(0)

'NO_DR'

In [40]:
# Class balance of the training split.
train_df.label.value_counts()

label
0    4000
1    4000
Name: count, dtype: int64

In [41]:
# TRAIN
# Copy every train image into dr/train/<class name>/. The class folders are
# created first because shutil.copyfile does not create missing directories.
# NOTE(review): files left over from an earlier run are not removed.
train_root = os.path.join(path, "dr", "train")
for class_name in label_dic.values():
    os.makedirs(os.path.join(train_root, class_name), exist_ok=True)

for file, label in zip(trainl, trainq):
    src = os.path.join(path_images, file)
    # Indexing (not .get) so an unknown label fails as a KeyError instead of
    # becoming a None path component.
    dst = os.path.join(train_root, label_dic[label], file)
    shutil.copyfile(src, dst)
    

In [42]:
# VALIDATION
for file, label in zip(vall,valq) :
    src = os.path.join(path_images, file)
    dst = os.path.join(path , "dr", "val",label_dic.get(label), file)
    shutil.copyfile(src, dst)

In [43]:
# TEST
for file, label in zip(testl,testq) :
    src = os.path.join(path_images, file)
    dst = os.path.join(path , "dr", "test",label_dic.get(label), file)
    shutil.copyfile(src, dst)

In [44]:
# Number of images in the train, test and validation splits.
len(trainl), len(testl), len(vall)

(8000, 1000, 1000)