# EyeQ --> https://github.com/HzFu/EyeQ/tree/master

#### Citation

[1] Huazhu Fu, Boyang Wang, Jianbing Shen, Shanshan Cui, Yanwu Xu, Jiang Liu, Ling Shao, "Evaluation of Retinal Image Quality Assessment Networks in Different Color-spaces", in MICCAI, 2019. [PDF] Note: The corrected accuracy score of MCF-Net is 0.8800.



# EyePacs
https://www.kaggle.com/competitions/diabetic-retinopathy-detection/overview


In [1]:
import glob
import os
import shutil
from pathlib import Path

import pandas as pd

In [2]:
os.getcwd()

'F:\\repos\\test_retina'

In [3]:
# Project root: the dataset folders and label CSVs in later cells are located
# with os.path.join(path, ...).
path =os.getcwd()

In [4]:
# NOTE(review): `os` is already imported in the first cell, and `path` was just
# captured from os.getcwd(), so when the cells run in order this chdir leaves the
# working directory unchanged. Relative file names used later resolve against it.
import os
os.chdir(path)

In [10]:
# Image folders, all located under <path>/eyePacs.
path_images = os.path.join(path, "eyePacs", "pre-processed") # all pre-processed EyePACS images
path_images_test = os.path.join(path, "eyePacs", "test")
path_images_train = os.path.join(path, "eyePacs", "train")
# NOTE(review): the two "*-pre" folders are not referenced by any visible cell —
# confirm whether they are still needed.
path_images_test_pre = os.path.join(path, "eyePacs", "test-pre")
path_images_train_pre = os.path.join(path, "eyePacs", "train-pre")

In [11]:
# File names (directory part stripped) of every .jpeg in each image folder.
# os.path.basename replaces splitting on "\\", which only removes the directory
# on Windows; on POSIX systems glob returns "/"-separated paths and the old code
# would have kept the full path.
train_lst = [os.path.basename(f) for f in glob.glob(os.path.join(path_images_train, "*.jpeg"))]
test_lst = [os.path.basename(f) for f in glob.glob(os.path.join(path_images_test, "*.jpeg"))]
pre_lst = [os.path.basename(f) for f in glob.glob(os.path.join(path_images, "*.jpeg"))]

In [12]:
train_lst[0]

'10003_left.jpeg'

In [13]:
len(train_lst), len(test_lst), len(pre_lst)

(35126, 53576, 88702)

# EyeQ Metadata

In [14]:
# Load the EyeQ label files from <path>/eyePacs; index_col=0 uses the first CSV
# column as the row index instead of keeping it as a data column.
train = pd.read_csv(os.path.join(path, "eyePacs", "Label_EyeQ_train.csv"), index_col=0)
test = pd.read_csv(os.path.join(path, "eyePacs", "Label_EyeQ_test.csv"), index_col=0)
# Preview the first rows of the test labels.
test.head()

Unnamed: 0,image,quality,DR_grade
0,1_right.jpeg,1,0
1,10001_right.jpeg,0,2
2,10004_right.jpeg,0,0
3,10008_left.jpeg,0,0
4,10016_right.jpeg,0,2


In [15]:
train.quality.value_counts()

quality
0    8347
2    2320
1    1876
Name: count, dtype: int64

In [16]:
test.quality.value_counts()

quality
0    8471
1    4558
2    3220
Name: count, dtype: int64

In [17]:
def apply_label(x):
    """Return True when the quality code ``x`` equals 0, otherwise False."""
    return bool(x == 0)

In [18]:
# Binary label: True when quality == 0, False otherwise.
# A vectorised comparison replaces the per-element Python call made by .apply().
train['label'] = train['quality'].eq(0)
test['label'] = test['quality'].eq(0)

In [19]:
train.label.value_counts(), test.label.value_counts()

(label
 True     8347
 False    4196
 Name: count, dtype: int64,
 label
 True     8471
 False    7778
 Name: count, dtype: int64)

In [20]:
# Rename the file-name column by name. Assigning a positional list to `.columns`
# would silently mislabel the data if the column order or count ever changed;
# rename() touches only the named column and is safe to re-run.
train = train.rename(columns={'image': 'image_file_name'})
test = test.rename(columns={'image': 'image_file_name'})

In [21]:
# Write the relabelled metadata of both EyeQ sets to <path>/eyePacs, without the index.
for metadata_frame, csv_name in ((train, "metadata_train.csv"), (test, "metadata_test.csv")):
    metadata_frame.to_csv(os.path.join(path, "eyePacs", csv_name), index=False)

In [22]:
# Plain Python lists of the image file names in each EyeQ set.
trainl = train['image_file_name'].tolist()
testl = test['image_file_name'].tolist()

In [23]:
# Check if all images are in Pre-processed

In [24]:
# De-duplicated file names of each EyeQ set.
# NOTE(review): neither set is read by any visible cell — confirm whether they are still needed.
trainls = set(trainl)
testls = set(testl)

In [25]:
# Train images listed in the metadata but absent from the pre-processed folder.
# Membership is tested against a set (constant-time lookup) instead of scanning
# the whole `pre_lst` list for every name, and the missing names are returned as
# the cell value rather than printed from a side-effect comprehension.
# An empty list means every train image is available.
pre_set = set(pre_lst)
[x for x in trainl if x not in pre_set]

[]

In [26]:
# Test images listed in the metadata but absent from the pre-processed folder.
# Membership is tested against a set (constant-time lookup) instead of scanning
# the whole `pre_lst` list for every name, and the missing names are returned as
# the cell value rather than printed from a side-effect comprehension.
# An empty list means every test image is available.
pre_set = set(pre_lst)
[x for x in testl if x not in pre_set]

[]

### Now we join the EyeQ train and test sets and draw class-balanced splits: 9000 train, 1200 validation and 1200 test images

In [27]:
# Pool the EyeQ train and test metadata into one frame with a fresh 0..n-1 index.
# NOTE: this rebinds `train`, so re-running the cell appends `test` a second time;
# restart from the loading cell before re-running.
train = pd.concat([train, test], ignore_index=True)

In [28]:
trainq = train['quality'].to_list()

In [29]:
train['quality'].value_counts()

quality
0    16818
1     6434
2     5540
Name: count, dtype: int64

In [30]:
# Target number of images in each resampled split.
TRAIN_LENGTH = 9_000
TEST_LENGTH = 1_200
VAL_LENGTH = 1_200

In [31]:
def sample(df, label, value, number, random_state=None):
    """Draw ``number`` random rows of ``df`` whose ``label`` column equals ``value``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to draw from.
    label : str
        Name of the column to filter on.
    value : object
        Rows are kept when ``df[label] == value``.
    number : int
        How many rows to draw (without replacement).
    random_state : int or numpy random generator, optional
        Seed forwarded to ``DataFrame.sample`` so the draw can be reproduced.
        The default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        The sampled rows, keeping their original index.

    Raises
    ------
    ValueError
        Propagated from ``DataFrame.sample`` when fewer than ``number`` rows match.
    """
    matching = df[df[label] == value]
    return matching.sample(n=number, random_state=random_state)
    

# Create train Dataframe

In [32]:
# Class-balanced training split: the same number of rows is drawn from every
# quality class. The per-class size is computed once (instead of re-evaluating
# unique() on every iteration), and a fixed seed makes the draw repeatable so a
# re-run reproduces the same split.
n_per_class = TRAIN_LENGTH // train['quality'].nunique()
train_df = (
    train.groupby('quality')
    .sample(n=n_per_class, random_state=42)
    .reset_index(drop=True)
)
print(len(train_df), train_df['quality'].value_counts())

9000 quality
0    3000
2    3000
1    3000
Name: count, dtype: int64


# Create Val Dataset

In [33]:
train['image_file_name'].isin( train_df['image_file_name'].to_list()) 

0        False
1        False
2        False
3         True
4        False
         ...  
28787    False
28788    False
28789    False
28790    False
28791     True
Name: image_file_name, Length: 28792, dtype: bool

In [34]:
# Keep only the rows whose image was not drawn into the training split.
in_train_split = train['image_file_name'].isin(train_df['image_file_name'])
train_rest = train[~in_train_split]

In [35]:
len(train_rest)

19792

In [37]:
# Class-balanced validation split drawn from the rows not used for training.
# The per-class size is derived from VAL_LENGTH rather than a hard-coded 400, and
# a fixed seed makes the draw repeatable.
n_per_class = VAL_LENGTH // train_rest['quality'].nunique()
val_df = (
    train_rest.groupby('quality')
    .sample(n=n_per_class, random_state=42)
    .reset_index(drop=True)
)
print(len(val_df), val_df['quality'].value_counts())

1200 quality
0    400
2    400
1    400
Name: count, dtype: int64


# Create Test Dataset HoldOut 

In [38]:
# Remove the rows just drawn for validation, then show how many remain.
in_val_split = train_rest['image_file_name'].isin(val_df['image_file_name'])
train_rest = train_rest[~in_val_split]
len(train_rest)

18592

In [39]:
# Class-balanced hold-out test split drawn from the rows used by neither the
# training nor the validation split. The per-class size is derived from
# TEST_LENGTH rather than a hard-coded 400, and a fixed seed makes the draw repeatable.
n_per_class = TEST_LENGTH // train_rest['quality'].nunique()
test_df = (
    train_rest.groupby('quality')
    .sample(n=n_per_class, random_state=42)
    .reset_index(drop=True)
)
print(len(test_df), test_df['quality'].value_counts())

1200 quality
0    400
2    400
1    400
Name: count, dtype: int64


# Save Dataframes

In [40]:
# Write the three splits as CSV files (no index column) into the working directory.
for split_frame, csv_name in ((train_df, "train.csv"), (val_df, "val.csv"), (test_df, "test.csv")):
    split_frame.to_csv(csv_name, index=False)

# List Images Train, Test, Val

In [41]:
# Parallel lists per split: image file names (…l) and their quality codes (…q).
trainl, trainq = train_df['image_file_name'].tolist(), train_df['quality'].tolist()
testl, testq = test_df['image_file_name'].tolist(), test_df['quality'].tolist()
vall, valq = val_df['image_file_name'].tolist(), val_df['quality'].tolist()

# Copy Images for Experiment

In [42]:
path_images

'F:\\repos\\test_retina\\eyePacs\\pre-processed'

In [43]:
# Maps each quality code to the class sub-folder name used when copying images.
label_dic = {
    0: "good",
    1: "usable",
    2: "reject",
}

In [44]:
label_dic.get(0)

'good'

In [45]:
train_df.quality.value_counts()

quality
0    3000
2    3000
1    3000
Name: count, dtype: int64

In [46]:
# TRAIN
# Create the per-class destination folders first: shutil.copyfile does not
# create missing directories and would fail on a fresh checkout.
for class_name in label_dic.values():
    os.makedirs(os.path.join(path, "canal", "train", class_name), exist_ok=True)

# Copy every training image into <path>/canal/train/<class name>/.
for file, label in zip(trainl, trainq):
    src = os.path.join(path_images, file)
    # Index the dict directly so an unexpected quality code raises a KeyError
    # instead of passing None into os.path.join.
    dst = os.path.join(path, "canal", "train", label_dic[label], file)
    shutil.copyfile(src, dst)
    

In [47]:
# VALIDATION
# Create the per-class destination folders first: shutil.copyfile does not
# create missing directories and would fail on a fresh checkout.
for class_name in label_dic.values():
    os.makedirs(os.path.join(path, "canal", "val", class_name), exist_ok=True)

# Copy every validation image into <path>/canal/val/<class name>/.
for file, label in zip(vall, valq):
    src = os.path.join(path_images, file)
    # Index the dict directly so an unexpected quality code raises a KeyError
    # instead of passing None into os.path.join.
    dst = os.path.join(path, "canal", "val", label_dic[label], file)
    shutil.copyfile(src, dst)

In [48]:
# TEST
# Create the per-class destination folders first: shutil.copyfile does not
# create missing directories and would fail on a fresh checkout.
for class_name in label_dic.values():
    os.makedirs(os.path.join(path, "canal", "test", class_name), exist_ok=True)

# Copy every hold-out test image into <path>/canal/test/<class name>/.
for file, label in zip(testl, testq):
    src = os.path.join(path_images, file)
    # Index the dict directly so an unexpected quality code raises a KeyError
    # instead of passing None into os.path.join.
    dst = os.path.join(path, "canal", "test", label_dic[label], file)
    shutil.copyfile(src, dst)

In [49]:
len(trainl), len(testl), len(vall)

(9000, 1200, 1200)