# Data Cleaning
In this notebook we will clean the data. We will separate the right eye data and the left eye data.
1. Create a mapping of the unique keywords.
2. Create separate dataframe for left and right eye.
3. Separate data based on mapping of unique keywords.

In [None]:
# import the required libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Maps each recognised diagnostic keyword to the label column it switches on
# (N, D, G, C, A, H, M). Keywords mapped to 'DELETE' flag the row for removal
# later in the notebook. Keywords that are absent from this map are labelled
# 'O' by the labelling loops via disease_map.get(keyword, 'O').
disease_map = {
 'age-related macular degeneration':'A',
 'cataract':'C',
 'diabetic retinopathy':'D',
 'dry age-related macular degeneration':'A',
 'glaucoma':'G',
 'hypertensive retinopathy':'H',
 'mild nonproliferative retinopathy':'D',
 'moderate non proliferative retinopathy':'D',
 # The test and validation sheets spell this keyword without the space;
 # without this entry those rows fall through to 'O' instead of 'D'.
 'moderate nonproliferative retinopathy':'D',
 'myopia retinopathy':'M',
 'myopic maculopathy':'M',
 'myopic retinopathy':'M',
 'normal fundus':'N',
 'pathological myopia':'M',
 'proliferative diabetic retinopathy':'D',
 'severe nonproliferative retinopathy':'D',
 'severe proliferative diabetic retinopathy':'D',
 'suspected cataract':'C',
 'suspected diabetic retinopathy':'D',
 'suspected glaucoma':'G',
 'suspected moderate non proliferative retinopathy':'D',
 'suspicious diabetic retinopathy':'D',
 'wet age-related macular degeneration':'A',
 'anterior segment image':'DELETE',
 'image offset':'DELETE',
 'low image quality':'DELETE',
 'no fundus image':'DELETE',
 'optic disk photographically invisible':'DELETE'}


In [None]:
# Colab-only: mount Google Drive at /content/drive; every spreadsheet path
# used in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Location of the training-set annotation workbook on the mounted Drive.
dataset_path = (
    "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR"
    "/TrainSet/Annotation/TrainAnot.xlsx"
)

In [None]:
# Load the training annotations (one row per patient, both eyes) from dataset_path.
original_df = pd.read_excel(dataset_path)

In [None]:
original_df.shape

(3500, 15)

In [None]:
original_df.head(20)

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Fundus,Right-Fundus,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,0,69,Female,0_left.jpg,0_right.jpg,cataract,normal fundus,0,0,0,1,0,0,0,0
1,1,57,Male,1_left.jpg,1_right.jpg,normal fundus,normal fundus,1,0,0,0,0,0,0,0
2,2,42,Male,2_left.jpg,2_right.jpg,laser spot，moderate non proliferative retinopathy,moderate non proliferative retinopathy,0,1,0,0,0,0,0,1
3,3,66,Male,3_left.jpg,3_right.jpg,normal fundus,branch retinal artery occlusion,0,0,0,0,0,0,0,1
4,4,53,Male,4_left.jpg,4_right.jpg,macular epiretinal membrane,mild nonproliferative retinopathy,0,1,0,0,0,0,0,1
5,5,50,Female,5_left.jpg,5_right.jpg,moderate non proliferative retinopathy,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0
6,6,60,Male,6_left.jpg,6_right.jpg,macular epiretinal membrane,moderate non proliferative retinopathy，epireti...,0,1,0,0,0,0,0,1
7,7,60,Female,7_left.jpg,7_right.jpg,drusen,mild nonproliferative retinopathy,0,1,0,0,0,0,0,1
8,8,59,Male,8_left.jpg,8_right.jpg,normal fundus,normal fundus,1,0,0,0,0,0,0,0
9,9,54,Male,9_left.jpg,9_right.jpg,normal fundus,vitreous degeneration,0,0,0,0,0,0,0,1


In [None]:
# Keep only the left-eye image name and its keyword string; .copy() detaches
# the result from original_df so columns can be added to it safely.
left_eye_df = original_df.loc[:, ['Left-Fundus', 'Left-Diagnostic Keywords']].copy()
left_eye_df.head(15)

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords
0,0_left.jpg,cataract
1,1_left.jpg,normal fundus
2,2_left.jpg,laser spot，moderate non proliferative retinopathy
3,3_left.jpg,normal fundus
4,4_left.jpg,macular epiretinal membrane
5,5_left.jpg,moderate non proliferative retinopathy
6,6_left.jpg,macular epiretinal membrane
7,7_left.jpg,drusen
8,8_left.jpg,normal fundus
9,9_left.jpg,normal fundus


In [None]:
left_eye_df.shape

(3500, 2)

In [None]:
# Add one zero-filled indicator column per label, plus the DELETE flag,
# in the same column order as before.
for label in ('N', 'D', 'G', 'C', 'A', 'H', 'M', 'O', 'DELETE'):
    left_eye_df[label] = 0

In [None]:
left_eye_df.head()

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,0_left.jpg,cataract,0,0,0,0,0,0,0,0,0
1,1_left.jpg,normal fundus,0,0,0,0,0,0,0,0,0
2,2_left.jpg,laser spot，moderate non proliferative retinopathy,0,0,0,0,0,0,0,0,0
3,3_left.jpg,normal fundus,0,0,0,0,0,0,0,0,0
4,4_left.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,0,0


In [None]:
# Switch on the label columns of every left-eye row from its keyword string.
# - Keywords are separated by the full-width comma '，'.
# - 'lens dust' is ignored; any keyword missing from disease_map counts as 'O'.
# - The running position `i` is used as the row label, which relies on the
#   frame still having its default 0..n-1 index.
# NOTE: strings that use an ASCII ',' separator are not split here, so the
# whole string is looked up as a single keyword.
for i, keyword_text in enumerate(left_eye_df['Left-Diagnostic Keywords']):
    keywords = keyword_text.strip().split('，')
    for keyword in keywords:
        if keyword != 'lens dust':
            left_eye_df.loc[i, disease_map.get(keyword, 'O')] = 1
    

In [None]:
left_eye_df.loc[2, 'Left-Fundus']

'2_left.jpg'

In [None]:
left_eye_df.head()
# left_eye_df[left_eye_df['Left-Diagnostic Keywords'] == 'lens dust，normal fundus']

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,0_left.jpg,cataract,0,0,0,1,0,0,0,0,0
1,1_left.jpg,normal fundus,1,0,0,0,0,0,0,0,0
2,2_left.jpg,laser spot，moderate non proliferative retinopathy,0,1,0,0,0,0,0,1,0
3,3_left.jpg,normal fundus,1,0,0,0,0,0,0,0,0
4,4_left.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,1,0


In [None]:
# Save the labelled left-eye training table; the DELETE flag column is still present.
left_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TrainSet/Annotation/left_eye.xlsx')

In [None]:
# Keep only the right-eye image name and its keyword string; .copy() detaches
# the result from original_df so columns can be added to it safely.
right_eye_df = original_df.loc[:, ['Right-Fundus', 'Right-Diagnostic Keywords']].copy()
right_eye_df.head(15)

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords
0,0_right.jpg,normal fundus
1,1_right.jpg,normal fundus
2,2_right.jpg,moderate non proliferative retinopathy
3,3_right.jpg,branch retinal artery occlusion
4,4_right.jpg,mild nonproliferative retinopathy
5,5_right.jpg,moderate non proliferative retinopathy
6,6_right.jpg,moderate non proliferative retinopathy，epireti...
7,7_right.jpg,mild nonproliferative retinopathy
8,8_right.jpg,normal fundus
9,9_right.jpg,vitreous degeneration


In [None]:
right_eye_df.shape

(3500, 2)

In [None]:
# Add one zero-filled indicator column per label, plus the DELETE flag,
# in the same column order as before.
for label in ('N', 'D', 'G', 'C', 'A', 'H', 'M', 'O', 'DELETE'):
    right_eye_df[label] = 0

In [None]:
right_eye_df.head()

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,0_right.jpg,normal fundus,0,0,0,0,0,0,0,0,0
1,1_right.jpg,normal fundus,0,0,0,0,0,0,0,0,0
2,2_right.jpg,moderate non proliferative retinopathy,0,0,0,0,0,0,0,0,0
3,3_right.jpg,branch retinal artery occlusion,0,0,0,0,0,0,0,0,0
4,4_right.jpg,mild nonproliferative retinopathy,0,0,0,0,0,0,0,0,0


In [None]:
# Switch on the label columns of every right-eye row from its keyword string.
# - Keywords are separated by the full-width comma '，'.
# - 'lens dust' is ignored; any keyword missing from disease_map counts as 'O'.
# - The running position `i` is used as the row label, which relies on the
#   frame still having its default 0..n-1 index.
# NOTE: strings that use an ASCII ',' separator are not split here, so the
# whole string is looked up as a single keyword.
# `keywords` stays bound to the last row's split list after the loop; the
# next cell displays it.
for i, keyword_text in enumerate(right_eye_df['Right-Diagnostic Keywords']):
    keywords = keyword_text.strip().split('，')
    for keyword in keywords:
        if keyword != 'lens dust':
            right_eye_df.loc[i, disease_map.get(keyword, 'O')] = 1

In [None]:
keywords

['hypertensive retinopathy', 'age-related macular degeneration']

In [None]:
right_eye_df.head(15)

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,0_right.jpg,normal fundus,1,0,0,0,0,0,0,0,0
1,1_right.jpg,normal fundus,1,0,0,0,0,0,0,0,0
2,2_right.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0,0
3,3_right.jpg,branch retinal artery occlusion,0,0,0,0,0,0,0,1,0
4,4_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
5,5_right.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0,0
6,6_right.jpg,moderate non proliferative retinopathy，epireti...,0,1,0,0,0,0,0,1,0
7,7_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
8,8_right.jpg,normal fundus,1,0,0,0,0,0,0,0,0
9,9_right.jpg,vitreous degeneration,0,0,0,0,0,0,0,1,0


In [None]:
right_eye_df.shape

(3500, 11)

In [None]:
# Save the labelled right-eye training table; the DELETE flag column is still present.
right_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TrainSet/Annotation/right_eye.xlsx')

In [None]:
# Row labels of the left-eye images whose keywords mapped to 'DELETE'.
left_delete_index = left_eye_df.loc[left_eye_df['DELETE'].eq(1)].index.tolist()
left_delete_index

[493,
 1170,
 1178,
 1229,
 1461,
 2829,
 2889,
 3015,
 3032,
 3147,
 3288,
 3293,
 3354,
 3408]

In [None]:
# New frame without the flagged rows; left_eye_df itself is left unchanged.
updated_left_eye_df = left_eye_df.drop(index=left_delete_index)
updated_left_eye_df

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,0_left.jpg,cataract,0,0,0,1,0,0,0,0,0
1,1_left.jpg,normal fundus,1,0,0,0,0,0,0,0,0
2,2_left.jpg,laser spot，moderate non proliferative retinopathy,0,1,0,0,0,0,0,1,0
3,3_left.jpg,normal fundus,1,0,0,0,0,0,0,0,0
4,4_left.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
3495,4686_left.jpg,severe nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
3496,4688_left.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0,0
3497,4689_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
3498,4690_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0


In [None]:
updated_left_eye_df[updated_left_eye_df['DELETE'] == 1]

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE


In [None]:
# The DELETE flag is 0 for every remaining row, so remove the column in place.
# Re-running this cell raises KeyError because the column is already gone.
updated_left_eye_df.drop(columns='DELETE', inplace=True)
updated_left_eye_df

Deleting index 873 (875_left.jpg) since it has (epiretinal membrane，normal fundus，lens dust)

In [None]:
# Manually remove row label 873 (in place); raises KeyError if it is already gone.
updated_left_eye_df.drop(index=[873], inplace=True)

In [None]:
# Save the cleaned left-eye training table. The same file is written again
# further down, after more rows have been removed.
updated_left_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TrainSet/Annotation/updated_left_eye.xlsx')

In [None]:
# Row labels of the right-eye images whose keywords mapped to 'DELETE'.
right_delete_index = right_eye_df.loc[right_eye_df['DELETE'].eq(1)].index.tolist()
right_delete_index

[371,
 1157,
 1244,
 1462,
 2840,
 2941,
 2994,
 3015,
 3042,
 3058,
 3120,
 3176,
 3244,
 3274,
 3429]

In [None]:
# New frame without the flagged rows; right_eye_df itself is left unchanged.
updated_right_eye_df = right_eye_df.drop(index=right_delete_index)
updated_right_eye_df

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,0_right.jpg,normal fundus,1,0,0,0,0,0,0,0,0
1,1_right.jpg,normal fundus,1,0,0,0,0,0,0,0,0
2,2_right.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0,0
3,3_right.jpg,branch retinal artery occlusion,0,0,0,0,0,0,0,1,0
4,4_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
3495,4686_right.jpg,proliferative diabetic retinopathy,0,1,0,0,0,0,0,0,0
3496,4688_right.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0,0
3497,4689_right.jpg,normal fundus,1,0,0,0,0,0,0,0,0
3498,4690_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0


In [None]:
updated_right_eye_df[updated_right_eye_df['DELETE'] == 1]

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE


In [None]:
# The DELETE flag is 0 for every remaining row, so remove the column in place.
# Re-running this cell raises KeyError because the column is already gone.
updated_right_eye_df.drop(columns='DELETE', inplace=True)
updated_right_eye_df

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,0_right.jpg,normal fundus,1,0,0,0,0,0,0,0
1,1_right.jpg,normal fundus,1,0,0,0,0,0,0,0
2,2_right.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0
3,3_right.jpg,branch retinal artery occlusion,0,0,0,0,0,0,0,1
4,4_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
3495,4686_right.jpg,proliferative diabetic retinopathy,0,1,0,0,0,0,0,0
3496,4688_right.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0
3497,4689_right.jpg,normal fundus,1,0,0,0,0,0,0,0
3498,4690_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0


Deleting index 57 (57_right.jpg lens dust，drusen，normal fundus) and 1004 (1017_right.jpg lens dust，epiretinal membrane，normal fundus)

In [None]:
# Manually remove row labels 57 and 1004 (in place); raises KeyError if either is already gone.
updated_right_eye_df.drop(index=[57, 1004], inplace=True)

- 2174_right.jpg
- 2175_left.jpg
- 2176_left.jpg
- 2177_left.jpg
- 2177_right.jpg
- 2178_right.jpg
- 2179_left.jpg
- 2179_right.jpg
- 2180_left.jpg
- 2180_right.jpg
- 2181_left.jpg
- 2181_right.jpg
- 2182_left.jpg
- 2182_right.jpg
- 2957_left.jpg
- 2957_right.jpg

In [None]:
# Save the cleaned right-eye training table. The same file is written again
# further down, after the manual label fix.
updated_right_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TrainSet/Annotation/updated_right_eye.xlsx')

In [None]:
# Left-eye image file names that are removed from the cleaned training table.
anomalous_left_img = [
    "2175_left.jpg",
    "2176_left.jpg",
    "2177_left.jpg",
    "2179_left.jpg",
    "2180_left.jpg",
    "2181_left.jpg",
    "2182_left.jpg",
    "2957_left.jpg",
]

In [None]:
# Remove every row whose image name is listed in anomalous_left_img, in one pass.
anomalous_rows = updated_left_eye_df.loc[
    updated_left_eye_df['Left-Fundus'].isin(anomalous_left_img)
].index
updated_left_eye_df = updated_left_eye_df.drop(index=anomalous_rows)

In [None]:
updated_left_eye_df

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,0_left.jpg,cataract,0,0,0,1,0,0,0,0
1,1_left.jpg,normal fundus,1,0,0,0,0,0,0,0
2,2_left.jpg,laser spot，moderate non proliferative retinopathy,0,1,0,0,0,0,0,1
3,3_left.jpg,normal fundus,1,0,0,0,0,0,0,0
4,4_left.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
3495,4686_left.jpg,severe nonproliferative retinopathy,0,1,0,0,0,0,0,0
3496,4688_left.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0
3497,4689_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
3498,4690_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0


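Deleting index 371 (372_left.jpg) since it has (low image quality,maculopathy)
- The keywords here are separated by an ASCII comma (`,`) instead of the full-width comma (`，`) that the labelling loop splits on, so the program above did not separate them and the row was not flagged for deletion.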

In [None]:
# Manually remove row label 371 (in place); raises KeyError if it is already gone.
updated_left_eye_df.drop(index=371, inplace=True)

In [None]:
updated_left_eye_df

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,0_left.jpg,cataract,0,0,0,1,0,0,0,0
1,1_left.jpg,normal fundus,1,0,0,0,0,0,0,0
2,2_left.jpg,laser spot，moderate non proliferative retinopathy,0,1,0,0,0,0,0,1
3,3_left.jpg,normal fundus,1,0,0,0,0,0,0,0
4,4_left.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
3495,4686_left.jpg,severe nonproliferative retinopathy,0,1,0,0,0,0,0,0
3496,4688_left.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0
3497,4689_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
3498,4690_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0


In [None]:
# Overwrite the cleaned left-eye training file now that the anomalous images
# and row 371 have also been removed.
updated_left_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TrainSet/Annotation/updated_left_eye.xlsx')

In [None]:
updated_right_eye_df[updated_right_eye_df['Right-Diagnostic Keywords'] == 'post laser photocoagulation,diabetic retinopathy,maculopathy']

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O
540,541_right.jpg,"post laser photocoagulation,diabetic retinopat...",0,1,0,0,0,0,0,1


In [None]:
# This keyword string uses ASCII commas, so the labelling loop (which splits
# on '，') looked it up as a single keyword. It contains 'diabetic retinopathy',
# so the D flag is set by hand for the matching row(s).
# `ix` is reused by the next cell.
ascii_comma_keywords = 'post laser photocoagulation,diabetic retinopathy,maculopathy'
ix = updated_right_eye_df.loc[
    updated_right_eye_df['Right-Diagnostic Keywords'] == ascii_comma_keywords
].index
updated_right_eye_df.loc[ix, 'D'] = 1

In [None]:
# Rewrite the keyword text of the row(s) selected in `ix` with ', ' separators.
updated_right_eye_df.loc[ix,'Right-Diagnostic Keywords']='post laser photocoagulation, diabetic retinopathy, maculopathy'

In [None]:
# Overwrite the cleaned right-eye training file so it includes the manual label fix.
updated_right_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TrainSet/Annotation/updated_right_eye.xlsx')

**Cleaning Test Data**

In [None]:
# Location of the test-set annotation workbook on the mounted Drive.
test_dataset_path = (
    "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR"
    "/TestSet/Annotation/TestAnot.xlsx"
)

In [None]:
# Load the test annotations (one row per patient, both eyes) from test_dataset_path.
test_original_df = pd.read_excel(test_dataset_path)

In [None]:
test_original_df.shape

(1000, 15)

In [None]:
test_original_df.head(20)

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Fundus,Right-Fundus,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,198,68,Female,198_left.jpg,198_right.jpg,"cataract,hypertensive retinopathy","cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
1,663,60,Male,663_left.jpg,663_right.jpg,"cataract,hypertensive retinopathy","cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
2,1025,56,Female,1025_left.jpg,1025_right.jpg,"hypertensive retinopathy,macular epiretinal me...",hypertensive retinopathy,0,0,0,0,0,1,0,1
3,1139,53,Male,1139_left.jpg,1139_right.jpg,moderate nonproliferative retinopathy,"moderate nonproliferative retinopathy,macular ...",0,1,0,0,0,0,0,1
4,1141,63,Male,1141_left.jpg,1141_right.jpg,mild nonproliferative retinopathy,macular epiretinal membrane,0,1,0,0,0,0,0,1
5,1149,57,Female,1149_left.jpg,1149_right.jpg,"laser spot,moderate nonproliferative retinopathy",mild nonproliferative retinopathy,0,1,0,0,0,0,0,1
6,1155,73,Male,1155_left.jpg,1155_right.jpg,moderate nonproliferative retinopathy,drusen,0,1,0,0,0,0,0,1
7,1158,33,Female,1158_left.jpg,1158_right.jpg,moderate nonproliferative retinopathy,drusen,0,1,0,0,0,0,0,1
8,1159,67,Female,1159_left.jpg,1159_right.jpg,moderate nonproliferative retinopathy,myelinated nerve fibers,0,1,0,0,0,0,0,1
9,1161,68,Female,1161_left.jpg,1161_right.jpg,"moderate nonproliferative retinopathy,macular ...",moderate nonproliferative retinopathy,0,1,0,0,0,0,0,1


In [None]:
# Keep only the left-eye image name and its keyword string; .copy() detaches
# the result from test_original_df so columns can be added to it safely.
test_left_eye_df = test_original_df.loc[:, ['Left-Fundus', 'Left-Diagnostic Keywords']].copy()
test_left_eye_df.head(15)

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords
0,198_left.jpg,"cataract,hypertensive retinopathy"
1,663_left.jpg,"cataract,hypertensive retinopathy"
2,1025_left.jpg,"hypertensive retinopathy,macular epiretinal me..."
3,1139_left.jpg,moderate nonproliferative retinopathy
4,1141_left.jpg,mild nonproliferative retinopathy
5,1149_left.jpg,"laser spot,moderate nonproliferative retinopathy"
6,1155_left.jpg,moderate nonproliferative retinopathy
7,1158_left.jpg,moderate nonproliferative retinopathy
8,1159_left.jpg,moderate nonproliferative retinopathy
9,1161_left.jpg,"moderate nonproliferative retinopathy,macular ..."


In [None]:
test_left_eye_df.shape

(1000, 2)

In [None]:
# Add one zero-filled indicator column per label, plus the DELETE flag,
# in the same column order as before.
for label in ('N', 'D', 'G', 'C', 'A', 'H', 'M', 'O', 'DELETE'):
    test_left_eye_df[label] = 0

In [None]:
test_left_eye_df.head()

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,198_left.jpg,"cataract,hypertensive retinopathy",0,0,0,0,0,0,0,0,0
1,663_left.jpg,"cataract,hypertensive retinopathy",0,0,0,0,0,0,0,0,0
2,1025_left.jpg,"hypertensive retinopathy,macular epiretinal me...",0,0,0,0,0,0,0,0,0
3,1139_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,0,0
4,1141_left.jpg,mild nonproliferative retinopathy,0,0,0,0,0,0,0,0,0


In [None]:
# Switch on the label columns of every left-eye test row from its keyword string.
# - Both the ASCII comma used in this sheet and the full-width comma '，' used
#   in the training sheet are accepted as separators.
# - Each keyword is stripped, and empty fragments (e.g. from a trailing comma)
#   are skipped instead of being counted as 'O'.
# - 'lens dust' is ignored; any other keyword missing from disease_map counts as 'O'.
# - Rows are addressed by their real index label rather than a running counter,
#   so the loop does not depend on a default 0..n-1 index.
for row_label, keyword_text in test_left_eye_df['Left-Diagnostic Keywords'].items():
    keywords = [part.strip() for part in keyword_text.replace('，', ',').split(',')]
    for keyword in keywords:
        if keyword and keyword != 'lens dust':
            test_left_eye_df.loc[row_label, disease_map.get(keyword, 'O')] = 1
    

In [None]:
test_left_eye_df.loc[2, 'Left-Fundus']

'1025_left.jpg'

In [None]:
test_left_eye_df.head()
# left_eye_df[left_eye_df['Left-Diagnostic Keywords'] == 'lens dust，normal fundus']

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,198_left.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0,0
1,663_left.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0,0
2,1025_left.jpg,"hypertensive retinopathy,macular epiretinal me...",0,0,0,0,0,1,0,1,0
3,1139_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0
4,1141_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0


In [None]:
# Save the labelled left-eye test table; the DELETE flag column is still present.
test_left_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TestSet/Annotation/test_left_eye.xlsx')

In [None]:
# Keep only the right-eye image name and its keyword string; .copy() detaches
# the result from test_original_df so columns can be added to it safely.
test_right_eye_df = test_original_df.loc[:, ['Right-Fundus', 'Right-Diagnostic Keywords']].copy()
test_right_eye_df.head(15)

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords
0,198_right.jpg,"cataract,hypertensive retinopathy"
1,663_right.jpg,"cataract,hypertensive retinopathy"
2,1025_right.jpg,hypertensive retinopathy
3,1139_right.jpg,"moderate nonproliferative retinopathy,macular ..."
4,1141_right.jpg,macular epiretinal membrane
5,1149_right.jpg,mild nonproliferative retinopathy
6,1155_right.jpg,drusen
7,1158_right.jpg,drusen
8,1159_right.jpg,myelinated nerve fibers
9,1161_right.jpg,moderate nonproliferative retinopathy


In [None]:
test_right_eye_df.shape

(1000, 2)

In [None]:
# Add one zero-filled indicator column per label, plus the DELETE flag,
# in the same column order as before.
for label in ('N', 'D', 'G', 'C', 'A', 'H', 'M', 'O', 'DELETE'):
    test_right_eye_df[label] = 0

In [None]:
test_right_eye_df.head()

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,198_right.jpg,"cataract,hypertensive retinopathy",0,0,0,0,0,0,0,0,0
1,663_right.jpg,"cataract,hypertensive retinopathy",0,0,0,0,0,0,0,0,0
2,1025_right.jpg,hypertensive retinopathy,0,0,0,0,0,0,0,0,0
3,1139_right.jpg,"moderate nonproliferative retinopathy,macular ...",0,0,0,0,0,0,0,0,0
4,1141_right.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,0,0


In [None]:
# Switch on the label columns of every right-eye test row from its keyword string.
# - Both the ASCII comma used in this sheet and the full-width comma '，' used
#   in the training sheet are accepted as separators.
# - Each keyword is stripped, and empty fragments (e.g. from a trailing comma)
#   are skipped instead of being counted as 'O'.
# - 'lens dust' is ignored; any other keyword missing from disease_map counts as 'O'.
# - Rows are addressed by their real index label rather than a running counter,
#   so the loop does not depend on a default 0..n-1 index.
for row_label, keyword_text in test_right_eye_df['Right-Diagnostic Keywords'].items():
    keywords = [part.strip() for part in keyword_text.replace('，', ',').split(',')]
    for keyword in keywords:
        if keyword and keyword != 'lens dust':
            test_right_eye_df.loc[row_label, disease_map.get(keyword, 'O')] = 1

In [None]:
test_right_eye_df.head(15)

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,198_right.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0,0
1,663_right.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0,0
2,1025_right.jpg,hypertensive retinopathy,0,0,0,0,0,1,0,0,0
3,1139_right.jpg,"moderate nonproliferative retinopathy,macular ...",0,0,0,0,0,0,0,1,0
4,1141_right.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,1,0
5,1149_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
6,1155_right.jpg,drusen,0,0,0,0,0,0,0,1,0
7,1158_right.jpg,drusen,0,0,0,0,0,0,0,1,0
8,1159_right.jpg,myelinated nerve fibers,0,0,0,0,0,0,0,1,0
9,1161_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0


In [None]:
test_right_eye_df.shape

(1000, 11)

In [None]:
# Save the labelled right-eye test table; the DELETE flag column is still present.
test_right_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TestSet/Annotation/test_right_eye.xlsx')

In [None]:
# Row labels of the left-eye test images whose keywords mapped to 'DELETE'.
test_left_delete_index = test_left_eye_df.loc[test_left_eye_df['DELETE'].eq(1)].index.tolist()
test_left_delete_index

[935, 972]

In [None]:
# New frame without the flagged rows; test_left_eye_df itself is left unchanged.
test_updated_left_eye_df = test_left_eye_df.drop(index=test_left_delete_index)
test_updated_left_eye_df

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,198_left.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0,0
1,663_left.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0,0
2,1025_left.jpg,"hypertensive retinopathy,macular epiretinal me...",0,0,0,0,0,1,0,1,0
3,1139_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0
4,1141_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
995,4995_left.jpg,normal fundus,1,0,0,0,0,0,0,0,0
996,4996_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0
997,4997_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0
998,4998_left.jpg,normal fundus,1,0,0,0,0,0,0,0,0


In [None]:
test_updated_left_eye_df[test_updated_left_eye_df['DELETE'] == 1]

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE


In [None]:
# The DELETE flag is 0 for every remaining row, so remove the column in place.
# Re-running this cell raises KeyError because the column is already gone.
test_updated_left_eye_df.drop(columns='DELETE', inplace=True)
test_updated_left_eye_df

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,198_left.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
1,663_left.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
2,1025_left.jpg,"hypertensive retinopathy,macular epiretinal me...",0,0,0,0,0,1,0,1
3,1139_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
4,1141_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
995,4995_left.jpg,normal fundus,1,0,0,0,0,0,0,0
996,4996_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
997,4997_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
998,4998_left.jpg,normal fundus,1,0,0,0,0,0,0,0


In [None]:
# Save the cleaned left-eye test table (flagged rows and the DELETE column removed).
test_updated_left_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TestSet/Annotation/test_updated_left_eye.xlsx')

In [None]:
# Row labels of the right-eye test images whose keywords mapped to 'DELETE'.
test_right_delete_index = test_right_eye_df.loc[test_right_eye_df['DELETE'].eq(1)].index.tolist()
test_right_delete_index

[273]

In [None]:
# New frame without the flagged rows; test_right_eye_df itself is left unchanged.
test_updated_right_eye_df = test_right_eye_df.drop(index=test_right_delete_index)
test_updated_right_eye_df

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,198_right.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0,0
1,663_right.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0,0
2,1025_right.jpg,hypertensive retinopathy,0,0,0,0,0,1,0,0,0
3,1139_right.jpg,"moderate nonproliferative retinopathy,macular ...",0,0,0,0,0,0,0,1,0
4,1141_right.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
995,4995_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0
996,4996_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0
997,4997_right.jpg,normal fundus,1,0,0,0,0,0,0,0,0
998,4998_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0


In [None]:
test_updated_right_eye_df[test_updated_right_eye_df['DELETE'] == 1]

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE


In [None]:
# The DELETE flag is 0 for every remaining row, so remove the column in place.
# Re-running this cell raises KeyError because the column is already gone.
test_updated_right_eye_df.drop(columns='DELETE', inplace=True)
test_updated_right_eye_df

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,198_right.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
1,663_right.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
2,1025_right.jpg,hypertensive retinopathy,0,0,0,0,0,1,0,0
3,1139_right.jpg,"moderate nonproliferative retinopathy,macular ...",0,0,0,0,0,0,0,1
4,1141_right.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
995,4995_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
996,4996_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
997,4997_right.jpg,normal fundus,1,0,0,0,0,0,0,0
998,4998_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1


In [None]:
# Save the cleaned right-eye test table (flagged rows and the DELETE column removed).
test_updated_right_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TestSet/Annotation/test_updated_right_eye.xlsx')

## ***Val data cleaning***

In [None]:
# Location of the validation-set annotation workbook on the mounted Drive.
val_dataset_path = (
    "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR"
    "/ValidSet/Annotation/ValidAnot.xlsx"
)

In [None]:
# Load the validation annotations (one row per patient, both eyes) from val_dataset_path.
val_original_df = pd.read_excel(val_dataset_path)

In [None]:
val_original_df.shape

(500, 15)

In [None]:
val_original_df.head(20)

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Fundus,Right-Fundus,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,937,60,Female,937_left.jpg,937_right.jpg,hypertensive retinopathy,"hypertensive retinopathy,suspected diabetic re...",0,1,0,0,0,1,0,0
1,967,21,Female,967_left.jpg,967_right.jpg,moderate nonproliferative retinopathy,"moderate nonproliferative retinopathy,epiretin...",0,1,0,0,0,0,0,1
2,988,60,Female,988_left.jpg,988_right.jpg,mild nonproliferative retinopathy,"epiretinal membrane,moderate nonproliferative ...",0,1,0,0,0,0,0,1
3,995,56,Male,995_left.jpg,995_right.jpg,moderate nonproliferative retinopathy,"lens dust,retinal pigmentation",0,1,0,0,0,0,0,1
4,1000,63,Male,1000_left.jpg,1000_right.jpg,"laser spot,moderate nonproliferative retinopathy","moderate nonproliferative retinopathy,laser spot",0,1,0,0,0,0,0,1
5,1001,65,Female,1001_left.jpg,1001_right.jpg,macular epiretinal membrane,"epiretinal membrane,mild nonproliferative reti...",0,1,0,0,0,0,0,1
6,1002,51,Female,1002_left.jpg,1002_right.jpg,"drusen,mild nonproliferative retinopathy",moderate nonproliferative retinopathy,0,1,0,0,0,0,0,1
7,1003,68,Male,1003_left.jpg,1003_right.jpg,"hypertensive retinopathy,suspected diabetic re...","hypertensive retinopathy,suspected diabetic re...",0,1,0,0,0,1,0,0
8,1004,60,Male,1004_left.jpg,1004_right.jpg,moderate nonproliferative retinopathy,vitreous degeneration,0,1,0,0,0,0,0,1
9,1007,59,Male,1007_left.jpg,1007_right.jpg,"white vessel,severe nonproliferative retinopathy","laser spot,moderate nonproliferative retinopathy",0,1,0,0,0,0,0,1


In [None]:
# Keep only the left-eye image name and its keyword string; .copy() detaches
# the result from val_original_df so columns can be added to it safely.
val_left_eye_df = val_original_df.loc[:, ['Left-Fundus', 'Left-Diagnostic Keywords']].copy()
val_left_eye_df.head(15)

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords
0,937_left.jpg,hypertensive retinopathy
1,967_left.jpg,moderate nonproliferative retinopathy
2,988_left.jpg,mild nonproliferative retinopathy
3,995_left.jpg,moderate nonproliferative retinopathy
4,1000_left.jpg,"laser spot,moderate nonproliferative retinopathy"
5,1001_left.jpg,macular epiretinal membrane
6,1002_left.jpg,"drusen,mild nonproliferative retinopathy"
7,1003_left.jpg,"hypertensive retinopathy,suspected diabetic re..."
8,1004_left.jpg,moderate nonproliferative retinopathy
9,1007_left.jpg,"white vessel,severe nonproliferative retinopathy"


In [None]:
val_left_eye_df.shape

(500, 2)

In [None]:
# Add one zero-filled indicator column per label, plus the DELETE flag,
# in the same column order as before.
for label in ('N', 'D', 'G', 'C', 'A', 'H', 'M', 'O', 'DELETE'):
    val_left_eye_df[label] = 0

In [None]:
val_left_eye_df.head()

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,937_left.jpg,hypertensive retinopathy,0,0,0,0,0,0,0,0,0
1,967_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,0,0
2,988_left.jpg,mild nonproliferative retinopathy,0,0,0,0,0,0,0,0,0
3,995_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,0,0
4,1000_left.jpg,"laser spot,moderate nonproliferative retinopathy",0,0,0,0,0,0,0,0,0


In [None]:
# Switch on the label columns of every left-eye validation row from its keyword string.
# - Both the ASCII comma used in this sheet and the full-width comma '，' used
#   in the training sheet are accepted as separators.
# - Each keyword is stripped, and empty fragments (e.g. from a trailing comma)
#   are skipped instead of being counted as 'O'.
# - 'lens dust' is ignored; any other keyword missing from disease_map counts as 'O'.
# - Rows are addressed by their real index label rather than a running counter,
#   so the loop does not depend on a default 0..n-1 index.
for row_label, keyword_text in val_left_eye_df['Left-Diagnostic Keywords'].items():
    keywords = [part.strip() for part in keyword_text.replace('，', ',').split(',')]
    for keyword in keywords:
        if keyword and keyword != 'lens dust':
            val_left_eye_df.loc[row_label, disease_map.get(keyword, 'O')] = 1
    

In [None]:
val_left_eye_df.head()
# left_eye_df[left_eye_df['Left-Diagnostic Keywords'] == 'lens dust，normal fundus']

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,937_left.jpg,hypertensive retinopathy,0,0,0,0,0,1,0,0,0
1,967_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0
2,988_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
3,995_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0
4,1000_left.jpg,"laser spot,moderate nonproliferative retinopathy",0,0,0,0,0,0,0,1,0


In [None]:
# Save the labelled left-eye validation table; the DELETE flag column is still present.
val_left_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/ValidSet/Annotation/val_left_eye.xlsx')

In [None]:
# Keep only the right-eye image name and its keyword string; .copy() detaches
# the result from val_original_df so columns can be added to it safely.
val_right_eye_df = val_original_df.loc[:, ['Right-Fundus', 'Right-Diagnostic Keywords']].copy()
val_right_eye_df.head(15)

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords
0,937_right.jpg,"hypertensive retinopathy,suspected diabetic re..."
1,967_right.jpg,"moderate nonproliferative retinopathy,epiretin..."
2,988_right.jpg,"epiretinal membrane,moderate nonproliferative ..."
3,995_right.jpg,"lens dust,retinal pigmentation"
4,1000_right.jpg,"moderate nonproliferative retinopathy,laser spot"
5,1001_right.jpg,"epiretinal membrane,mild nonproliferative reti..."
6,1002_right.jpg,moderate nonproliferative retinopathy
7,1003_right.jpg,"hypertensive retinopathy,suspected diabetic re..."
8,1004_right.jpg,vitreous degeneration
9,1007_right.jpg,"laser spot,moderate nonproliferative retinopathy"


In [None]:
val_right_eye_df.shape

(500, 2)

In [None]:
# Add one zero-filled indicator column per label, plus the DELETE flag,
# in the same column order as before.
for label in ('N', 'D', 'G', 'C', 'A', 'H', 'M', 'O', 'DELETE'):
    val_right_eye_df[label] = 0

In [None]:
val_right_eye_df.head()

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,937_right.jpg,"hypertensive retinopathy,suspected diabetic re...",0,0,0,0,0,0,0,0,0
1,967_right.jpg,"moderate nonproliferative retinopathy,epiretin...",0,0,0,0,0,0,0,0,0
2,988_right.jpg,"epiretinal membrane,moderate nonproliferative ...",0,0,0,0,0,0,0,0,0
3,995_right.jpg,"lens dust,retinal pigmentation",0,0,0,0,0,0,0,0,0
4,1000_right.jpg,"moderate nonproliferative retinopathy,laser spot",0,0,0,0,0,0,0,0,0


In [None]:
# Switch on the label columns of every right-eye validation row from its keyword string.
# - Both the ASCII comma used in this sheet and the full-width comma '，' used
#   in the training sheet are accepted as separators.
# - Each keyword is stripped, and empty fragments (e.g. from a trailing comma)
#   are skipped instead of being counted as 'O'.
# - 'lens dust' is ignored; any other keyword missing from disease_map counts as 'O'.
# - Rows are addressed by their real index label rather than a running counter,
#   so the loop does not depend on a default 0..n-1 index.
for row_label, keyword_text in val_right_eye_df['Right-Diagnostic Keywords'].items():
    keywords = [part.strip() for part in keyword_text.replace('，', ',').split(',')]
    for keyword in keywords:
        if keyword and keyword != 'lens dust':
            val_right_eye_df.loc[row_label, disease_map.get(keyword, 'O')] = 1

In [None]:
val_right_eye_df.head(15)

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,937_right.jpg,"hypertensive retinopathy,suspected diabetic re...",0,1,0,0,0,1,0,0,0
1,967_right.jpg,"moderate nonproliferative retinopathy,epiretin...",0,0,0,0,0,0,0,1,0
2,988_right.jpg,"epiretinal membrane,moderate nonproliferative ...",0,0,0,0,0,0,0,1,0
3,995_right.jpg,"lens dust,retinal pigmentation",0,0,0,0,0,0,0,1,0
4,1000_right.jpg,"moderate nonproliferative retinopathy,laser spot",0,0,0,0,0,0,0,1,0
5,1001_right.jpg,"epiretinal membrane,mild nonproliferative reti...",0,1,0,0,0,0,0,1,0
6,1002_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0
7,1003_right.jpg,"hypertensive retinopathy,suspected diabetic re...",0,1,0,0,0,1,0,0,0
8,1004_right.jpg,vitreous degeneration,0,0,0,0,0,0,0,1,0
9,1007_right.jpg,"laser spot,moderate nonproliferative retinopathy",0,0,0,0,0,0,0,1,0


In [None]:
# The column count now includes the nine label/flag columns added above.
val_right_eye_df.shape

(500, 11)

In [None]:
# Save the labelled right-eye validation frame; it still contains the DELETE flag column.
# to_excel writes the DataFrame index as the first sheet column by default.
val_right_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/ValidSet/Annotation/val_right_eye.xlsx')

In [None]:
# Index labels of the left-eye validation rows flagged for removal.
val_left_delete_index = list(
    val_left_eye_df.index[val_left_eye_df['DELETE'].eq(1)]
)
val_left_delete_index

[52, 456]

In [None]:
# Build a new frame without the flagged rows; val_left_eye_df itself is left untouched.
val_updated_left_eye_df = val_left_eye_df.drop(index=val_left_delete_index)
val_updated_left_eye_df

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,937_left.jpg,hypertensive retinopathy,0,0,0,0,0,1,0,0,0
1,967_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0
2,988_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
3,995_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1,0
4,1000_left.jpg,"laser spot,moderate nonproliferative retinopathy",0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
495,4787_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
496,4788_left.jpg,normal fundus,1,0,0,0,0,0,0,0,0
497,4789_left.jpg,severe proliferative diabetic retinopathy,0,1,0,0,0,0,0,0,0
498,4791_left.jpg,severe nonproliferative retinopathy,0,1,0,0,0,0,0,0,0


In [None]:
# Verification: no row flagged DELETE should remain (an empty frame is expected).
val_updated_left_eye_df.loc[val_updated_left_eye_df['DELETE'].eq(1)]

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE


In [None]:
# All flagged rows are gone, so the helper DELETE column can be discarded.
val_updated_left_eye_df = val_updated_left_eye_df.drop(columns=['DELETE'])
val_updated_left_eye_df

Unnamed: 0,Left-Fundus,Left-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,937_left.jpg,hypertensive retinopathy,0,0,0,0,0,1,0,0
1,967_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
2,988_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
3,995_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
4,1000_left.jpg,"laser spot,moderate nonproliferative retinopathy",0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
495,4787_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
496,4788_left.jpg,normal fundus,1,0,0,0,0,0,0,0
497,4789_left.jpg,severe proliferative diabetic retinopathy,0,1,0,0,0,0,0,0
498,4791_left.jpg,severe nonproliferative retinopathy,0,1,0,0,0,0,0,0


In [None]:
# Persist the cleaned left-eye validation labels; this file is re-loaded later in the notebook.
# The index is written too, which is why it shows up as an 'Unnamed: 0' column on reload.
val_updated_left_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/ValidSet/Annotation/val_updated_left_eye.xlsx')

In [None]:
# Index labels of the right-eye validation rows flagged for removal.
val_right_delete_index = list(
    val_right_eye_df.index[val_right_eye_df['DELETE'].eq(1)]
)
val_right_delete_index

[63, 476]

In [None]:
# Build a new frame without the flagged rows; val_right_eye_df itself is left untouched.
val_updated_right_eye_df = val_right_eye_df.drop(index=val_right_delete_index)
val_updated_right_eye_df

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE
0,937_right.jpg,"hypertensive retinopathy,suspected diabetic re...",0,1,0,0,0,1,0,0,0
1,967_right.jpg,"moderate nonproliferative retinopathy,epiretin...",0,0,0,0,0,0,0,1,0
2,988_right.jpg,"epiretinal membrane,moderate nonproliferative ...",0,0,0,0,0,0,0,1,0
3,995_right.jpg,"lens dust,retinal pigmentation",0,0,0,0,0,0,0,1,0
4,1000_right.jpg,"moderate nonproliferative retinopathy,laser spot",0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
495,4787_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
496,4788_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,0
497,4789_right.jpg,proliferative diabetic retinopathy,0,1,0,0,0,0,0,0,0
498,4791_right.jpg,severe nonproliferative retinopathy,0,1,0,0,0,0,0,0,0


In [None]:
# Verification: no row flagged DELETE should remain (an empty frame is expected).
val_updated_right_eye_df.loc[val_updated_right_eye_df['DELETE'].eq(1)]

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,DELETE


In [None]:
# All flagged rows are gone, so the helper DELETE column can be discarded.
val_updated_right_eye_df = val_updated_right_eye_df.drop(columns=['DELETE'])
val_updated_right_eye_df

Unnamed: 0,Right-Fundus,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,937_right.jpg,"hypertensive retinopathy,suspected diabetic re...",0,1,0,0,0,1,0,0
1,967_right.jpg,"moderate nonproliferative retinopathy,epiretin...",0,0,0,0,0,0,0,1
2,988_right.jpg,"epiretinal membrane,moderate nonproliferative ...",0,0,0,0,0,0,0,1
3,995_right.jpg,"lens dust,retinal pigmentation",0,0,0,0,0,0,0,1
4,1000_right.jpg,"moderate nonproliferative retinopathy,laser spot",0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
495,4787_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
496,4788_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
497,4789_right.jpg,proliferative diabetic retinopathy,0,1,0,0,0,0,0,0
498,4791_right.jpg,severe nonproliferative retinopathy,0,1,0,0,0,0,0,0


In [None]:
# Persist the cleaned right-eye validation labels; this file is re-loaded later in the notebook.
# The index is written too, which is why it shows up as an 'Unnamed: 0' column on reload.
val_updated_right_eye_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/ValidSet/Annotation/val_updated_right_eye.xlsx')

# Concatenating left- and right-eye labels


In [None]:
# Location of the left-eye training annotation workbook.
train_left_path = "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TrainSet/Annotation/updated_left_eye.xlsx"

In [None]:
# No index_col is given, so the index column stored in the workbook comes back
# as an ordinary 'Unnamed: 0' data column.
train_left_df = pd.read_excel(train_left_path)

In [None]:
# Give the left-eye frame eye-neutral column names so it can be stacked with the right-eye frame.
train_left_df = train_left_df.rename(
    columns={
        'Left-Fundus': 'Fundus',
        'Left-Diagnostic Keywords': 'Diagnostic_Keywords',
    }
)

In [None]:
# Display the frame to verify the renamed columns.
train_left_df

Unnamed: 0.1,Unnamed: 0,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
0,0,0_left.jpg,cataract,0,0,0,1,0,0,0,0
1,1,1_left.jpg,normal fundus,1,0,0,0,0,0,0,0
2,2,2_left.jpg,laser spot，moderate non proliferative retinopathy,0,1,0,0,0,0,0,1
3,3,3_left.jpg,normal fundus,1,0,0,0,0,0,0,0
4,4,4_left.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
3471,3495,4686_left.jpg,severe nonproliferative retinopathy,0,1,0,0,0,0,0,0
3472,3496,4688_left.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0
3473,3497,4689_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
3474,3498,4690_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0


In [None]:
# Location of the right-eye training annotation workbook.
train_right_path = "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TrainSet/Annotation/updated_right_eye.xlsx"

In [None]:
# No index_col is given, so the index column stored in the workbook comes back
# as an ordinary 'Unnamed: 0' data column.
train_right_df = pd.read_excel(train_right_path)

In [None]:
# Give the right-eye frame eye-neutral column names so it can be stacked with the left-eye frame.
train_right_df = train_right_df.rename(
    columns={
        'Right-Fundus': 'Fundus',
        'Right-Diagnostic Keywords': 'Diagnostic_Keywords',
    }
)

In [None]:
# Display the frame to verify the renamed columns.
train_right_df

Unnamed: 0.1,Unnamed: 0,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
0,0,0_right.jpg,normal fundus,1,0,0,0,0,0,0,0
1,1,1_right.jpg,normal fundus,1,0,0,0,0,0,0,0
2,2,2_right.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0
3,3,3_right.jpg,branch retinal artery occlusion,0,0,0,0,0,0,0,1
4,4,4_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
3478,3495,4686_right.jpg,proliferative diabetic retinopathy,0,1,0,0,0,0,0,0
3479,3496,4688_right.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0
3480,3497,4689_right.jpg,normal fundus,1,0,0,0,0,0,0,0
3481,3498,4690_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0


In [None]:
# Stack left-eye rows on top of right-eye rows: one row per fundus image.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# both eyes keep their own row labels, so each label would occur twice and
# label-based lookups (train_df.loc[...]) would be ambiguous.
train_df = pd.concat([train_left_df, train_right_df], axis=0, ignore_index=True)
train_df.head()

Unnamed: 0.1,Unnamed: 0,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
0,0,0_left.jpg,cataract,0,0,0,1,0,0,0,0
1,1,1_left.jpg,normal fundus,1,0,0,0,0,0,0,0
2,2,2_left.jpg,laser spot，moderate non proliferative retinopathy,0,1,0,0,0,0,0,1
3,3,3_left.jpg,normal fundus,1,0,0,0,0,0,0,0
4,4,4_left.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,1


In [None]:
# Confirm the size of the stacked training frame.
train_df.shape

(6959, 11)

In [None]:
# Write the combined training labels; this workbook is read back later in the notebook.
# The index is written as an extra first column (to_excel default).
train_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TrainSet/Annotation/train_labels.xlsx')

In [None]:
# Location of the left-eye test annotation workbook.
test_left_path = "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TestSet/Annotation/test_updated_left_eye.xlsx"

In [None]:
# No index_col is given, so the index column stored in the workbook comes back
# as an ordinary 'Unnamed: 0' data column.
test_left_df = pd.read_excel(test_left_path)

In [None]:
# Give the left-eye frame eye-neutral column names so it can be stacked with the right-eye frame.
test_left_df = test_left_df.rename(
    columns={
        'Left-Fundus': 'Fundus',
        'Left-Diagnostic Keywords': 'Diagnostic_Keywords',
    }
)

In [None]:
# Display the frame to verify the renamed columns.
test_left_df

Unnamed: 0.1,Unnamed: 0,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
0,0,198_left.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
1,1,663_left.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
2,2,1025_left.jpg,"hypertensive retinopathy,macular epiretinal me...",0,0,0,0,0,1,0,1
3,3,1139_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
4,4,1141_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
993,995,4995_left.jpg,normal fundus,1,0,0,0,0,0,0,0
994,996,4996_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
995,997,4997_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
996,998,4998_left.jpg,normal fundus,1,0,0,0,0,0,0,0


In [None]:
# Location of the right-eye test annotation workbook.
test_right_path = "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TestSet/Annotation/test_updated_right_eye.xlsx"

In [None]:
# No index_col is given, so the index column stored in the workbook comes back
# as an ordinary 'Unnamed: 0' data column.
test_right_df = pd.read_excel(test_right_path)

In [None]:
# Give the right-eye frame eye-neutral column names so it can be stacked with the left-eye frame.
test_right_df = test_right_df.rename(
    columns={
        'Right-Fundus': 'Fundus',
        'Right-Diagnostic Keywords': 'Diagnostic_Keywords',
    }
)

In [None]:
# Display the frame to verify the renamed columns.
test_right_df

Unnamed: 0.1,Unnamed: 0,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
0,0,198_right.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
1,1,663_right.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
2,2,1025_right.jpg,hypertensive retinopathy,0,0,0,0,0,1,0,0
3,3,1139_right.jpg,"moderate nonproliferative retinopathy,macular ...",0,0,0,0,0,0,0,1
4,4,1141_right.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
994,995,4995_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
995,996,4996_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
996,997,4997_right.jpg,normal fundus,1,0,0,0,0,0,0,0
997,998,4998_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1


In [None]:
# Stack left-eye rows on top of right-eye rows: one row per fundus image.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# both eyes keep their own row labels, so each label would occur twice and
# label-based lookups (test_df.loc[...]) would be ambiguous.
test_df = pd.concat([test_left_df, test_right_df], axis=0, ignore_index=True)
test_df.head()

Unnamed: 0.1,Unnamed: 0,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
0,0,198_left.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
1,1,663_left.jpg,"cataract,hypertensive retinopathy",0,0,0,1,0,1,0,0
2,2,1025_left.jpg,"hypertensive retinopathy,macular epiretinal me...",0,0,0,0,0,1,0,1
3,3,1139_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
4,4,1141_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0


In [None]:
# Confirm the size of the stacked test frame.
test_df.shape

(1997, 11)

In [None]:
# Write the combined test labels; this workbook is read back later in the notebook.
# The index is written as an extra first column (to_excel default).
test_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TestSet/Annotation/test_labels.xlsx')

In [None]:
# Location of the cleaned left-eye validation workbook written earlier in this notebook.
val_left_path = "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/ValidSet/Annotation/val_updated_left_eye.xlsx"

In [None]:
# No index_col is given, so the index column stored in the workbook comes back
# as an ordinary 'Unnamed: 0' data column.
val_left_df = pd.read_excel(val_left_path)

In [None]:
# Give the left-eye frame eye-neutral column names so it can be stacked with the right-eye frame.
val_left_df = val_left_df.rename(
    columns={
        'Left-Fundus': 'Fundus',
        'Left-Diagnostic Keywords': 'Diagnostic_Keywords',
    }
)

In [None]:
# Display the frame to verify the renamed columns.
val_left_df

Unnamed: 0.1,Unnamed: 0,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
0,0,937_left.jpg,hypertensive retinopathy,0,0,0,0,0,1,0,0
1,1,967_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
2,2,988_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
3,3,995_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
4,4,1000_left.jpg,"laser spot,moderate nonproliferative retinopathy",0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
493,495,4787_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
494,496,4788_left.jpg,normal fundus,1,0,0,0,0,0,0,0
495,497,4789_left.jpg,severe proliferative diabetic retinopathy,0,1,0,0,0,0,0,0
496,498,4791_left.jpg,severe nonproliferative retinopathy,0,1,0,0,0,0,0,0


In [None]:
# Location of the cleaned right-eye validation workbook written earlier in this notebook.
val_right_path = "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/ValidSet/Annotation/val_updated_right_eye.xlsx"

In [None]:
# No index_col is given, so the index column stored in the workbook comes back
# as an ordinary 'Unnamed: 0' data column.
val_right_df = pd.read_excel(val_right_path)

In [None]:
# Give the right-eye frame eye-neutral column names so it can be stacked with the left-eye frame.
val_right_df = val_right_df.rename(
    columns={
        'Right-Fundus': 'Fundus',
        'Right-Diagnostic Keywords': 'Diagnostic_Keywords',
    }
)

In [None]:
# Display the frame to verify the renamed columns.
val_right_df

Unnamed: 0.1,Unnamed: 0,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
0,0,937_right.jpg,"hypertensive retinopathy,suspected diabetic re...",0,1,0,0,0,1,0,0
1,1,967_right.jpg,"moderate nonproliferative retinopathy,epiretin...",0,0,0,0,0,0,0,1
2,2,988_right.jpg,"epiretinal membrane,moderate nonproliferative ...",0,0,0,0,0,0,0,1
3,3,995_right.jpg,"lens dust,retinal pigmentation",0,0,0,0,0,0,0,1
4,4,1000_right.jpg,"moderate nonproliferative retinopathy,laser spot",0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
493,495,4787_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
494,496,4788_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
495,497,4789_right.jpg,proliferative diabetic retinopathy,0,1,0,0,0,0,0,0
496,498,4791_right.jpg,severe nonproliferative retinopathy,0,1,0,0,0,0,0,0


In [None]:
# Stack left-eye rows on top of right-eye rows: one row per fundus image.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# both eyes keep their own row labels, so each label would occur twice and
# label-based lookups (val_df.loc[...]) would be ambiguous.
val_df = pd.concat([val_left_df, val_right_df], axis=0, ignore_index=True)
val_df.head()

Unnamed: 0.1,Unnamed: 0,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
0,0,937_left.jpg,hypertensive retinopathy,0,0,0,0,0,1,0,0
1,1,967_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
2,2,988_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
3,3,995_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
4,4,1000_left.jpg,"laser spot,moderate nonproliferative retinopathy",0,0,0,0,0,0,0,1


In [None]:
# Confirm the size of the stacked validation frame.
val_df.shape

(996, 11)

In [None]:
# Write the combined validation labels; this workbook is read back later in the notebook.
# The index is written as an extra first column (to_excel default).
val_df.to_excel('/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/ValidSet/Annotation/val_labels.xlsx')

# Single-disease DataFrames (rows with exactly one label set)

In [None]:
# Combined (left + right) training labels written by the concatenation step above.
train_path = "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TrainSet/Annotation/train_labels.xlsx"

In [None]:
# Each save/load round trip without index handling adds another 'Unnamed: ...' column
# holding a previously written index.
df = pd.read_excel(train_path)

In [None]:
# Keep only the images that carry exactly one of the eight labels.
# skipna=False mirrors plain '+' addition: a missing label makes the row total NaN,
# so such a row is excluded rather than counted.
label_total = df[['N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']].sum(axis=1, skipna=False)
df_single = df[label_total == 1]

In [None]:
# Inspect the single-label training subset.
df_single

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
0,0,0,0_left.jpg,cataract,0,0,0,1,0,0,0,0
1,1,1,1_left.jpg,normal fundus,1,0,0,0,0,0,0,0
3,3,3,3_left.jpg,normal fundus,1,0,0,0,0,0,0,0
4,4,4,4_left.jpg,macular epiretinal membrane,0,0,0,0,0,0,0,1
5,5,5,5_left.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
6953,3477,3494,4683_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
6954,3478,3495,4686_right.jpg,proliferative diabetic retinopathy,0,1,0,0,0,0,0,0
6955,3479,3496,4688_right.jpg,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0
6956,3480,3497,4689_right.jpg,normal fundus,1,0,0,0,0,0,0,0


In [None]:
# Export the single-label training subset.
# NOTE(review): df and df_single are reused for the test and validation splits below,
# so this cell must run before those cells overwrite them.
df_single.to_excel('/content/drive/MyDrive/ResearchWork_2023/Single_disease/train_labels.xlsx')

In [None]:
# Combined (left + right) test labels written by the concatenation step above.
test_path = "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/TestSet/Annotation/test_labels.xlsx"

In [None]:
# Rebinds df to the test split; the training frame loaded into df earlier is no longer reachable by this name.
df = pd.read_excel(test_path)

In [None]:
# Keep only the images that carry exactly one of the eight labels.
# skipna=False mirrors plain '+' addition: a missing label makes the row total NaN,
# so such a row is excluded rather than counted.
label_total = df[['N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']].sum(axis=1, skipna=False)
df_single = df[label_total == 1]

In [None]:
# Inspect the single-label test subset.
df_single

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
3,3,3,1139_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
4,4,4,1141_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
5,5,5,1149_left.jpg,"laser spot,moderate nonproliferative retinopathy",0,0,0,0,0,0,0,1
6,6,6,1155_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
7,7,7,1158_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
1992,994,995,4995_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
1993,995,996,4996_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
1994,996,997,4997_right.jpg,normal fundus,1,0,0,0,0,0,0,0
1995,997,998,4998_right.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1


In [None]:
# Export the single-label test subset.
# NOTE(review): df and df_single are reused for the validation split below,
# so this cell must run before those cells overwrite them.
df_single.to_excel('/content/drive/MyDrive/ResearchWork_2023/Single_disease/test_labels.xlsx')

In [None]:
# Combined (left + right) validation labels written by the concatenation step above.
val_path = "/content/drive/MyDrive/ResearchWork_2023/OIA_ODIR/ValidSet/Annotation/val_labels.xlsx"

In [None]:
# Rebinds df to the validation split; the test frame loaded into df earlier is no longer reachable by this name.
df = pd.read_excel(val_path)

In [None]:
# Keep only the images that carry exactly one of the eight labels.
# skipna=False mirrors plain '+' addition: a missing label makes the row total NaN,
# so such a row is excluded rather than counted.
label_total = df[['N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']].sum(axis=1, skipna=False)
df_single = df[label_total == 1]

In [None]:
# Inspect the single-label validation subset.
df_single

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Fundus,Diagnostic_Keywords,N,D,G,C,A,H,M,O
0,0,0,937_left.jpg,hypertensive retinopathy,0,0,0,0,0,1,0,0
1,1,1,967_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
2,2,2,988_left.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
3,3,3,995_left.jpg,moderate nonproliferative retinopathy,0,0,0,0,0,0,0,1
4,4,4,1000_left.jpg,"laser spot,moderate nonproliferative retinopathy",0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
991,493,495,4787_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
992,494,496,4788_right.jpg,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0
993,495,497,4789_right.jpg,proliferative diabetic retinopathy,0,1,0,0,0,0,0,0
994,496,498,4791_right.jpg,severe nonproliferative retinopathy,0,1,0,0,0,0,0,0


In [None]:
# Export the single-label validation subset (index written as the first column by default).
df_single.to_excel('/content/drive/MyDrive/ResearchWork_2023/Single_disease/val_labels.xlsx')