# Ocular Disease Recognition - EDA

In [None]:
%%capture
!pip install openpyxl

In [None]:
import numpy as np
import pandas as pd 
import os
import matplotlib 
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import cv2
from textwrap import wrap

In [None]:
# Notebook-wide matplotlib defaults: figure size, sans-serif font family,
# and ASCII minus signs on axis tick labels.
plt.rcParams.update({
    'figure.figsize': (6.0, 4.0),
    'font.sans-serif': 'Arial',
    'axes.unicode_minus': False,
})

In [None]:
# Annotation spreadsheet and training-image folder inside the Kaggle input directory.
DATA_PATH = '/kaggle/input/ocular-disease-recognition-odir5k/ODIR-5K/ODIR-5K/data.xlsx'
IMG_DIR = '/kaggle/input/ocular-disease-recognition-odir5k/ODIR-5K/ODIR-5K/Training Images/'
# Sorted listing of every entry found in the training-image folder.
file_names = sorted(os.listdir(IMG_DIR))

In [None]:
main_df = pd.read_excel(DATA_PATH)
print(main_df.shape)
main_df.head()

# General analysis

In [None]:
# Crude estimation of findings ratio
# Slice off the last eight columns of the sheet and sum each column.
# Presumably these are the one-hot label columns (N, D, G, C, A, H, M, O) —
# TODO confirm against the column order in data.xlsx.
findings = main_df.iloc[:, -8:]
totals = findings.sum()
totals

In [None]:
# Human-readable names for the eight summed columns, in the same order as `totals`.
extended_labels = [
    'Normal',
    'Diabetes',
    'Glaucoma',
    'Cataract',
    'Age related Macular Degeneration',
    'Hypertension',
    'Pathological Myopia',
    'Other abnormalities',
]
# Pie chart of the per-column totals, first wedge starting at 90 degrees.
plt.pie(x=totals, labels=extended_labels, startangle=90)
plt.show()

In [None]:
# Number of records per distinct patient age, drawn as a bar chart.
ages = main_df['Patient Age'].value_counts()
plt.bar(x=ages.index, height=ages)
plt.show()

In [None]:
# Number of records per distinct patient sex, drawn as a bar chart.
sex = main_df['Patient Sex'].value_counts()
plt.bar(x=sex.index, height=sex)
plt.show()

In [None]:
# Example image
img = '10_right.jpg'
# The array returned by cv2.imread is passed to imshow unchanged here, so the
# colours shown are those of cv2's default channel order (deliberate: the
# following cell demonstrates the BGR -> RGB conversion).
image = cv2.imread(os.path.join(IMG_DIR, img))
plt.imshow(image)
print(image.shape)

Note that this image is NOT displayed correctly! Images of the eye fundus have a reddish tint due to all the blood vessels. The blue tint in this picture comes from the way cv2 reads images by default (channel order Blue-Green-Red instead of Red-Green-Blue). To fix this:

In [None]:
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 
plt.imshow(image_rgb) 

In [None]:
# Helper functions
def filter_df(df, letter):
    """Return the rows of ``df`` whose ``letter`` column equals 1.

    Args:
        df: DataFrame containing a column named ``letter``.
        letter: Name of the column to test.

    Returns:
        The subset of ``df`` (original index preserved) where the column is 1.
    """
    is_flagged = df[letter] == 1
    return df.loc[is_flagged]

def get_ages_genders(df):
    """Draw side-by-side bar charts of age and sex counts for ``df``.

    Counts the distinct values of the 'Patient Age' and 'Patient Sex'
    columns and plots each set of counts in its own subplot of a single
    figure titled 'Age and Sex Distribution'. Returns nothing.
    """
    age_counts = df['Patient Age'].value_counts()
    sex_counts = df['Patient Sex'].value_counts()

    fig, panels = plt.subplots(1, 2, figsize=(10, 4))
    fig.suptitle('Age and Sex Distribution')
    # Left panel: ages; right panel: sexes.
    for panel, counts in zip(panels, (age_counts, sex_counts)):
        panel.bar(counts.index, counts)

def _crop_to_centered_square(image):
    """Trim a landscape image to a centered square; return other images unchanged."""
    height, width = image.shape[0], image.shape[1]
    if width <= height:
        return image
    offset = (width - height) // 2
    return image[:, offset:offset + height]


def display_grid(df, keyword):
    """Show up to eight left-eye images whose diagnostic keywords match ``keyword``.

    Rows of ``df`` are kept when their 'Left-Diagnostic Keywords' value
    contains ``keyword`` (matched with pandas' default ``str.contains``
    semantics, i.e. as a regular expression). The first 2 x 4 matches are
    drawn in a grid; each cell is titled with that row's keywords, wrapped
    at 40 characters.

    Args:
        df: DataFrame with 'ID' and 'Left-Diagnostic Keywords' columns.
        keyword: Pattern to look for in the left-eye keywords.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    rows, cols = 2, 4
    keyword_column = df['Left-Diagnostic Keywords']
    # na=False: rows with no keywords count as "no match" instead of
    # producing a NaN entry in the mask, which .loc would reject.
    is_match = keyword_column.str.contains(keyword, na=False)
    matches = df.loc[is_match].head(rows * cols)

    fig = plt.figure(figsize=(16, 8))
    cells = zip(matches['ID'], matches['Left-Diagnostic Keywords'])
    for position, (patient_id, keywords) in enumerate(cells, start=1):
        file_name = f'{patient_id}_left.jpg'

        ax = fig.add_subplot(rows, cols, position)
        ax.axis('off')
        # replacing symbol that looks like comma but doesn't show correctly with a regular comma
        title = keywords.replace('，', ', ')
        ax.set_title('\n'.join(wrap(title, 40)))

        image = cv2.imread(os.path.join(IMG_DIR, file_name))
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None
            # rather than raising; mark the cell instead of crashing in cvtColor.
            ax.text(0.5, 0.5, f'missing: {file_name}',
                    ha='center', va='center', transform=ax.transAxes)
            continue

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # crop horizontal image for esthetics
        ax.imshow(_crop_to_centered_square(image))

    fig.tight_layout()
    plt.show()

# Normal

In [None]:
df_N = filter_df(main_df, 'N')
df_N

In [None]:
# Share of rows flagged as normal, as a percentage of all rows in the sheet.
normal = len(df_N) / len(main_df) * 100
normal_f = f'{normal:.2f}'
print(f'fraction of healthy patients: {normal_f}%')

In [None]:
display_grid(df_N, 'normal fundus')

The fundus is clearly visible, with no clouding of the anatomical structures. The optic disc appears pink with sharp margins. The vasculature is normal in course and caliber. There are no lesions, scars, pigmentary changes, or hemorrhages. The fovea exhibits a crisp foveal light reflex.

# Glaucoma

In [None]:
df_G = filter_df(main_df, 'G')
df_G

In [None]:
get_ages_genders(df_G)

In [None]:
display_grid(df_G, 'glaucoma')

Basically, glaucoma means damage to the **optic nerve**, usually due to high pressure in the eye. With time this can lead to blindness, and the worst part is that it usually only becomes noticeable at advanced stages, so the only way to diagnose it early is via regular eye checkups.<br><br>
The cup-to-disc ratio is used to assess glaucoma on the ocular images. Anatomically, the optic disc is the area where the optic nerve and blood vessels enter the retina. The "cup" is the white central part of the optic disc with no nerve fibers (a really bright spot). The nerve fibers are located in the rim of the disc. <br><br>
The cup-to-disc ratio = diameter of the cup / diameter of the optic disc. <br><br>
The normal cup-to-disc ratio is 0.4. As glaucoma advances, the cup enlarges occupying more of the disc area and the cup-to-disc ratio increases.

One more note: the cup-to-disc ratio on its own is not an indication of glaucoma; rather, it is the increase of this ratio over time, as the patient ages, that indicates the disease.

# Cataract

In [None]:
df_C = filter_df(main_df, 'C')
df_C

In [None]:
get_ages_genders(df_C)

In [None]:
display_grid(df_C, 'cataract')

A cataract is a cloudy area in the eye lens. Usually it is age-related, but could be due to other causes as well (e.g. after an eye injury).<br>
The anatomical structures of the eye are less visible on the images. In advanced cases, they might be completely blocked from view by the cataract.

# Diabetes

In [None]:
df_D = filter_df(main_df, 'D')
df_D

In [None]:
get_ages_genders(df_D)

In [None]:
display_grid(df_D, 'proliferative retinopathy')

Diabetic retinopathy is a diabetes complication (type 1 or type 2 diabetes). It's caused by damage to the blood vessels of the retina.<br>
The images often show signs of retinal edema, macular edema, hemorrhages, micro-aneurysms, and micro-vascular abnormalities.

# Age-related Macular Degeneration (AMD)

In [None]:
df_A = filter_df(main_df, 'A')
df_A

In [None]:
get_ages_genders(df_A)

In [None]:
display_grid(df_A, 'age-related')

The macula is a small area in the center of the retina; its purpose is the perception of fine details of objects in front of you. On the images it appears as a dark spot somewhere near the center. <br>
Macular degeneration is a medical condition which typically occurs in older people and may result in blurred or no vision in the center of the visual field. While it does not result in complete blindness, loss of central vision can make it hard to recognize faces, drive, read, or perform other activities of daily life. <br><br>
There are two types of AMD (two characteristic appearances on the images): One form is known as “dry” (most common) and the other is “wet” (severe form). <br>

* In the **dry** form, there is an atrophy of the retinal pigment epithelial cells (RPE) in the macula. These RPE cells are light sensitive and contain hundreds of photoreceptors. It is characterized by the presence of drusen (dots of yellow crystalline deposits that develop within the macula) and thinning of the macula. <br>
* With the **wet** form, the membrane underlying the retina thickens, then breaks. The oxygen supply to the macula is disrupted and the body responds by growing new, abnormal blood vessels. These begin to grow through the breaks of the membrane behind the retina towards the macula, often raising the retina. These abnormal blood vessels tend to be very fragile. They often grow, leak or bleed, causing scarring of the macula. This damage to the macula results in rapid central vision loss. 

# Hypertension

In [None]:
df_H = filter_df(main_df, 'H')
df_H

In [None]:
get_ages_genders(df_H)

In [None]:
display_grid(df_H, 'hypertensive')

Chronic hypertension can lead to hypertensive retinopathy.
### The Scheie Classification
The signs of malignant hypertension:

* Grade 0: No changes
* Grade 1: Barely detectable arterial narrowing
* Grade 2: Obvious arterial narrowing with focal irregularities
* Grade 3: Grade 2 plus retinal hemorrhages, exudates, cotton wool spots, or retinal edema
* Grade 4: Grade 3 plus papilledema

The signs of chronic arteriosclerotic hypertension:

* Stage 1: Widening of the arteriole reflex
* Stage 2: Arteriovenous crossing sign
* Stage 3: Copper-wire arteries (copper colored arteriole light reflex)
* Stage 4: Silver-wire arteries (silver colored arteriole light reflex).

<img src="https://eyewiki.aao.org/w/images/1/9/9d/HTN_RET_Findings.jpg" width="600">
<center>(source: https://eyewiki.aao.org/Hypertensive_Retinopathy)</center>

# Pathological Myopia

In [None]:
df_M = filter_df(main_df, 'M')
df_M

In [None]:
get_ages_genders(df_M)

In [None]:
display_grid(df_M, 'myopia')

Myopia = near-sightedness (common problem)<br><br>
**Pathologic** myopia is a small fraction of myopia cases (approximately 1%), in which the tissues of the eye are stretched and damaged to various degrees. It is primarily a genetic condition.<br><br>
Images: retina is stretched and thinned, with thin blood vessels, a distorted optic disc, folds in the retina, pigmentation at the macula. 

# Other abnormalities

In [None]:
df_O = filter_df(main_df, 'O')
df_O

In [None]:
get_ages_genders(df_O)

In [None]:
# obviously, there is no keyword characteristic for "other pathologies" section, but this one seems to do just fine 
display_grid(df_O, ' retinal')

# Correlation of ocular diseases

In [None]:
# Take the last seven columns of the sheet and multiply the transposed table by
# itself: entry (i, j) of the result is the sum over all rows of
# column_i * column_j. Presumably the columns are the 0/1 disease flags without
# 'N', which makes each entry a count of rows with both flags set, and the
# diagonal the per-disease totals — TODO confirm against data.xlsx.
co = main_df.iloc[:,-7:]
correlation = co.T.dot(co)
correlation

In [None]:
# Per-column totals of the seven selected columns.
# NOTE: this rebinds the notebook-level `totals` that the findings pie chart
# was drawn from; re-run that cell before re-drawing the pie chart.
totals = co.sum()
# DataFrame / Series aligns on column labels, so every column of the
# co-occurrence table is divided by that column's own total.
correlation_2 = correlation / totals
# Styler.set_precision was deprecated in pandas 1.3 and removed in 2.0;
# an explicit two-decimal format string renders the same on old and new pandas.
correlation_2.style.background_gradient().format('{:.2f}')

These numbers show how often each pair of diseases occurred together relative to the total cases of the disease specified in the **column**. <br>
E.g. in this database we had 45 cases where hypertensive retinopathy occurred together with diabetes-related changes. This means that out of 103 patients with hypertensive retinopathy, 45 also had diabetes, which is 44% of all cases (definitely a correlation). On the other hand, out of 1128 total cases of diabetes, 45 cases is not much (only 4%).<br>
Also, in 10-27% of cases (depending on the specific disease) other non-categorized pathologies were present, but the reverse relationship is notable for only one disease: in 31% of cases with non-categorized pathologies, diabetic changes were also present.