In [1]:
%%bash
pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.0.9-py2.py3-none-any.whl (242 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 242.2/242.2 KB 4.9 MB/s eta 0:00:00
Collecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.9




In [2]:
import os
import cv2
import numpy as np
import pandas as pd

# Root directory of the ODIR-5K dataset, given relative to the notebook's working directory.
# Later cells read "data.xlsx" and the "Training Images" folder from under this path.
BASE_PATH = "../input/ocular-disease-recognition-odir5k/ODIR-5K/ODIR-5K"

In [3]:
def breaker(num: int = 50, char: str = "*") -> None:
    """Print a visual separator.

    The separator is ``char`` repeated ``num`` times, preceded and followed by
    an empty line.

    Args:
        num: How many times ``char`` is repeated.
        char: The string used to build the separator.
    """
    rule = num * char
    print("".join(("\n", rule, "\n")))


def preprocess(image: np.ndarray) -> np.ndarray:
    """Convert an image from BGR channel order to RGB.

    Args:
        image: Image array in BGR channel order (the order OpenCV's reader produces).

    Returns:
        The result of ``cv2.cvtColor`` with the ``COLOR_BGR2RGB`` conversion code.
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return rgb_image


def get_image(path: str, size: int = None) -> np.ndarray:
    """Load an image from disk as RGB, optionally resized to a square.

    Args:
        path: Location of the image file.
        size: Side length of the square output. When ``None`` (or any other
            falsy value, e.g. ``0``) the image is returned at its original
            resolution.

    Returns:
        The image in RGB channel order; resized to ``(size, size)`` with
        ``INTER_AREA`` interpolation when ``size`` is given.

    Raises:
        FileNotFoundError: If the file cannot be read.
    """
    bgr = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread signals a missing/unreadable file by returning None instead of
    # raising; fail here with the offending path rather than inside cvtColor.
    if bgr is None:
        raise FileNotFoundError(f"Could not read image: {path!r}")

    image = cv2.cvtColor(src=bgr, code=cv2.COLOR_BGR2RGB)
    if size:
        return cv2.resize(src=image, dsize=(size, size), interpolation=cv2.INTER_AREA)
    return image


def get_statistics(path: str, names: np.ndarray, sizes: list) -> tuple:
    """Compute per-channel mean and std of a set of images at several resolutions.

    Each image is read once, converted with ``preprocess``, and resized to every
    requested square size. For each size, the per-image channel mean and channel
    standard deviation are averaged over all images.

    Note that the reported std is the *average of per-image stds*, not the
    standard deviation of all pixels pooled across the dataset. No normalization
    is applied here; values are on the pixel scale of the loaded images.

    Args:
        path: Directory containing the image files.
        names: File names (relative to ``path``) of the images to include.
        sizes: Side lengths; statistics are computed on ``(size, size)`` resizes.

    Returns:
        A tuple ``(r_means, g_means, b_means, r_stds, g_stds, b_stds)``. Each
        element is a list with one value per entry of ``sizes``, in the same order.

    Raises:
        FileNotFoundError: If an image cannot be read.
        ZeroDivisionError: If ``names`` is empty while ``sizes`` is not.
    """
    sizes = list(sizes)
    n_images = len(names)

    # Running sums indexed as [channel][size_index]; channels are R, G, B.
    mean_sums = [[0.0] * len(sizes) for _ in range(3)]
    std_sums = [[0.0] * len(sizes) for _ in range(3)]

    # Images form the outer loop so each file is decoded only once, however many
    # sizes are requested. Per size, values are still accumulated in `names`
    # order, so the sums match a size-outer / image-inner traversal exactly.
    for name in names:
        file_path = os.path.join(path, name)
        raw = cv2.imread(file_path, cv2.IMREAD_COLOR)
        # cv2.imread returns None (no exception) when a file is missing or unreadable.
        if raw is None:
            raise FileNotFoundError(f"Could not read image: {file_path!r}")
        main_image = preprocess(raw)

        for size_idx, size in enumerate(sizes):
            image = cv2.resize(src=main_image, dsize=(size, size), interpolation=cv2.INTER_AREA)
            for channel_idx in range(3):
                channel = image[:, :, channel_idx]
                mean_sums[channel_idx][size_idx] += channel.mean()
                std_sums[channel_idx][size_idx] += channel.std()

    r_means, g_means, b_means = (
        [total / n_images for total in per_size] for per_size in mean_sums
    )
    r_stds, g_stds, b_stds = (
        [total / n_images for total in per_size] for per_size in std_sums
    )

    return r_means, g_means, b_means, r_stds, g_stds, b_stds

In [4]:
# Load the dataset's spreadsheet; its "Left-Fundus" and "Right-Fundus" columns
# hold the image file names for each eye.
annotations_path = os.path.join(BASE_PATH, "data.xlsx")
df = pd.read_excel(annotations_path)

# Detached copies of the two file-name columns, as arrays.
left_filenames, right_filenames = (
    df[column].copy().values for column in ("Left-Fundus", "Right-Fundus")
)

# Square side lengths at which the channel statistics are evaluated.
sizes = [256, 384, 512, 768, 1024]

In [5]:
# Both eyes' images live in the same folder; only the file-name lists differ.
training_dir = os.path.join(BASE_PATH, "Training Images")

(
    left_r_means, left_g_means, left_b_means,
    left_r_stds, left_g_stds, left_b_stds,
) = get_statistics(training_dir, left_filenames, sizes)

(
    right_r_means, right_g_means, right_b_means,
    right_r_stds, right_g_stds, right_b_stds,
) = get_statistics(training_dir, right_filenames, sizes)

In [6]:
# Report the left-eye channel statistics, one section per resize resolution.
# Values are divided by 255 for display (presumably rescaling 8-bit pixel values to [0, 1]).
breaker()
print("Left-Fundus Images\n")
for idx, size in enumerate(sizes):
    print(f"Red Channel Mean ({size})   : {left_r_means[idx] / 255:.5f}")
    print(f"Green Channel Mean ({size}) : {left_g_means[idx] / 255:.5f}")
    print(f"Blue Channel Mean ({size})  : {left_b_means[idx] / 255:.5f}")
    print("\n")
    print(f"Red Channel Std ({size})    : {left_r_stds[idx] / 255:.5f}")
    print(f"Green Channel Std ({size})  : {left_g_stds[idx] / 255:.5f}")
    print(f"Blue Channel Std ({size})   : {left_b_stds[idx] / 255:.5f}")
    breaker()


**************************************************

Left-Fundus Images

Red Channel Mean (256)   : 0.29863
Green Channel Mean (256) : 0.18990
Blue Channel Mean (256)  : 0.10370


Red Channel Std (256)    : 0.28263
Green Channel Std (256)  : 0.18761
Blue Channel Std (256)   : 0.10822

**************************************************

Red Channel Mean (384)   : 0.29863
Green Channel Mean (384) : 0.18990
Blue Channel Mean (384)  : 0.10370


Red Channel Std (384)    : 0.28289
Green Channel Std (384)  : 0.18788
Blue Channel Std (384)   : 0.10848

**************************************************

Red Channel Mean (512)   : 0.29863
Green Channel Mean (512) : 0.18990
Blue Channel Mean (512)  : 0.10370


Red Channel Std (512)    : 0.28301
Green Channel Std (512)  : 0.18801
Blue Channel Std (512)   : 0.10863

**************************************************

Red Channel Mean (768)   : 0.29863
Green Channel Mean (768) : 0.18990
Blue Channel Mean (768)  : 0.10370


Red Channel Std (768)    

In [7]:
# Report the right-eye channel statistics, one section per resize resolution.
# Values are divided by 255 for display (presumably rescaling 8-bit pixel values to [0, 1]).
breaker()
print("Right-Fundus Images\n")
for idx, size in enumerate(sizes):
    print(f"Red Channel Mean ({size})   : {right_r_means[idx] / 255:.5f}")
    print(f"Green Channel Mean ({size}) : {right_g_means[idx] / 255:.5f}")
    print(f"Blue Channel Mean ({size})  : {right_b_means[idx] / 255:.5f}")
    print("\n")
    print(f"Red Channel Std ({size})    : {right_r_stds[idx] / 255:.5f}")
    print(f"Green Channel Std ({size})  : {right_g_stds[idx] / 255:.5f}")
    print(f"Blue Channel Std ({size})   : {right_b_stds[idx] / 255:.5f}")
    breaker()


**************************************************

Right-Fundus Images

Red Channel Mean (256)   : 0.30126
Green Channel Mean (256) : 0.19034
Blue Channel Mean (256)  : 0.10204


Red Channel Std (256)    : 0.28629
Green Channel Std (256)  : 0.18920
Blue Channel Std (256)   : 0.10751

**************************************************

Red Channel Mean (384)   : 0.30126
Green Channel Mean (384) : 0.19034
Blue Channel Mean (384)  : 0.10203


Red Channel Std (384)    : 0.28655
Green Channel Std (384)  : 0.18947
Blue Channel Std (384)   : 0.10776

**************************************************

Red Channel Mean (512)   : 0.30126
Green Channel Mean (512) : 0.19034
Blue Channel Mean (512)  : 0.10203


Red Channel Std (512)    : 0.28667
Green Channel Std (512)  : 0.18960
Blue Channel Std (512)   : 0.10791

**************************************************

Red Channel Mean (768)   : 0.30126
Green Channel Mean (768) : 0.19034
Blue Channel Mean (768)  : 0.10203


Red Channel Std (768)   