In [1]:
%%bash
pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.0.9-py2.py3-none-any.whl (242 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 242.2/242.2 KB 1.6 MB/s eta 0:00:00
Collecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.9




In [2]:
import os
import cv2
import numpy as np
import pandas as pd

# Root folder of the dataset; "data.xlsx" and the "Training Images" directory
# are both resolved relative to this path further down.
BASE_PATH = "../input/ocular-disease-recognition-odir5k/ODIR-5K/ODIR-5K"
# Side length (pixels) every image is resized to; also embedded in the names of
# the saved .npy files.
SIZE = 256

In [3]:
def breaker(num: int = 50, char: str = "*") -> None:
    """Print a separator rule with a blank line above and below it.

    Args:
        num: How many times ``char`` is repeated to form the rule.
        char: The string that is repeated.
    """
    rule = char * num
    print("".join(("\n", rule, "\n")))

    
def preprocess(image: np.ndarray, size: int) -> np.ndarray:
    """Convert an image from BGR to RGB channel order and resize it to a square.

    Args:
        image: Image to convert; it is passed to ``cv2.cvtColor`` with the
            ``COLOR_BGR2RGB`` code, so BGR channel order is expected.
        size: Width and height, in pixels, of the returned image.

    Returns:
        The RGB image resized to ``size`` x ``size`` using ``cv2.INTER_AREA``
        interpolation.
    """
    rgb_image = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB)
    target_dims = (size, size)
    return cv2.resize(src=rgb_image, dsize=target_dims, interpolation=cv2.INTER_AREA)


def get_images(path: str, names: np.ndarray, size: int) -> np.ndarray:
    """Load a list of image files into a single preprocessed uint8 array.

    Each file is read with ``cv2.imread`` in colour mode and passed through
    ``preprocess`` (RGB conversion + resize) before being stored.

    Args:
        path: Directory that contains the image files.
        names: File names relative to ``path``, one per output image.
        size: Side length, in pixels, of the square output images.

    Returns:
        Array of shape ``(len(names), size, size, 3)`` and dtype ``uint8``;
        entry ``i`` holds the preprocessed image for ``names[i]``.

    Raises:
        FileNotFoundError: If a file cannot be read (missing or undecodable).
    """
    images = np.zeros((len(names), size, size, 3), dtype=np.uint8)

    for i, name in enumerate(names):
        image_path = os.path.join(path, name)
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # stop here with the offending path rather than failing later with
            # an opaque error inside cv2.cvtColor.
            raise FileNotFoundError(
                f"Could not read image (missing or undecodable): {image_path}"
            )
        images[i] = preprocess(image, size)
    return images


def get_statistics(images: np.ndarray, size: int) -> None:
    """Print the per-channel mean and standard deviation of an image stack.

    Channels 0, 1 and 2 of the last axis are reported as Red, Green and Blue.
    Each statistic is computed over every element of that channel and divided
    by 255 before printing with five decimals.

    Args:
        images: 4-D array indexed as ``images[:, :, :, channel]``.
        size: Value shown in the headings and labels; it is only printed and
            does not influence the computation.
    """
    channel_names = ("Red", "Green", "Blue")

    def report(title: str, tag: str, reducer) -> None:
        # One heading followed by one aligned line per colour channel.
        print(f"{title} {size}x{size}\n")
        for idx, colour in enumerate(channel_names):
            label = f"{colour} Channel {tag}"
            value = reducer(images[:, :, :, idx]) / 255
            # Labels are padded to 18 columns so the values line up.
            print(f"{label:<18} {size} : {value:.5f}")

    report("Mean", "Mean", lambda plane: plane.mean())
    print("")
    report("Standard Deviation", "Std", lambda plane: plane.std())

In [4]:
# Load the dataset's metadata spreadsheet (openpyxl, installed in the first
# cell, is presumably the engine used for the .xlsx format).
df = pd.read_excel(os.path.join(BASE_PATH, "data.xlsx"))

# Image file names for the left and right eye, one entry per spreadsheet row.
left_filenames = df["Left-Fundus"].copy().values
right_filenames = df["Right-Fundus"].copy().values
# Targets are selected positionally as the last 8 columns — presumably the
# per-class label columns; TODO confirm against the spreadsheet header.
targets = df.iloc[:, -8:].copy().values

# Read every image and resize it to SIZE x SIZE; both eyes are loaded from the
# same "Training Images" folder.
left_images  = get_images(os.path.join(BASE_PATH, "Training Images"), left_filenames, SIZE)
right_images = get_images(os.path.join(BASE_PATH, "Training Images"), right_filenames, SIZE)

# Save the image stacks and targets as .npy files in the working directory.
np.save(f"./left_images_{SIZE}.npy", left_images)
np.save(f"./right_images_{SIZE}.npy", right_images)
np.save(f"./targets_{SIZE}.npy", targets)

In [5]:
# Print the channel statistics for each eye, with a separator rule before each
# section and one after the last.
for heading, fundus_images in (
    ("Left-Fundus Images\n", left_images),
    ("Right-Fundus Images\n", right_images),
):
    breaker()
    print(heading)
    get_statistics(fundus_images, SIZE)
breaker()


**************************************************

Left-Fundus Images

Mean 256x256

Red Channel Mean   256 : 0.29863
Green Channel Mean 256 : 0.18990
Blue Channel Mean  256 : 0.10370

Standard Deviation 256x256

Red Channel Std    256 : 0.30966
Green Channel Std  256 : 0.21207
Blue Channel Std   256 : 0.13875

**************************************************

Right-Fundus Images

Mean 256x256

Red Channel Mean   256 : 0.30126
Green Channel Mean 256 : 0.19034
Blue Channel Mean  256 : 0.10204

Standard Deviation 256x256

Red Channel Std    256 : 0.31159
Green Channel Std  256 : 0.21214
Blue Channel Std   256 : 0.13689

**************************************************

