In [1]:
import os
import cv2
import numpy as np

In [2]:
def breaker(num: int = 50, char: str = "*") -> None:
    """Print a horizontal separator surrounded by blank lines.

    Args:
        num: How many times ``char`` is repeated.
        char: String repeated to form the separator.
    """
    rule = char * num
    print("\n" + rule + "\n")


def preprocess(image: np.ndarray, size: int) -> np.ndarray:
    """Convert a BGR image to RGB and resize it to a ``size`` x ``size`` square.

    Args:
        image: Image in OpenCV's BGR channel order.
        size: Edge length of the square output, used for both dimensions,
            so the input's aspect ratio is not preserved.

    Returns:
        The RGB image resized with ``cv2.INTER_AREA`` interpolation.
    """
    rgb_image = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB)
    return cv2.resize(src=rgb_image, dsize=(size, size), interpolation=cv2.INTER_AREA)


def get_images(path: str, names: np.ndarray, size: int) -> np.ndarray:
    """Load the named image files from a directory into a single uint8 array.

    Each file is read with OpenCV as a 3-channel colour image, run through
    ``preprocess`` and stored at the index matching its position in ``names``.

    Args:
        path: Directory that contains the image files.
        names: File names relative to ``path``. Any sized iterable of strings
            works (for example the list returned by ``os.listdir``).
        size: Edge length handed to ``preprocess``; every stored image is
            ``size`` x ``size``.

    Returns:
        Array of shape ``(len(names), size, size, 3)`` with dtype ``uint8``.

    Raises:
        ValueError: If ``cv2.imread`` cannot read or decode one of the files.
    """
    images = np.zeros((len(names), size, size, 3), dtype=np.uint8)

    for index, name in enumerate(names):
        file_path = os.path.join(path, name)
        image = cv2.imread(file_path, cv2.IMREAD_COLOR)
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cvtColor.
        if image is None:
            raise ValueError(f"Could not read image file: {file_path}")
        images[index] = preprocess(image, size)
    return images

    
def get_statistics(images: np.ndarray, size: int) -> None:
    """Print the per-channel mean and standard deviation, each divided by 255.

    Args:
        images: Array indexed as ``images[:, :, :, channel]``; channels 0, 1
            and 2 are reported as Red, Green and Blue respectively.
        size: Only used to label the printed output.
    """
    channel_names = ("Red", "Green", "Blue")

    print(f"Mean {size}x{size}\n")
    for index, channel_name in enumerate(channel_names):
        label = f"{channel_name} Channel Mean"
        # Labels are left-justified to 18 columns so the values line up.
        print(f"{label:<18} {size} : {images[:, :, :, index].mean() / 255:.5f}")
    print("")
    print(f"Standard Deviation {size}x{size}\n")
    for index, channel_name in enumerate(channel_names):
        label = f"{channel_name} Channel Std"
        print(f"{label:<18} {size} : {images[:, :, :, index].std() / 255:.5f}")


In [3]:
# Square edge lengths (in pixels) for which a dataset copy is generated.
sizes = [256, 384, 512]

# Dataset root and per-class folders, defined once instead of repeating the
# full path at every call site.
DATA_DIR = "../input/tuberculosis-tb-chest-xray-dataset/TB_Chest_Radiography_Database"
NORMAL_DIR = os.path.join(DATA_DIR, "Normal")
TUBERCULOSIS_DIR = os.path.join(DATA_DIR, "Tuberculosis")

# os.listdir returns entries in arbitrary order; sorting makes the saved
# arrays reproducible. The listing does not depend on `size`, so it is done
# once outside the loop.
normal_names = sorted(os.listdir(NORMAL_DIR))
tuberculosis_names = sorted(os.listdir(TUBERCULOSIS_DIR))

breaker()
for size in sizes:
    normal_images = get_images(NORMAL_DIR, normal_names, size)
    tuberculosis_images = get_images(TUBERCULOSIS_DIR, tuberculosis_names, size)

    # Normal images first, then Tuberculosis; targets follow the same order
    # with 0 for Normal and 1 for Tuberculosis.
    images = np.concatenate((normal_images, tuberculosis_images), axis=0)
    targets = np.concatenate((np.zeros(len(normal_images)), np.ones(len(tuberculosis_images))), axis=0)

    np.save(f"./images_{size}.npy", images)
    np.save(f"./targets_{size}.npy", targets)

    get_statistics(images, size)
    breaker()


**************************************************

Mean 256x256

Red Channel Mean   256 : 0.50171
Green Channel Mean 256 : 0.51011
Blue Channel Mean  256 : 0.51292

Standard Deviation 256x256

Red Channel Std    256 : 0.25925
Green Channel Std  256 : 0.25933
Blue Channel Std   256 : 0.26062

**************************************************

Mean 384x384

Red Channel Mean   384 : 0.50122
Green Channel Mean 384 : 0.50962
Blue Channel Mean  384 : 0.51243

Standard Deviation 384x384

Red Channel Std    384 : 0.25932
Green Channel Std  384 : 0.25939
Blue Channel Std   384 : 0.26068

**************************************************

Mean 512x512

Red Channel Mean   512 : 0.50122
Green Channel Mean 512 : 0.50962
Blue Channel Mean  512 : 0.51243

Standard Deviation 512x512

Red Channel Std    512 : 0.25966
Green Channel Std  512 : 0.25973
Blue Channel Std   512 : 0.26102

**************************************************

