In [1]:
import os
import cv2
import numpy as np
import pandas as pd

BASE_PATH = "../input/tuberculosis-tb-chest-xray-dataset/TB_Chest_Radiography_Database"

In [2]:
def breaker(num: int = 50, char: str = "*") -> None:
    print("\n" + num*char + "\n")


def preprocess(image: np.ndarray) -> np.ndarray:
    return cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB)


def get_statistics(path: str, names: np.ndarray, sizes: list) -> list:
    r_means, g_means, b_means, r_stds, g_stds, b_stds = [], [], [], [], [], []

    i = 0
    for size in sizes:
        r_mean, g_mean, b_mean, r_std, g_std, b_std = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
        for name in names:
            main_image = preprocess(cv2.imread(os.path.join(path, name), cv2.IMREAD_COLOR))
            image = cv2.resize(src=main_image, dsize=(size, size), interpolation=cv2.INTER_AREA)
            r_mean += image[:, :, 0].mean()
            g_mean += image[:, :, 1].mean()
            b_mean += image[:, :, 2].mean()
            r_std  += image[:, :, 0].std()
            g_std  += image[:, :, 1].std()
            b_std  += image[:, :, 2].std()
            
        r_means.append(r_mean / len(names))
        g_means.append(g_mean / len(names))
        b_means.append(b_mean / len(names))

        r_stds.append(r_std / len(names))
        g_stds.append(g_std / len(names))
        b_stds.append(b_std / len(names))
    
    return r_means, g_means, b_means, r_stds, g_stds, b_stds 

In [3]:
sizes = [256, 384, 512]


n_r_means, n_g_means, n_b_means, n_r_stds, n_g_stds, n_b_stds = get_statistics("../input/tuberculosis-tb-chest-xray-dataset/TB_Chest_Radiography_Database/Normal", 
                                                                               os.listdir("../input/tuberculosis-tb-chest-xray-dataset/TB_Chest_Radiography_Database/Normal"),
                                                                               sizes)

t_r_means, t_g_means, t_b_means, t_r_stds, t_g_stds, t_b_stds = get_statistics("../input/tuberculosis-tb-chest-xray-dataset/TB_Chest_Radiography_Database/Tuberculosis", 
                                                                               os.listdir("../input/tuberculosis-tb-chest-xray-dataset/TB_Chest_Radiography_Database/Tuberculosis"),
                                                                               sizes)

In [4]:
breaker()
print("Normal\n")
for i in range(len(sizes)):
    print(f"Red Channel Mean ({sizes[i]})   : {n_r_means[i] / 255:.5f}")
    print(f"Green Channel Mean ({sizes[i]}) : {n_g_means[i] / 255:.5f}")
    print(f"Blue Channel Mean ({sizes[i]})  : {n_b_means[i] / 255:.5f}")
    print("\n")
    print(f"Red Channel Std ({sizes[i]})    : {n_r_stds[i] / 255:.5f}")
    print(f"Green Channel Std ({sizes[i]})  : {n_g_stds[i] / 255:.5f}")
    print(f"Blue Channel Std ({sizes[i]})   : {n_b_stds[i] / 255:.5f}")
    breaker()


**************************************************

Normal

Red Channel Mean (256)   : 0.51077
Green Channel Mean (256) : 0.51077
Blue Channel Mean (256)  : 0.51077


Red Channel Std (256)    : 0.24285
Green Channel Std (256)  : 0.24285
Blue Channel Std (256)   : 0.24285

**************************************************

Red Channel Mean (384)   : 0.51029
Green Channel Mean (384) : 0.51029
Blue Channel Mean (384)  : 0.51029


Red Channel Std (384)    : 0.24294
Green Channel Std (384)  : 0.24294
Blue Channel Std (384)   : 0.24294

**************************************************

Red Channel Mean (512)   : 0.51029
Green Channel Mean (512) : 0.51029
Blue Channel Mean (512)  : 0.51029


Red Channel Std (512)    : 0.24333
Green Channel Std (512)  : 0.24333
Blue Channel Std (512)   : 0.24333

**************************************************



In [5]:
breaker()
print("Tuberculosis\n")
for i in range(len(sizes)):
    print(f"Red Channel Mean ({sizes[i]})   : {t_r_means[i] / 255:.5f}")
    print(f"Green Channel Mean ({sizes[i]}) : {t_g_means[i] / 255:.5f}")
    print(f"Blue Channel Mean ({sizes[i]})  : {t_b_means[i] / 255:.5f}")
    print("\n")
    print(f"Red Channel Std ({sizes[i]})    : {t_r_stds[i] / 255:.5f}")
    print(f"Green Channel Std ({sizes[i]})  : {t_g_stds[i] / 255:.5f}")
    print(f"Blue Channel Std ({sizes[i]})   : {t_b_stds[i] / 255:.5f}")
    breaker()


**************************************************

Tuberculosis

Red Channel Mean (256)   : 0.45644
Green Channel Mean (256) : 0.50684
Blue Channel Mean (256)  : 0.52369


Red Channel Std (256)    : 0.19865
Green Channel Std (256)  : 0.21142
Blue Channel Std (256)   : 0.21833

**************************************************

Red Channel Mean (384)   : 0.45588
Green Channel Mean (384) : 0.50628
Blue Channel Mean (384)  : 0.52313


Red Channel Std (384)    : 0.19866
Green Channel Std (384)  : 0.21144
Blue Channel Std (384)   : 0.21834

**************************************************

Red Channel Mean (512)   : 0.45588
Green Channel Mean (512) : 0.50628
Blue Channel Mean (512)  : 0.52313


Red Channel Std (512)    : 0.19895
Green Channel Std (512)  : 0.21173
Blue Channel Std (512)   : 0.21863

**************************************************



In [6]:
breaker()
print("Normal + Tuberculosis\n")
for i in range(len(sizes)):
    print(f"Red Channel Mean ({sizes[i]})   : {(n_r_means[i] + t_r_means[i]) / 255:.5f}")
    print(f"Green Channel Mean ({sizes[i]}) : {(n_g_means[i] + t_g_means[i]) / 255:.5f}")
    print(f"Blue Channel Mean ({sizes[i]})  : {(n_b_means[i] + t_b_means[i])/ 255:.5f}")
    print("\n")
    print(f"Red Channel Std ({sizes[i]})    : {(n_r_stds[i] + t_r_stds[i]) / 255:.5f}")
    print(f"Green Channel Std ({sizes[i]})  : {(n_g_stds[i] + t_g_stds[i]) / 255:.5f}")
    print(f"Blue Channel Std ({sizes[i]})   : {(n_b_stds[i] + t_b_stds[i]) / 255:.5f}")
    breaker()


**************************************************

Normal + Tuberculosis

Red Channel Mean (256)   : 0.96721
Green Channel Mean (256) : 1.01761
Blue Channel Mean (256)  : 1.03446


Red Channel Std (256)    : 0.44149
Green Channel Std (256)  : 0.45427
Blue Channel Std (256)   : 0.46118

**************************************************

Red Channel Mean (384)   : 0.96616
Green Channel Mean (384) : 1.01657
Blue Channel Mean (384)  : 1.03342


Red Channel Std (384)    : 0.44160
Green Channel Std (384)  : 0.45437
Blue Channel Std (384)   : 0.46128

**************************************************

Red Channel Mean (512)   : 0.96617
Green Channel Mean (512) : 1.01657
Blue Channel Mean (512)  : 1.03342


Red Channel Std (512)    : 0.44229
Green Channel Std (512)  : 0.45506
Blue Channel Std (512)   : 0.46196

**************************************************

