In [1]:
import os
import cv2
import numpy as np


SIZE = 227
BASE_PATH = "../input/concrete-crack-images-for-classification"

In [2]:
def breaker(num: int = 50, char: str = "*") -> None:
    print("\n" + num*char + "\n")


def preprocess(image: np.ndarray, size: int) -> np.ndarray:
    if size != 227:
        return cv2.resize(src=cv2.cvtColor(src=image, 
                                           code=cv2.COLOR_BGR2RGB), 
                          dsize=(size, size), 
                          interpolation=cv2.INTER_AREA)
    else:
        return cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB)


def get_images(path: str, names: np.ndarray, size: int) -> np.ndarray:
    images = np.zeros((len(names), size, size, 3), dtype=np.uint8)
    
    i = 0
    for name in names:
        images[i] = preprocess(cv2.imread(os.path.join(path, name), cv2.IMREAD_COLOR), size)
        i += 1
    return images


def get_statistics(images: list, size: int) -> None:
    print(f"Statistics {size}x{size}\n")
    
    for i in range(3):
        mean, std = 0.0, 0.0
        for j in range(len(images)):
            mean += images[0][:, :, i].mean()
            std += images[0][:, :, i].std()
            
            if i == 1:
                print(f"Red Channel Mean   {size} : {mean / 255:.5f}")
                print(f"Red Channel Std    {size} : {std / 255:.5f}")
            
            if i == 2:
                print(f"Green Channel Mean {size} : {mean / 255:.5f}")
                print(f"Green Channel Std  {size} : {std / 255:.5f}")
            
            if i == 3:
                print(f"Blue Channel Mean  {size} : {mean / 255:.5f}")
                print(f"Blue Channel Std   {size} : {std / 255:.5f}")

In [3]:
pos_filenames = os.listdir(os.path.join(BASE_PATH, "Positive"))
neg_filenames = os.listdir(os.path.join(BASE_PATH, "Negative"))

pos_images = get_images(os.path.join(BASE_PATH, "Positive"), pos_filenames, SIZE)
neg_images = get_images(os.path.join(BASE_PATH, "Negative"), neg_filenames, SIZE)

labels = np.concatenate((np.zeros((len(neg_images),)), np.ones((len(pos_images),))), axis=0)
images = np.concatenate((neg_images, pos_images), axis=0)

breaker()
get_statistics(list(images), SIZE)
breaker()

np.save(f"images-{SIZE}.npy", images)
np.save(f"labels-{SIZE}.npy", labels)


**************************************************

Statistics 227x227

Red Channel Mean   227 : 0.65178
Red Channel Std    227 : 0.07285
Red Channel Mean   227 : 1.30356
Red Channel Std    227 : 0.14570
Red Channel Mean   227 : 1.95533
Red Channel Std    227 : 0.21855
Red Channel Mean   227 : 2.60711
Red Channel Std    227 : 0.29140
Red Channel Mean   227 : 3.25889
Red Channel Std    227 : 0.36424
Red Channel Mean   227 : 3.91067
Red Channel Std    227 : 0.43709
Red Channel Mean   227 : 4.56245
Red Channel Std    227 : 0.50994
Red Channel Mean   227 : 5.21422
Red Channel Std    227 : 0.58279
Red Channel Mean   227 : 5.86600
Red Channel Std    227 : 0.65564
Red Channel Mean   227 : 6.51778
Red Channel Std    227 : 0.72849
Red Channel Mean   227 : 7.16956
Red Channel Std    227 : 0.80134
Red Channel Mean   227 : 7.82134
Red Channel Std    227 : 0.87419
Red Channel Mean   227 : 8.47311
Red Channel Std    227 : 0.94704
Red Channel Mean   227 : 9.12489
Red Channel Std    227 : 1.01989
Red