In [1]:
import os
import cv2
import numpy as np
import multiprocess as mp

TRAIN_BASE_PATH = "../input/human-action-detection-artificial-intelligence/emirhan_human_dataset/datasets/human_data/train_data"
TEST_BASE_PATH  = "../input/human-action-detection-artificial-intelligence/emirhan_human_dataset/datasets/human_data/test_data"
SIZE = 224

In [2]:
def breaker(num: int = 50, char: str = "*") -> None:
    print("\n" + num*char + "\n")


def preprocess(image: np.ndarray, size: int) -> np.ndarray:
    return cv2.resize(src=cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB), dsize=(size, size), interpolation=cv2.INTER_AREA)


def get_images(path: str, names: np.ndarray, size: int) -> np.ndarray:
    images = np.zeros((len(names), size, size, 3), dtype=np.uint8)
    
    i = 0
    for name in names:
        images[i] = preprocess(cv2.imread(os.path.join(path, name), cv2.IMREAD_COLOR), size)
        i += 1
    return images


def get_statistics(images: list, size: int) -> None:
    print(f"Statistics {size}x{size}\n")
    
    for i in range(3):
        mean, std = 0.0, 0.0
        for j in range(len(images)):
            mean += images[0][:, :, i].mean()
            std += images[0][:, :, i].std()
        
        if i == 0:
            print(f"\nRed Channel Mean   {size} : {mean / len(images) / 255:.5f}")
            print(f"Red Channel Std    {size} : {std / len(images) / 255:.5f}")

        if i == 1:
            print(f"\nGreen Channel Mean {size} : {mean / len(images) / 255:.5f}")
            print(f"Green Channel Std  {size} : {std / len(images) / 255:.5f}")

        if i == 2:
            print(f"\nBlue Channel Mean  {size} : {mean / len(images) / 255:.5f}")
            print(f"Blue Channel Std   {size} : {std / len(images) / 255:.5f}")

### **Train**

In [3]:
def create_train_images():
    tr_labels = sorted(os.listdir(TRAIN_BASE_PATH))
    tr_images = np.zeros((1, SIZE, SIZE, 3), dtype=np.uint8)
    tr_labels = np.zeros((len(os.listdir("../input/human-action-detection-artificial-intelligence/emirhan_human_dataset/datasets/human_data/train_data/calling"), )))

    i = 1
    for label in os.listdir(TRAIN_BASE_PATH):
        images = get_images(os.path.join(TRAIN_BASE_PATH, label), 
                            os.listdir(os.path.join(TRAIN_BASE_PATH, label)), 
                            SIZE)
        tr_images = np.concatenate((tr_images, images), axis=0)
        tr_labels = np.concatenate((tr_labels, np.ones(len(os.listdir(os.path.join(TRAIN_BASE_PATH, label))), ) * i), axis=0)
        i += 1

    np.save(f"tr-images-{SIZE}.npy", tr_images[1:])
    np.save(f"tr-labels-{SIZE}.npy", tr_labels)

    breaker()
    get_statistics(list(tr_images[1:]), SIZE)
    breaker()


proc = mp.Process(target=create_train_images)
proc.start()
proc.join()


**************************************************

Statistics 224x224


Red Channel Mean   224 : 0.66416
Red Channel Std    224 : 0.28239

Green Channel Mean 224 : 0.66332
Green Channel Std  224 : 0.26653

Blue Channel Mean  224 : 0.64562
Blue Channel Std   224 : 0.26788

**************************************************



### **Test**

In [4]:
def create_test_images():
    ts_labels = sorted(os.listdir(TEST_BASE_PATH))
    ts_images = np.zeros((1, SIZE, SIZE, 3), dtype=np.uint8)
    ts_labels = np.zeros((len(os.listdir("../input/human-action-detection-artificial-intelligence/emirhan_human_dataset/datasets/human_data/test_data/calling"), )))

    i = 1
    for label in os.listdir(TEST_BASE_PATH):
        images = get_images(os.path.join(TEST_BASE_PATH, label), 
                               os.listdir(os.path.join(TEST_BASE_PATH, label)), 
                               SIZE)
        ts_images = np.concatenate((ts_images, images), axis=0)
        ts_labels = np.concatenate((ts_labels, np.ones(len(os.listdir(os.path.join(TEST_BASE_PATH, label))), ) * i), axis=0)
        i += 1

    np.save(f"ts-images-{SIZE}.npy", ts_images[1:])
    np.save(f"ts-labels-{SIZE}.npy", ts_labels)

    breaker()
    get_statistics(list(ts_images[1:]), SIZE)
    breaker()

proc = mp.Process(target=create_test_images)
proc.start()
proc.join()


**************************************************

Statistics 224x224


Red Channel Mean   224 : 0.46361
Red Channel Std    224 : 0.22014

Green Channel Mean 224 : 0.45379
Green Channel Std  224 : 0.20495

Blue Channel Mean  224 : 0.44087
Blue Channel Std   224 : 0.21331

**************************************************



### **Train + Test**

In [5]:
def create_images():
    tr_labels = sorted(os.listdir(TRAIN_BASE_PATH))
    tr_images = np.zeros((1, SIZE, SIZE, 3), dtype=np.uint8)
    tr_labels = np.zeros((len(os.listdir("../input/human-action-detection-artificial-intelligence/emirhan_human_dataset/datasets/human_data/train_data/calling"), )))

    i = 1
    for label in os.listdir(TRAIN_BASE_PATH):
        temp_images = get_images(os.path.join(TRAIN_BASE_PATH, label), 
                                 os.listdir(os.path.join(TRAIN_BASE_PATH, label)), 
                                 SIZE)
        tr_images = np.concatenate((tr_images, temp_images), axis=0)
        tr_labels = np.concatenate((tr_labels, np.ones(len(os.listdir(os.path.join(TRAIN_BASE_PATH, label))), ) * i), axis=0)
        i += 1
    
    ts_labels = sorted(os.listdir(TEST_BASE_PATH))
    ts_images = np.zeros((1, SIZE, SIZE, 3), dtype=np.uint8)
    ts_labels = np.zeros((len(os.listdir("../input/human-action-detection-artificial-intelligence/emirhan_human_dataset/datasets/human_data/test_data/calling"), )))

    del temp_images
    
    i = 1
    for label in os.listdir(TEST_BASE_PATH):
        temp_images = get_images(os.path.join(TEST_BASE_PATH, label), 
                                 os.listdir(os.path.join(TEST_BASE_PATH, label)), 
                                 SIZE)
        ts_images = np.concatenate((ts_images, temp_images), axis=0)
        ts_labels = np.concatenate((ts_labels, np.ones(len(os.listdir(os.path.join(TEST_BASE_PATH, label))), ) * i), axis=0)
        i += 1
    
    images = np.concatenate((tr_images[1:], ts_images[1:]), axis=0)
    labels = np.concatenate((tr_labels, ts_labels), axis=0)
    
    np.save(f"images-{SIZE}.npy", images)
    np.save(f"labels-{SIZE}.npy", labels)

    breaker()
    get_statistics(list(images), SIZE)
    breaker()

proc = mp.Process(target=create_images)
proc.start()
proc.join()


**************************************************

Statistics 224x224


Red Channel Mean   224 : 0.66416
Red Channel Std    224 : 0.28239

Green Channel Mean 224 : 0.66332
Green Channel Std  224 : 0.26653

Blue Channel Mean  224 : 0.64562
Blue Channel Std   224 : 0.26788

**************************************************

