In [1]:
import os
import cv2
import numpy as np
import pandas as pd

In [2]:
def get_image(path: str, size: int=224) -> np.ndarray:
    """Load an image file as a square RGB array.

    The file is read with OpenCV in colour mode, converted from OpenCV's BGR
    channel order to RGB, and resized to ``size`` x ``size`` with area
    interpolation. The aspect ratio is not preserved.

    Args:
        path: Location of the image file.
        size: Edge length, in pixels, of the returned square image.

    Returns:
        The resized RGB image.

    Raises:
        FileNotFoundError: If ``path`` does not point to an existing file.
        ValueError: If the file exists but OpenCV could not decode it.
    """
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque assertion error inside cvtColor.
    if image is None:
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Image file not found: {path}")
        raise ValueError(f"Could not decode image file: {path}")
    image = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB)
    image = cv2.resize(src=image, dsize=(size, size), interpolation=cv2.INTER_AREA)
    return image


def get_statistics(images: list, size: int) -> None:
    """Print normalised per-channel mean and standard deviation of ``images``.

    For each of the three channels (reported as Red, Green, Blue in index
    order 0, 1, 2) the per-image mean and per-image standard deviation are
    averaged over all images and divided by 255 before printing.

    Note that the reported "Std" is therefore the average of the individual
    images' standard deviations, not the standard deviation of all pixels
    pooled together.

    Args:
        images: Sequence of arrays indexable as ``image[:, :, channel]`` with
            at least three channels.
        size: Image edge length; only used to label the printed lines.

    Raises:
        ValueError: If ``images`` is empty (the averages would be undefined).
    """
    num_images = len(images)
    if num_images == 0:
        raise ValueError("images must contain at least one image")

    print(f"Statistics {size}x{size}\n")

    for channel, name in enumerate(("Red", "Green", "Blue")):
        mean, std = 0.0, 0.0
        for image in images:
            plane = image[:, :, channel]
            mean += plane.mean()
            std += plane.std()

        # Labels are left-padded to a common width so the values line up.
        mean_label = f"{name} Channel Mean"
        std_label = f"{name} Channel Std"
        print(f"\n{mean_label:<18} {size} : {mean / num_images / 255:.5f}")
        print(f"{std_label:<18} {size} : {std / num_images / 255:.5f}")

            
def reserve_memory(path: str, size: int, excluded: tuple = ("4514.jpg", "1187.jpg")) -> tuple:
    """Pre-allocate image and label arrays for the dataset under ``path``.

    Every entry of ``path`` whose name contains no "." and is not "test" is
    treated as a class folder. The arrays get one row per file found in those
    folders, leaving out files whose name appears in ``excluded``.

    Args:
        path: Dataset root directory containing one sub-folder per class.
        size: Edge length of the square images the array will hold.
        excluded: File names that will not be loaded and therefore need no
            row. Defaults to the two names skipped by the loading loop in
            this notebook.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a zero-filled uint8
        array of shape ``(count, size, size, 3)`` and ``labels`` is a uint8
        array of ones with shape ``(count,)``.
    """
    total_num_files: int = 0
    for name in os.listdir(path):
        if "." in name or name == "test":
            continue
        # Count only the files that will actually be loaded, instead of
        # assuming a fixed number of excluded files exists in the dataset.
        total_num_files += sum(
            1
            for filename in os.listdir(os.path.join(path, name))
            if filename not in excluded
        )
    images = np.zeros((total_num_files, size, size, 3), dtype=np.uint8)
    # Labels start as ones; the loading loop multiplies them by the class index.
    labels = np.ones((total_num_files, ), dtype=np.uint8)
    return images, labels

In [3]:
path: str = "../input/weather-dataset/dataset"
# Class folders: every entry without a "." in its name, except "test".
folders: list = sorted(
    entry
    for entry in os.listdir(path)
    if not ("." in entry or entry == "test")
)
sizes: list = [224, 384, 512]

for size in sizes:
    images, labels = reserve_memory(path, size)

    row = 0  # next free slot in images / labels
    for class_index, folder_name in enumerate(folders):
        folder_path = os.path.join(path, folder_name)
        for filename in os.listdir(folder_path):
            # These two files are deliberately left out of the arrays
            # (presumably unreadable — TODO confirm).
            if filename in ("4514.jpg", "1187.jpg"):
                continue
            images[row] = get_image(os.path.join(folder_path, filename), size=size)
            # labels is pre-filled with ones, so this stores the class index.
            labels[row] = labels[row] * class_index
            row += 1

    # Persist the arrays for this resolution, then report channel statistics.
    np.save(f"images_{size}.npy", images)
    np.save(f"labels_{size}.npy", labels)

    get_statistics(images=list(images), size=size)

Statistics 224x224


Red Channel Mean   224 : 0.51703
Red Channel Std    224 : 0.19119

Green Channel Mean 224 : 0.52522
Green Channel Std  224 : 0.18506

Blue Channel Mean  224 : 0.50585
Blue Channel Std   224 : 0.19167
Statistics 384x384


Red Channel Mean   384 : 0.51684
Red Channel Std    384 : 0.19350

Green Channel Mean 384 : 0.52503
Green Channel Std  384 : 0.18743

Blue Channel Mean  384 : 0.50567
Blue Channel Std   384 : 0.19404
Statistics 512x512


Red Channel Mean   512 : 0.51688
Red Channel Std    512 : 0.19438

Green Channel Mean 512 : 0.52507
Green Channel Std  512 : 0.18833

Blue Channel Mean  512 : 0.50570
Blue Channel Std   512 : 0.19493
