In [None]:
import os
import cv2
import numpy as np
import pandas as pd

In [None]:
def breaker(num: int = 50, char: str = "*") -> None:
    """Print a visual separator line.

    The separator is ``char`` repeated ``num`` times, preceded by a newline
    and followed by a blank line.

    Args:
        num: Number of repetitions of ``char``.
        char: String to repeat.
    """
    separator = char * num
    print(f"\n{separator}\n")


def preprocess(image: np.ndarray, size: int) -> np.ndarray:
    """Convert an image from BGR to RGB channel order and resize it to a square.

    Args:
        image: Input image, interpreted as BGR by the colour conversion.
        size: Side length of the output; the image is resized to
            ``(size, size)`` regardless of its original aspect ratio.

    Returns:
        The RGB image resized with ``cv2.INTER_AREA`` interpolation.
    """
    rgb_image = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB)
    target_size = (size, size)
    return cv2.resize(src=rgb_image, dsize=target_size, interpolation=cv2.INTER_AREA)


def get_images(path: str, names: np.ndarray, size: int) -> np.ndarray:
    """Load, preprocess and stack the JPEG images identified by ``names``.

    Each image is read from ``<path>/<name>.jpg``, passed through
    ``preprocess`` and stored at the position its name has in ``names``.

    Args:
        path: Directory containing the image files.
        names: Image identifiers without the ``.jpg`` extension.
        size: Side length forwarded to ``preprocess``.

    Returns:
        A ``uint8`` array of shape ``(len(names), size, size, 3)``.

    Raises:
        FileNotFoundError: If an image file cannot be read.
    """
    images = np.zeros((len(names), size, size, 3), dtype=np.uint8)

    for index, name in enumerate(names):
        file_path = os.path.join(path, name + ".jpg")
        image = cv2.imread(file_path, cv2.IMREAD_COLOR)
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising, so fail here with the offending path.
        if image is None:
            raise FileNotFoundError(f"Could not read image file: {file_path}")
        images[index] = preprocess(image, size)
    return images


def save(train_images: np.ndarray, test_images: np.ndarray, targets: np.ndarray, size: int) -> None:
    """Write the three arrays to ``.npy`` files in the current directory.

    The file names carry ``size`` as a suffix: ``train_images_<size>.npy``,
    ``test_images_<size>.npy`` and ``targets_<size>.npy``.

    Args:
        train_images: Array saved as the training images file.
        test_images: Array saved as the test images file.
        targets: Array saved as the targets file.
        size: Value embedded in each file name.
    """
    outputs = (
        (f"./train_images_{size}.npy", train_images),
        (f"./test_images_{size}.npy", test_images),
        (f"./targets_{size}.npy", targets),
    )
    for file_name, array in outputs:
        np.save(file_name, array)


def get_statistics(images: np.ndarray, size: int) -> None:
    """Print the per-channel mean and standard deviation of ``images``.

    Both statistics are divided by 255 before printing.

    Args:
        images: Image array whose last axis holds the colour channels in
            red, green, blue order, e.g. the ``(N, size, size, 3)`` array
            returned by ``get_images``.
        size: Image side length; used only in the printed headings.
    """
    # Index the LAST axis to pick a channel: on an (N, H, W, 3) batch,
    # ``[:, :, c]`` would slice the width axis and mix all three channels.
    red, green, blue = (images[..., channel] for channel in range(3))

    breaker()
    print(f"Mean {size}x{size}\n")
    print(f"Red Channel Mean   : {red.mean() / 255}")
    print(f"Green Channel Mean : {green.mean() / 255}")
    print(f"Blue Channel Mean  : {blue.mean() / 255}")

    breaker()
    print(f"Standard Deviation {size}x{size}\n")
    print(f"Red Channel Std   : {red.std() / 255}")
    print(f"Green Channel Std : {green.std() / 255}")
    print(f"Blue Channel Std  : {blue.std() / 255}")
    breaker()

In [None]:
# Load the training metadata and the sample submission table.
train_df = pd.read_csv("../input/plant-pathology-2020-fgvc7/train.csv")
ss_df = pd.read_csv("../input/plant-pathology-2020-fgvc7/sample_submission.csv")

# Image identifiers; get_images appends ".jpg" to each when reading files.
test_filenames = ss_df.loc[:, "image_id"].copy().values
train_filenames = train_df.loc[:, "image_id"].copy().values

# Every column after the first one is kept as the target matrix.
targets = train_df.iloc[:, 1:].copy().values

In [None]:
# Build, persist and summarise the 224x224 version of the dataset.
size = 224

train_images = get_images(
    path="../input/plant-pathology-2020-fgvc7/images",
    names=train_filenames,
    size=size,
)
test_images = get_images(
    path="../input/plant-pathology-2020-fgvc7/images",
    names=test_filenames,
    size=size,
)

# Writes the train/test image arrays and the targets to size-suffixed .npy files.
save(train_images=train_images, test_images=test_images, targets=targets, size=size)

# Print the statistics for the training images.
get_statistics(images=train_images, size=size)

In [None]:
# Build, persist and summarise the 320x320 version of the dataset.
size = 320

train_images = get_images(
    path="../input/plant-pathology-2020-fgvc7/images",
    names=train_filenames,
    size=size,
)
test_images = get_images(
    path="../input/plant-pathology-2020-fgvc7/images",
    names=test_filenames,
    size=size,
)

# Writes the train/test image arrays and the targets to size-suffixed .npy files.
save(train_images=train_images, test_images=test_images, targets=targets, size=size)

# Print the statistics for the training images.
get_statistics(images=train_images, size=size)

In [None]:
# Build, persist and summarise the 384x384 version of the dataset.
size = 384

train_images = get_images(
    path="../input/plant-pathology-2020-fgvc7/images",
    names=train_filenames,
    size=size,
)
test_images = get_images(
    path="../input/plant-pathology-2020-fgvc7/images",
    names=test_filenames,
    size=size,
)

# Writes the train/test image arrays and the targets to size-suffixed .npy files.
save(train_images=train_images, test_images=test_images, targets=targets, size=size)

# Print the statistics for the training images.
get_statistics(images=train_images, size=size)

In [None]:
# Build, persist and summarise the 512x512 version of the dataset.
size = 512

train_images = get_images(
    path="../input/plant-pathology-2020-fgvc7/images",
    names=train_filenames,
    size=size,
)
test_images = get_images(
    path="../input/plant-pathology-2020-fgvc7/images",
    names=test_filenames,
    size=size,
)

# Writes the train/test image arrays and the targets to size-suffixed .npy files.
save(train_images=train_images, test_images=test_images, targets=targets, size=size)

# Print the statistics for the training images.
get_statistics(images=train_images, size=size)