<a href="https://www.kaggle.com/code/prashanthacsq/fgvc9-statistics?scriptVersionId=91304322" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import os
import cv2
import pickle
import numpy as np
import pandas as pd

In [2]:
def breaker(num: int = 50, char: str = "*") -> None:
    """Print a separator made of ``char`` repeated ``num`` times.

    The separator is surrounded by a blank line above and below.
    """
    separator = char * num
    print(f"\n{separator}\n")


def preprocess(image: np.ndarray) -> np.ndarray:
    """Return ``image`` converted from BGR to RGB channel order."""
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return rgb_image


def get_statistics(path: str, names: np.ndarray, sizes: list, broken_images: list) -> tuple:
    """Average per-image RGB channel means and standard deviations per size.

    Every image in ``names`` that is not listed in ``broken_images`` is read
    from ``path`` once, converted to RGB, resized to ``size x size`` for each
    entry of ``sizes``, and its per-channel mean and standard deviation are
    accumulated. The sums are then divided by the number of images that were
    actually processed.

    Note that the reported standard deviations are averages of per-image
    standard deviations, not the standard deviation of all pixels pooled
    together.

    Args:
        path: Directory containing the image files.
        names: Image file names, relative to ``path``.
        sizes: Square edge lengths (in pixels) each image is resized to.
        broken_images: File names that must be skipped.

    Returns:
        A tuple ``(r_means, g_means, b_means, r_stds, g_stds, b_stds)`` of
        lists, each holding one value per entry of ``sizes``. The values are
        ``nan`` when no image contributed to the statistics.
    """
    if len(sizes) == 0:
        # Nothing to compute; avoid reading any image from disk.
        return [], [], [], [], [], []

    # Set membership is O(1); the original list lookup was O(len(broken_images)).
    skipped = set(broken_images)

    # One [R, G, B] accumulator row per requested size.
    mean_sums = [[0.0, 0.0, 0.0] for _ in sizes]
    std_sums = [[0.0, 0.0, 0.0] for _ in sizes]
    used = 0  # number of images that actually contributed

    for name in names:
        if name in skipped:
            continue

        # Read and colour-convert each image once, then reuse it for all sizes.
        raw = cv2.imread(os.path.join(path, name), cv2.IMREAD_COLOR)
        if raw is None:
            # cv2.imread returns None when the file cannot be read; treat it
            # like a broken image instead of crashing inside cvtColor.
            continue
        main_image = preprocess(raw)
        used += 1

        for slot, size in enumerate(sizes):
            image = cv2.resize(src=main_image, dsize=(size, size), interpolation=cv2.INTER_AREA)
            for channel in range(3):
                plane = image[:, :, channel]
                mean_sums[slot][channel] += plane.mean()
                std_sums[slot][channel] += plane.std()

    def _averaged(sums: list, channel: int) -> list:
        # Divide by the images actually used, not len(names), so skipped
        # images do not drag the averages towards zero.
        return [row[channel] / used if used else float("nan") for row in sums]

    return (
        _averaged(mean_sums, 0),
        _averaged(mean_sums, 1),
        _averaged(mean_sums, 2),
        _averaged(std_sums, 0),
        _averaged(std_sums, 1),
        _averaged(std_sums, 2),
    )

In [3]:
# Load the image -> cultivar mapping for the training set.
train_df = pd.read_csv("../input/sorghum-id-fgvc-9/train_cultivar_mapping.csv")

# List the image directory once and keep it as a set, so each membership test
# is O(1) instead of re-listing the directory for every row of the CSV.
available_images = set(os.listdir("../input/sorghum-id-fgvc-9/train_images"))
broken_images = [filename for filename in train_df.image if filename not in available_images]

# Drop every row whose image file is missing, in a single call.
train_df = train_df.drop(index=train_df.index[train_df.image.isin(broken_images)])

# File names (first column) of the rows that survived the filtering.
filenames = train_df.iloc[:, 0].copy().values

In [4]:
# Square edge lengths (in pixels) at which the channel statistics are computed.
sizes = [256, 384, 512, 768, 1024]

r_means, g_means, b_means, r_stds, g_stds, b_stds = get_statistics(
    path="../input/sorghum-id-fgvc-9/train_images",
    names=filenames,
    sizes=sizes,
    broken_images=broken_images,
)

In [5]:
# Print the per-channel means and standard deviations for every image size,
# with a separator line before the first and after each size.
breaker()
for size, r_mean, g_mean, b_mean, r_std, g_std, b_std in zip(
    sizes, r_means, g_means, b_means, r_stds, g_stds, b_stds
):
    print(f"Red Channel Mean ({size})   : {r_mean:.5f}")
    print(f"Green Channel Mean ({size}) : {g_mean:.5f}")
    print(f"Blue Channel Mean ({size})  : {b_mean:.5f}")
    print("\n")
    print(f"Red Channel Std ({size})    : {r_std:.5f}")
    print(f"Green Channel Std ({size})  : {g_std:.5f}")
    print(f"Blue Channel Std ({size})   : {b_std:.5f}")
    breaker()


**************************************************

Red Channel Mean (256)   : 91.36865
Green Channel Mean (256) : 97.47721
Blue Channel Mean (256)  : 74.67664


Red Channel Std (256)    : 40.03393
Green Channel Std (256)  : 41.13178
Blue Channel Std (256)   : 31.92591

**************************************************

Red Channel Mean (384)   : 91.36874
Green Channel Mean (384) : 97.47732
Blue Channel Mean (384)  : 74.67668


Red Channel Std (384)    : 40.44440
Green Channel Std (384)  : 41.51696
Blue Channel Std (384)   : 32.26532

**************************************************

Red Channel Mean (512)   : 91.49008
Green Channel Mean (512) : 97.59776
Blue Channel Mean (512)  : 74.80131


Red Channel Std (512)    : 40.81830
Green Channel Std (512)  : 41.85729
Blue Channel Std (512)   : 32.57605

**************************************************

Red Channel Mean (768)   : 91.36847
Green Channel Mean (768) : 97.47699
Blue Channel Mean (768)  : 74.67662


Red Channel Std (768)   