<a href="https://www.kaggle.com/code/prashanthacsq/pdc-images-224?scriptVersionId=98400563" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

### **Library Imports**

In [1]:
import os
import cv2
import random as r
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Root directory of the training images. The loading code in this notebook joins a
# label name onto this path, i.e. it expects one sub-directory per label.
TRAIN_IMAGE_BASE_PATH = "../input/paddy-disease-classification/train_images"
# Edge length, in pixels, of the square images this notebook produces; also used in
# the name of the saved .npy file.
SIZE = 224

### **Helpers**

In [2]:
def breaker(num: int = 50, char: str = "*") -> None:
    """Print a separator line surrounded by blank lines.

    Args:
        num: How many times ``char`` is repeated to form the separator.
        char: The string that is repeated.
    """
    rule = num * char
    print("".join(("\n", rule, "\n")))


def get_image(path: str, size: int=224) -> np.ndarray:
    """Load an image file and return it as a square RGB array.

    The file is read with ``cv2.IMREAD_COLOR``, converted from OpenCV's BGR
    channel order to RGB, and resized to ``(size, size)`` with
    ``cv2.INTER_AREA`` interpolation.

    Args:
        path: Location of the image file.
        size: Edge length, in pixels, of the returned square image.

    Returns:
        The converted and resized image.

    Raises:
        OSError: If OpenCV could not read an image from ``path``.
    """
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; fail here with the offending path instead of
        # letting cvtColor raise an opaque cv2.error further down.
        raise OSError(f"Could not read image: {path!r}")

    image = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB)
    return cv2.resize(src=image, dsize=(size, size), interpolation=cv2.INTER_AREA)


def get_images(path: str, names: np.ndarray, size: int) -> np.ndarray:
    """Load every named image from a directory into one uint8 array.

    Args:
        path: Directory that contains the image files.
        names: File names, relative to ``path``, in the order they should
            appear in the result.
        size: Edge length, in pixels, each image is resized to.

    Returns:
        Array of shape ``(len(names), size, size, 3)`` and dtype ``uint8``
        whose ``k``-th entry is the image loaded for ``names[k]``.
    """
    # Allocate the whole batch up front, then fill it one slot per file.
    batch = np.zeros((len(names), size, size, 3), dtype=np.uint8)
    for slot, filename in enumerate(names):
        batch[slot] = get_image(os.path.join(path, filename), size)
    return batch


def get_statistics(images: list, size: int=224) -> None:
    """Print the per-channel mean and standard deviation of a set of images.

    For each of the three channels, the mean and the standard deviation are
    computed per image, averaged over all images, and divided by 255 before
    being printed with five decimals. Note that the reported "Std" is the
    average of the per-image standard deviations, not the standard deviation
    of all pixels pooled together.

    Args:
        images: Non-empty sequence of arrays indexable as
            ``image[:, :, channel]`` for channels 0, 1 and 2, which are
            labelled Red, Green and Blue in the output.
        size: Image edge length; used only in the printed header.

    Raises:
        ValueError: If ``images`` is empty.
    """
    # len() rather than truthiness so that a NumPy array is accepted as well.
    count = len(images)
    if count == 0:
        raise ValueError("get_statistics() requires at least one image")

    print(f"Statistics {size}x{size}\n")

    for channel, label in enumerate(("Red", "Green", "Blue")):
        mean_total, std_total = 0.0, 0.0
        for image in images:
            plane = image[:, :, channel]
            mean_total += plane.mean()
            std_total += plane.std()

        # Every channel group after the first is preceded by a blank line.
        lead = "" if channel == 0 else "\n"
        # Titles are padded to 19 characters so the colons line up.
        print(f"{lead}{label + ' Channel Mean':<19}: {mean_total / count / 255:.5f}")
        print(f"{label + ' Channel Std':<19}: {std_total / count / 255:.5f}")

### **Get Images**

In [3]:
# Load the training metadata; the rest of the notebook reads its `image_id` and
# `label` columns.
df = pd.read_csv("../input/paddy-disease-classification/train.csv")
# Last expression of the cell: displays the first five rows as the cell output.
df.head(5)

Unnamed: 0,image_id,label,variety,age
0,100330.jpg,bacterial_leaf_blight,ADT45,45
1,100365.jpg,bacterial_leaf_blight,ADT45,45
2,100382.jpg,bacterial_leaf_blight,ADT45,45
3,100632.jpg,bacterial_leaf_blight,ADT45,45
4,101918.jpg,bacterial_leaf_blight,ADT45,45


In [4]:
# Process labels in sorted order so the saved array has a deterministic layout:
# images are grouped by label and, within a label, follow their order in `df`.
sorted_labels = sorted(set(df.label))

# Collect one batch per label and concatenate once at the end; growing the array
# inside the loop would re-copy every previously loaded image on each iteration.
batches = []
for label in sorted_labels:
    names = df.loc[df.label == label, "image_id"].to_numpy()
    batches.append(get_images(os.path.join(TRAIN_IMAGE_BASE_PATH, label), names, SIZE))

if batches:
    images = np.concatenate(batches, axis=0)
else:
    # No labels at all: np.concatenate rejects an empty list, so build the
    # empty (0, SIZE, SIZE, 3) result explicitly.
    images = np.zeros((0, SIZE, SIZE, 3), dtype=np.uint8)

np.save(f"images-{SIZE}.npy", images)

### **Test**

In [5]:
# Spot-check that `images` lines up with `df`: row `index` of `images` must be the
# image named by row `index` of `df`. `images` is built label by label in sorted
# label order, so this only holds when the rows of `df` are grouped the same way.
num_samples = 1000
for _ in range(num_samples):
    index = r.randrange(len(images))

    # Reload the image from disk with the same target size used to build `images`.
    test_1 = get_image(
        os.path.join(TRAIN_IMAGE_BASE_PATH, df.label.iloc[index], df.image_id.iloc[index]),
        SIZE,
    )
    test_2 = images[index]

    # Compare element-wise. `a.all() == b.all()` would only compare two booleans
    # ("is every value non-zero?") and passes for almost any pair of images.
    np.testing.assert_array_equal(
        test_1,
        test_2,
        err_msg=f"images[{index}] does not match {df.image_id.iloc[index]}",
    )

### **Statistics**

In [6]:
# Print the per-channel statistics of the whole image set between two separators.
breaker()
# Pass SIZE rather than a literal so the printed header follows the configured size.
get_statistics(images=list(images), size=SIZE)
breaker()


**************************************************

Statistics 224x224

Red Channel Mean   : 0.49699
Red Channel Std    : 0.22036

Green Channel Mean : 0.58823
Green Channel Std  : 0.22067

Blue Channel Mean  : 0.23049
Blue Channel Std   : 0.17689

**************************************************

