In [1]:
# %%bash
# pip install --upgrade imgaug -q

In [2]:
from time import time
notebook_start_time = time()

In [3]:
import os
import cv2
# import imgaug
import numpy as np
import pandas as pd
# from imgaug import augmenters
from sklearn.preprocessing import LabelEncoder

SIZE = 256
SEED = 42
le = LabelEncoder()

In [4]:
def get_image(path: str, size: int) -> np.ndarray:
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    image = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB)
    return cv2.resize(src=image, dsize=(size, size), interpolation=cv2.INTER_AREA)


def get_images(path: str, filenames: list, size: int) -> np.ndarray:
    images = np.zeros((len(filenames), size, size, 3), dtype=np.uint8)
    i = 0
    for filename in filenames:
        images[i] = get_image(os.path.join(path, filename), size)
        i += 1
    return images


# def get_augment(seed: int = 42):
#     imgaug.seed(seed)
#     augment = augmenters.Sequential([
#         augmenters.Fliplr(0.25, seed=seed,),
#         augmenters.Flipud(0.25, seed=seed,),
#         augmenters.Affine(scale=(0.9, 1.1), translate_percent=(-0.1, 0.1), rotate=(-45, 45), seed=seed),
#     ])
    
#     return augment

In [5]:
train_df = pd.read_csv("../input/sorghum-id-fgvc-9/train_cultivar_mapping.csv")
train_df = train_df.drop(index=train_df.index[train_df.image == ".DS_Store"])

filenames = train_df.image.copy().values
labels    = train_df.cultivar.copy().values 
labels = le.fit_transform(labels)
np.save("labels.npy", labels)

In [6]:
images = get_images("../input/sorghum-id-fgvc-9/train_images", filenames, SIZE)
np.save(f"images_{SIZE}.npy", images)

print("Normal Images\n")
print(f"Red Channel Mean   : {images[:, :, 0].mean():.5f}")
print(f"Green Channel Mean : {images[:, :, 1].mean():.5f}")
print(f"Blue Channel Mean  : {images[:, :, 2].mean():.5f}")
print("")
print(f"Red Channel Std    : {images[:, :, 0].std():.5f}")
print(f"Green Channel Std  : {images[:, :, 1].std():.5f}")
print(f"Blue Channel Std   : {images[:, :, 2].std():.5f}")

# ds_augmenter = get_augment(SEED)
# augmented_images = ds_augmenter(images=images)
# np.save(f"augmented_images_{SIZE}.npy", augmented_images)
# print("Augmented Images\n")
# print(f"Red Channel Mean   : {augmented_images[:, :, 0].mean():.5f}")
# print(f"Green Channel Mean : {augmented_images[:, :, 1].mean():.5f}")
# print(f"Blue Channel Mean  : {augmented_images[:, :, 2].mean():.5f}")
# print("")
# print(f"Red Channel Std    : {augmented_images[:, :, 0].std():.5f}")
# print(f"Green Channel Std  : {augmented_images[:, :, 1].std():.5f}")
# print(f"Blue Channel Std   : {augmented_images[:, :, 2].std():.5f}")

Normal Images

Red Channel Mean   : 94.98985
Green Channel Mean : 95.85722
Blue Channel Mean  : 95.72068

Red Channel Std    : 51.80701
Green Channel Std  : 53.98795
Blue Channel Std   : 53.95851


In [7]:
print(f"Notebook Runtime : {(time() - notebook_start_time)/60:.2f} minutes")

Notebook Runtime : 18.83 minutes
