In [2]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
# import tensorflow as tf
# from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# todo: uncomment for kaggle environment
# base_dataset_path = r"/kaggle/input/num-ds"
# base_aug_dataset_path = r"/kaggle/working/num-ds-aug"

# todo: uncomment for local environment
# Build the relative paths with os.path.join instead of hard-coded backslashes:
# the result is identical on Windows, and on Linux/macOS the separators are
# valid (a literal "..\dataset\dataset" would be one odd directory name there).
base_dataset_path = os.path.join("..", "dataset", "dataset")
base_aug_dataset_path = os.path.join("..", "dataset", "dataset_aug")

# original dataset splits
train_dir = os.path.join(base_dataset_path, 'Training_data')
valid_dir = os.path.join(base_dataset_path, 'Validation_data')
test_dir = os.path.join(base_dataset_path, 'Test_data')

# destination for the augmented training images
train_dir_aug = os.path.join(base_aug_dataset_path, 'Training_data')

# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))



In [4]:

def add_gaussian_noise_to_img(image, noise_factor=0.1):
    """
    Add zero-mean Gaussian noise to an input image based on the noise factor.

    The image is scaled to [0, 1], noise is added, the result is clipped back
    to [0, 1] and then converted to uint8 again.

    Input Parameters:
    - image: Input image (numpy array); pixel values are assumed to lie in [0, 255].
    - noise_factor: Standard deviation of the Gaussian noise on the normalized
      [0, 1] scale (0.1 corresponds to about 25 grey levels).

    Returns:
    - Noisy image (numpy array, dtype uint8, same shape as the input).
    """
    # normalize pixel values between 0 and 1
    image_float = image.astype(np.float32) / 255.0

    # noise varies according to the noise factor
    noise = np.random.normal(0, noise_factor, image_float.shape)

    # keep the noisy values inside the valid normalized range
    noisy_image = np.clip(image_float + noise, 0, 1)

    # Restore the image data type. Round before casting: astype(np.uint8)
    # alone truncates, which darkens pixels by up to one grey level and
    # changes the image even when noise_factor is 0.
    return np.rint(noisy_image * 255).astype(np.uint8)

In [5]:
# define function to add augmentations to image dataset 
def augment_image(image, noise_factor=0.1, augment_factor=10):
    """
    Apply a random flip, a random rotation, a resize and Gaussian noise
    to an image using OpenCV.

    Input Parameters:
    - image: Input image (numpy array).
    - noise_factor: standard deviation of the Gaussian noise relative to pixel values.
    - augment_factor: single knob that drives the geometric augmentations:
        * the image is flipped horizontally when a uniform [0, 1) draw is
          greater than augment_factor / 11
        * the rotation angle is drawn uniformly from
          [-2 * augment_factor, 2 * augment_factor] degrees
        * the output is resized to a square of side 3 * augment_factor pixels

    Returns:
    - augmented image (numpy array).
    """
    # Flip the image based on a threshold related to the augment factor.
    # NOTE(review): a larger augment_factor makes flips rarer (never for
    # values >= 11) - confirm this inverse relation is intended.
    if np.random.rand() > (augment_factor / 11):
        image = cv2.flip(image, 1)  # Horizontal flip

    # Rotate the image about its centre by an angle related to the augment factor
    angle = np.random.uniform((-2 * augment_factor), (2 * augment_factor))
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    mat = cv2.getRotationMatrix2D(center, angle, 1.0)
    image = cv2.warpAffine(image, mat, (w, h))

    # Resize the image to a square whose side depends on the augment factor
    # (30 x 30 for the default factor of 10). cv2.resize needs integer
    # dimensions, so coerce the side to int; this keeps non-integer factors
    # from failing.
    side = int(round(augment_factor * 3))
    image = cv2.resize(image, (side, side))

    # add gaussian noise
    image = add_gaussian_noise_to_img(image, noise_factor)

    return image


In [6]:
# map source folder names to the class labels used in the augmented dataset
# (eg. num1 to 1)
labelDict = {
    "num0": "0",
    "num1": "1",
    "num2": "2",
    "num3": "3",
    "num4": "4",
    "num5": "5",
    "num6": "6",
    "num7": "7",
    "num8": "8",
    "num9": "9"
}

# seed the global NumPy RNG so that re-running this cell regenerates the same
# augmented images (directory listings are sorted below for the same reason)
np.random.seed(42)

# augmentation settings, identical for every image
noise_factor = 0.1  # adjust this factor for more or less noise
augment_factor = 10

# process image augmentation
train_sub_dirs = sorted(os.listdir(train_dir))

for subDir in train_sub_dirs:
    subDirPath = os.path.join(train_dir, subDir)

    # skip stray files and folders that are not one of the known classes
    # instead of failing with NotADirectoryError / KeyError
    if not os.path.isdir(subDirPath) or subDir not in labelDict:
        print("skipping unexpected entry:", subDirPath)
        continue

    # process label transformation (eg. num1 to 1) and make sure the
    # augmented class folder exists
    subDirAug = labelDict[subDir]
    subDirPathAug = os.path.join(train_dir_aug, subDirAug)
    os.makedirs(subDirPathAug, exist_ok=True)

    files = sorted(os.listdir(subDirPath))

    for file in files:
        if not (file.endswith('.png')):
            continue

        # load image file
        filePath = os.path.join(subDirPath, file)

        # cv2.imread returns None (it does not raise) when the file cannot be read
        image = cv2.imread(filePath, cv2.IMREAD_GRAYSCALE)
        if image is None:
            print("skipping unreadable image:", filePath)
            continue

        # flip / rotate / resize the image and add Gaussian noise
        image_aug = augment_image(image, noise_factor, augment_factor)

        # save to augment subfolder; cv2.imwrite signals failure through its
        # return value rather than an exception
        filePathAug = os.path.join(subDirPathAug, file)
        if not cv2.imwrite(filePathAug, image_aug):
            print("failed to write augmented image:", filePathAug)


        