# Data Augmentation Pipeline

Done with albumentations library.

## Configuration, Imports and Directories

In [1]:
import albumentations as A
import numpy as np
import gcsfs
import cv2
import os
from dotenv import load_dotenv
from pathlib import Path

load_dotenv()

ENV_TYPE = os.getenv("ENV_TYPE")
LOCAL_DATA_PATH = os.getenv("LOCAL_DATA_PATH")

DATA_PATH = "" if ENV_TYPE == "gcp" else str(LOCAL_DATA_PATH)

fs = gcsfs.GCSFileSystem()

src = Path(DATA_PATH + "cow-hooves/")
dst = Path(DATA_PATH + "augmented-cow-hooves/")

img_paths = fs.ls(src) if ENV_TYPE == "gcp" else list(Path(src).glob("*.*"))
print("image paths:", img_paths)

image paths: ['cow-hooves/flir_20251012T132854.jpg', 'cow-hooves/flir_20251012T133121.jpg', 'cow-hooves/flir_20251012T133405.jpg', 'cow-hooves/flir_20251012T133511.jpg', 'cow-hooves/flir_20251012T134100.jpg']


## Compose Transform

In [2]:
transform = A.Compose([
    A.RandomResizedCrop(size=(1440, 1080), scale=(0.8, 1.0), ratio=(0.75, 1.33), p=1.0),
    A.HorizontalFlip(p=0.5),
    A.Affine(
        translate_percent={"x": 0.15, "y": 0.15},
        scale=(0.8, 1.2),   
        rotate=(-25, 25),  
        border_mode=0, 
        p=0.9
    ),
])

## Augment Images

In [3]:
for i in range(10):
    for img_path in img_paths:
        if ENV_TYPE == 'gcp':
            with fs.open(img_path, "rb") as f:
                img_bytes = f.read()
            image = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)
        else: 
            image = cv2.imread(str(img_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        augmented = transform(image=image)
        aug_img = augmented["image"]

        image_name = f"{i}_{img_path.split('/')[-1]}" if ENV_TYPE == 'gcp' else str(i) + img_path.name
        save_path = f"{dst}/{image_name}"
        print(save_path)
        print(fs)

        success, encoded_img = cv2.imencode(".jpg", cv2.cvtColor(aug_img, cv2.COLOR_RGB2BGR))
        if success:
            if ENV_TYPE == 'gcp':
                with fs.open(save_path, "wb") as f:
                    f.write(encoded_img.tobytes())
            else:
                cv2.imwrite(str(save_path), cv2.cvtColor(aug_img, cv2.COLOR_RGB2BGR))

        print(f"saved: {save_path}")

augmented-cow-hooves/0_flir_20251012T132854.jpg
<gcsfs.core.GCSFileSystem object at 0x7f07a5ea3f10>
saved: augmented-cow-hooves/0_flir_20251012T132854.jpg
augmented-cow-hooves/0_flir_20251012T133121.jpg
<gcsfs.core.GCSFileSystem object at 0x7f07a5ea3f10>
saved: augmented-cow-hooves/0_flir_20251012T133121.jpg
augmented-cow-hooves/0_flir_20251012T133405.jpg
<gcsfs.core.GCSFileSystem object at 0x7f07a5ea3f10>
saved: augmented-cow-hooves/0_flir_20251012T133405.jpg
augmented-cow-hooves/0_flir_20251012T133511.jpg
<gcsfs.core.GCSFileSystem object at 0x7f07a5ea3f10>
saved: augmented-cow-hooves/0_flir_20251012T133511.jpg
augmented-cow-hooves/0_flir_20251012T134100.jpg
<gcsfs.core.GCSFileSystem object at 0x7f07a5ea3f10>
saved: augmented-cow-hooves/0_flir_20251012T134100.jpg
augmented-cow-hooves/1_flir_20251012T132854.jpg
<gcsfs.core.GCSFileSystem object at 0x7f07a5ea3f10>
saved: augmented-cow-hooves/1_flir_20251012T132854.jpg
augmented-cow-hooves/1_flir_20251012T133121.jpg
<gcsfs.core.GCSFileSys