# Data Augmentation Pipeline

Done with albumentations library.

## Configuration, Imports and Directories

In [86]:
import albumentations as A
import numpy as np
import gcsfs
import cv2
import os
from dotenv import load_dotenv
from pathlib import Path

load_dotenv()

ENV_TYPE = os.getenv("ENV_TYPE")
LOCAL_DATA_PATH = os.getenv("LOCAL_DATA_PATH")

DATA_PATH = "" if ENV_TYPE == "gcp" else str(LOCAL_DATA_PATH)

fs = gcsfs.GCSFileSystem()

src = Path(DATA_PATH + "cow-hooves/")
dst = Path(DATA_PATH + "augmented-cow-hooves/")

img_paths = fs.ls(src) if ENV_TYPE == "gcp" else list(Path(src).glob("*.*"))
print("image paths:", img_paths)

image paths: [PosixPath('../data/cow-hooves/salad-bowl.png')]


## Compose Transform

In [87]:
transform = A.Compose([
    A.RandomResizedCrop(size=(1440, 1080), scale=(0.8, 1.0), ratio=(0.75, 1.33), p=1.0),
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(
        shift_limit=0.15,
        scale_limit=0.2,
        rotate_limit=25,
        border_mode=0,
        p=0.9
    ),
])

## Augment Images

In [88]:
for i in range(10):
    for img_path in img_paths:
        if ENV_TYPE == 'gcp':
            with fs.open(img_path, "rb") as f:
                img_bytes = f.read()
            image = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)
        else: 
            image = cv2.imread(str(img_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        augmented = transform(image=image)
        aug_img = augmented["image"]

        image_name = f"{i}_{img_path.split('/')[-1]}" if ENV_TYPE == 'gcp' else str(i) + img_path.name
        save_path = f"{dst}/{image_name}"
        print(save_path)
        print(fs)

        success, encoded_img = cv2.imencode(".jpg", cv2.cvtColor(aug_img, cv2.COLOR_RGB2BGR))
        if success:
            if ENV_TYPE == 'gcp':
                with fs.open(save_path, "wb") as f:
                    f.write(encoded_img.tobytes())
            else:
                cv2.imwrite(str(save_path), cv2.cvtColor(aug_img, cv2.COLOR_RGB2BGR))

        print(f"saved: {save_path}")

../data/augmented-cow-hooves/0salad-bowl.png
<gcsfs.core.GCSFileSystem object at 0x106ff1c90>
saved: ../data/augmented-cow-hooves/0salad-bowl.png
../data/augmented-cow-hooves/1salad-bowl.png
<gcsfs.core.GCSFileSystem object at 0x106ff1c90>
saved: ../data/augmented-cow-hooves/1salad-bowl.png
../data/augmented-cow-hooves/2salad-bowl.png
<gcsfs.core.GCSFileSystem object at 0x106ff1c90>
saved: ../data/augmented-cow-hooves/2salad-bowl.png
../data/augmented-cow-hooves/3salad-bowl.png
<gcsfs.core.GCSFileSystem object at 0x106ff1c90>
saved: ../data/augmented-cow-hooves/3salad-bowl.png
../data/augmented-cow-hooves/4salad-bowl.png
<gcsfs.core.GCSFileSystem object at 0x106ff1c90>
saved: ../data/augmented-cow-hooves/4salad-bowl.png
../data/augmented-cow-hooves/5salad-bowl.png
<gcsfs.core.GCSFileSystem object at 0x106ff1c90>
saved: ../data/augmented-cow-hooves/5salad-bowl.png
../data/augmented-cow-hooves/6salad-bowl.png
<gcsfs.core.GCSFileSystem object at 0x106ff1c90>
saved: ../data/augmented-cow-h